// ts_dataplane/lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::{collections::HashMap, sync::Arc, time::Instant};
4
5use ts_bart::RoutingTable;
6use ts_overlay_router as or;
7use ts_packet::PacketMut;
8use ts_packetfilter::{FilterExt, IpProto};
9use ts_time::{Handle, Scheduler};
10use ts_transport::{OverlayTransportId, PeerId, UnderlayTransportId};
11use ts_tunnel::{Endpoint, NodeKeyPair};
12use ts_underlay_router as ur;
13
14pub mod async_tokio;
15
/// The only link-local destination exempt from the pre-rule link-local drop: the cloud-metadata
/// address `169.254.169.254` (Go `isAllowedLinkLocal`).
const ALLOWED_LINK_LOCAL_V4: std::net::Ipv4Addr = std::net::Ipv4Addr::new(169, 254, 169, 254);

/// Decides whether an inbound packet addressed to `dst` must be dropped BEFORE the ACL rules are
/// consulted. Returns `true` to drop.
///
/// This mirrors Go's filter `pre()`:
///
/// - any multicast destination is dropped (`ReasonMulticast`);
/// - a link-local unicast destination is dropped (`ReasonLinkLocalUnicast`) unless it is the
///   allowlisted cloud-metadata address [`ALLOWED_LINK_LOCAL_V4`].
///
/// Running this ahead of `can_access` means a permissive ACL cannot admit the multicast /
/// link-local traffic Go rejects unconditionally.
///
/// Go's `isAllowedLinkLocal` is `dst == gcpDNSAddr || any(LinkLocalAllowHooks)`; only the static
/// `gcpDNSAddr` arm is modeled here. The dynamic `LinkLocalAllowHooks` slice is empty in a plain
/// engine/tsnet embedding (its only upstream producer is the GCP metadata path), so the omission is
/// behaviorally equivalent for this fork; a feature needing a dynamic link-local allowlist would
/// have to extend this function.
///
/// Like Go's `netip.Addr` predicates, an IPv4-mapped-IPv6 destination (e.g. `::ffff:224.0.0.1`)
/// matches NEITHER arm and falls through to the ACL. The address is deliberately not
/// canonicalized/unmapped, to stay byte-faithful to Go (see the mapped-v6 test cases).
fn drop_before_rules(dst: std::net::IpAddr) -> bool {
    use std::net::IpAddr;

    // IPv6 unicast link-local is fe80::/10: the top ten bits of the first segment select the
    // prefix. (`Ipv6Addr::is_unicast_link_local` is unstable, so the prefix is tested directly.)
    const V6_LINK_LOCAL_MASK: u16 = 0xffc0;
    const V6_LINK_LOCAL_PREFIX: u16 = 0xfe80;

    match dst {
        // Multicast of either family is always dropped.
        _ if dst.is_multicast() => true,
        // IPv4 link-local is 169.254.0.0/16; only the cloud-metadata address survives (Go parity).
        IpAddr::V4(v4) => v4.is_link_local() && v4 != ALLOWED_LINK_LOCAL_V4,
        // This fork is IPv4-only by default, but match Go for any v6 destination.
        IpAddr::V6(v6) => v6.segments()[0] & V6_LINK_LOCAL_MASK == V6_LINK_LOCAL_PREFIX,
    }
}
45
/// IPv4 fragment state taken from the base header (Go `net/packet.decode4` reads `b[6:8]`): the
/// fragment offset and the more-fragments flag.
///
/// A non-first fragment carries no L4 header, so it needs its own verdict path rather than the
/// (always-port-0) ACL match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Ipv4Fragment {
    /// Fragment offset in 8-byte blocks (the 13-bit IPv4 field); 0 for the first/only fragment.
    offset_blocks: u16,
    /// The "more fragments" (MF) flag.
    more_fragments: bool,
}
56
/// Minimum fragment offset (in 8-byte blocks) Go permits for a non-first fragment — Go
/// `net/packet.minFragBlks = (60 + 20) / 8 = 10`.
///
/// A later fragment starting before this offset could overlap a transport header (the RFC 1858
/// overlapping-fragment evasion), so Go demotes it to `unknown` and drops it; only fragments at or
/// beyond this offset are allowed to "slide through".
const MIN_FRAG_BLKS: u16 = {
    /// Largest possible IPv4 header, in bytes.
    const MAX_IPV4_HEADER_BYTES: u16 = 60;
    /// A basic (option-less) TCP header, in bytes.
    const BASIC_TCP_HEADER_BYTES: u16 = 20;
    /// The unit of the IPv4 fragment-offset field, in bytes.
    const FRAGMENT_BLOCK_BYTES: u16 = 8;

    (MAX_IPV4_HEADER_BYTES + BASIC_TCP_HEADER_BYTES) / FRAGMENT_BLOCK_BYTES
};
63
64/// The inbound packet-filter verdict for an already-parsed packet (`true` = admit). This is the
65/// proto-switch of Go's filter `runIn4`/`runIn6`, applied after `pre()` and after this fork's
66/// source-attribution and local-destination routing (the analogues of Go's `local4`/`local6`
67/// precondition) have run:
68///
69/// 1. `drop_before_rules` — Go `pre()`'s unconditional multicast / link-local-unicast drops.
70/// 2. **Fragment classification** (Go `net/packet.decode4` + filter `pre()`): a non-first IPv4
71///    fragment carries no L4 header, so it cannot be port-matched. Go classifies it by offset — a
72///    fragment at offset `>= MIN_FRAG_BLKS` is mapped to `ipproto.Fragment` and `pre()` **accepts**
73///    it (stateless pass-through; the receiver's kernel discards it if the head fragment was
74///    dropped), while a fragment at a smaller offset is dropped (RFC 1858). A *fragmented* TSMP is
75///    disallowed (`moreFrags` on a first TSMP fragment → drop). Without this, etherparse leaves the
76///    transport `None` and the port reads as 0, so a normal ACL rule would silently drop every valid
77///    later fragment — breaking large/fragmented inbound traffic on the 1280-MTU overlay.
78/// 3. TSMP (proto 99) is always admitted, bypassing the ACL — Go `case ipproto.TSMP: return Accept`.
79///    TSMP carries in-band control messages between nodes, so it must reach the local stack
80///    regardless of the ACL rules.
81/// 4. Everything else consults the control-derived ACL via `can_access` — Go's `matches4.match`.
82fn inbound_filter_verdict(
83    filter: &(dyn ts_packetfilter::Filter + Send + Sync),
84    proto: IpProto,
85    src: std::net::IpAddr,
86    dst: std::net::IpAddr,
87    dst_port: u16,
88    frag: Option<Ipv4Fragment>,
89) -> bool {
90    if drop_before_rules(dst) {
91        tracing::trace!(?dst, "dropping multicast/link-local dst (pre-rule)");
92        return false;
93    }
94
95    if let Some(frag) = frag {
96        if frag.offset_blocks > 0 {
97            // A non-first fragment (Go `decode4`'s `fragOfs != 0` branch). It has no transport
98            // header to match, so the verdict is decided purely by offset:
99            if frag.offset_blocks < MIN_FRAG_BLKS {
100                // Potentially overlaps a transport header (RFC 1858); Go demotes to `unknown` → drop.
101                tracing::trace!(?dst, "dropping low-offset IPv4 fragment (RFC 1858)");
102                return false;
103            }
104            // A valid later fragment — Go maps it to `ipproto.Fragment`, which `pre()` accepts
105            // ahead of the ACL. Stateless: if the head fragment was filtered the receiver's kernel
106            // drops this on reassembly timeout. Accepting here is what large fragmented inbound
107            // traffic relies on.
108            tracing::trace!(
109                ?dst,
110                "accepting later IPv4 fragment (Go pre() pass-through)"
111            );
112            return true;
113        }
114        // `frag.offset_blocks == 0`: the first fragment (or an unfragmented packet). Go disallows a
115        // *fragmented* TSMP (a first fragment with MF set) — without the whole message it can't be a
116        // valid inter-node control packet. Fall through to the normal proto-switch for everything
117        // else; the first fragment of TCP/UDP carries its L4 header, so `dst_port` was parsed above.
118        if proto == IpProto::TSMP && frag.more_fragments {
119            tracing::trace!(?dst, "dropping fragmented TSMP (Go parity)");
120            return false;
121        }
122    }
123
124    if proto == IpProto::TSMP {
125        tracing::trace!(?dst, "accepting TSMP inbound (bypasses ACL, Go parity)");
126        return true;
127    }
128
129    let info = ts_packetfilter::PacketInfo {
130        ip_proto: proto,
131        port: dst_port,
132        src,
133        dst,
134    };
135    // TODO(npry): wire in nodecaps
136    let caps = [];
137    let verdict = filter.can_access(&info, caps);
138    tracing::trace!(?info, ?caps, verdict);
139    verdict
140}
141
/// A data plane subsystem that can be the subject of timer events.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Subsystem {
    /// The wireguard component.
    Wireguard,
}
147
/// The direction/path of a captured packet, mirroring Go Tailscale's `capture.Path`.
///
/// The discriminants are the on-wire path codes written into each pcap record's Tailscale
/// preamble; `#[repr(u16)]` pins the representation to the width [`CapturePath::code`] returns.
#[repr(u16)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CapturePath {
    /// A packet from the local device, heading out to a peer (pre-encrypt).
    FromLocal = 0,
    /// A packet received from a peer, decrypted, heading to the local device.
    FromPeer = 1,
    /// A packet synthesized by us toward the local device. Retained for Go `capture.Path` on-wire
    /// code parity (so captured pcap path codes match Go's, and a future synthesized-packet tee
    /// point can emit it); not currently emitted — the tee only produces `FromLocal`/`FromPeer`.
    SynthesizedToLocal = 2,
    /// A packet synthesized by us toward a peer. Retained for Go `capture.Path` on-wire code parity
    /// (see [`Self::SynthesizedToLocal`]); not currently emitted.
    SynthesizedToPeer = 3,
}

impl CapturePath {
    /// Returns the on-wire path code (the `uint16` written into the pcap record preamble), i.e.
    /// the variant's discriminant.
    #[must_use]
    pub const fn code(self) -> u16 {
        self as u16
    }
}
171
172/// A debug packet-capture hook. When installed on a [`DataPlane`], it is invoked with the path and
173/// the raw IP packet bytes for every plaintext packet crossing the datapath. It must be cheap and
174/// non-blocking — it runs inline on the single-threaded dataplane step, so a slow hook backs up the
175/// datapath. Wrapped in `Arc` so it is cheap to clone and `Send + Sync` for the actor that installs
176/// it.
177pub type CaptureHook = std::sync::Arc<dyn Fn(CapturePath, &[u8]) + Send + Sync>;
178
179/// Transforms packets to make tailscale happen.
180pub struct DataPlane {
181    /// Wireguard encryption/decryption.
182    pub wireguard: Endpoint,
183
184    /// Outbound overlay router.
185    pub or_out: or::outbound::Router,
186    /// Outbound underlay router.
187    pub ur_out: ur::outbound::Router,
188
189    /// Inbound source filter.
190    pub src_filter_in: Arc<ts_bart::Table<PeerId>>,
191    /// Inbound overlay router.
192    pub or_in: or::inbound::Router,
193
194    /// The packet filter.
195    pub packet_filter: Arc<dyn ts_packetfilter::Filter + Send + Sync>,
196
197    /// Events queued for future processing.
198    pub events: Scheduler<Subsystem>,
199
200    /// Next event for the wireguard subsystem.
201    pub wg_next: Option<Handle<Subsystem>>,
202
203    /// Optional debug packet-capture hook (Go `tstun.Wrapper` capture hook). `None` (the default)
204    /// means no capture and zero datapath overhead. Installed/cleared at runtime by the dataplane
205    /// actor; see [`DataPlane::process_outbound`]/[`DataPlane::process_inbound`] for the tee points.
206    pub capture: Option<CaptureHook>,
207}
208
209impl DataPlane {
210    /// Creates a new data plane for a wireguard node key.
211    pub fn new(my_key: NodeKeyPair) -> Self {
212        DataPlane {
213            wireguard: Endpoint::new(my_key),
214            or_out: Default::default(),
215            ur_out: Default::default(),
216            src_filter_in: Default::default(),
217            or_in: Default::default(),
218            events: Default::default(),
219            packet_filter: Arc::new(ts_packetfilter::DropAllFilter),
220            wg_next: None,
221            capture: None,
222        }
223    }
224
225    /// Processes packets originating from the local device.
226    #[tracing::instrument(skip_all, fields(n_packets = packets.len()))]
227    pub fn process_outbound(&mut self, packets: Vec<PacketMut>) -> OutboundResult {
228        if let Some(hook) = &self.capture {
229            for p in &packets {
230                hook(CapturePath::FromLocal, p.as_ref());
231            }
232        }
233
234        let or::outbound::Result {
235            to_wireguard,
236            loopback,
237        } = self.or_out.route(packets);
238
239        let to_wireguard = to_wireguard
240            .into_iter()
241            .map(|(k, v)| (ts_tunnel::PeerId(k.0), v))
242            .collect::<Vec<_>>();
243
244        let ts_tunnel::SendResult {
245            to_peers: encrypted,
246        } = self.wireguard.send(to_wireguard);
247
248        let to_peers = self
249            .ur_out
250            .route(encrypted.into_iter().map(|(k, v)| (PeerId(k.0), v)));
251
252        if let Some(next) = self.wireguard.next_event()
253            && let Some(prev) = self
254                .wg_next
255                .replace(self.events.add(next, Subsystem::Wireguard))
256        {
257            prev.cancel();
258        }
259
260        OutboundResult { to_peers, loopback }
261    }
262
263    /// Processes packets received from elsewhere.
264    pub fn process_inbound(
265        &mut self,
266        packets: impl IntoIterator<Item = PacketMut>,
267    ) -> InboundResult {
268        let ts_tunnel::RecvResult { to_local, to_peers } = self.wireguard.recv(packets);
269
270        if let Some(hook) = &self.capture {
271            for packets in to_local.values() {
272                for p in packets {
273                    hook(CapturePath::FromPeer, p.as_ref());
274                }
275            }
276        }
277
278        let to_local = to_local
279            .into_iter()
280            .map(|(peer_id, mut packets)| -> Vec<PacketMut> {
281                let _span = tracing::trace_span!(
282                    "src_filter_inbound",
283                    peer_id = ?peer_id,
284                    n_packet = packets.len(),
285                )
286                .entered();
287
288                packets.retain(|packet| {
289                    let Some(src) = packet.get_src_addr() else {
290                        tracing::trace!("does not look like ip packet");
291                        return false;
292                    };
293                    let verdict = if let Some(allowed_peer) = self.src_filter_in.lookup(src) {
294                        *allowed_peer == PeerId(peer_id.0)
295                    } else {
296                        tracing::trace!(remote_ip = %src, "unknown peer address");
297                        false
298                    };
299                    tracing::trace!(?src, verdict);
300                    verdict
301                });
302
303                packets
304            })
305            .map(|mut v| {
306                let _span =
307                    tracing::trace_span!("packet_filter_inbound", n_packet = v.len()).entered();
308
309                v.retain(|pkt| {
310                    let Ok(pkt) = etherparse::SlicedPacket::from_ip(pkt.as_ref()) else {
311                        tracing::trace!("does not look like ip packet");
312                        return false;
313                    };
314
315                    let (proto, src, dst, frag) = match pkt.net {
316                        Some(etherparse::NetSlice::Ipv4(ipv4)) => {
317                            // IPv4 fragment state (Go `net/packet.decode4` reads `b[6:8]`): a
318                            // non-first fragment carries no L4 header, so etherparse leaves
319                            // `transport == None` and the port would read as 0 below — which a normal
320                            // ACL rule never admits. Without classifying the fragment that silently
321                            // drops valid later fragments Go *accepts* (breaking large/fragmented
322                            // inbound traffic on the 1280-MTU overlay). Capture the offset (in 8-byte
323                            // blocks) + the more-fragments bit so the verdict can mirror Go's
324                            // `decode4`/`pre()` fragment handling.
325                            let hdr = ipv4.header();
326                            (
327                                IpProto::new(ipv4.payload().ip_number.0 as _),
328                                hdr.source_addr().into(),
329                                hdr.destination_addr().into(),
330                                Some(Ipv4Fragment {
331                                    offset_blocks: hdr.fragments_offset().value(),
332                                    more_fragments: hdr.more_fragments(),
333                                }),
334                            )
335                        }
336                        Some(etherparse::NetSlice::Ipv6(ipv6)) => (
337                            IpProto::new(ipv6.payload().ip_number.0 as _),
338                            ipv6.header().source_addr().into(),
339                            ipv6.header().destination_addr().into(),
340                            // IPv6 fragmentation is carried in a Fragment extension header, not the
341                            // base header; the tailnet is IPv4-only by default so a v6 fragment can't
342                            // reach here on the live path. Treat v6 as non-fragment (the existing
343                            // behavior) — full v6 fragment parity is tracked separately.
344                            None,
345                        ),
346                        _ => {
347                            // A packet that parsed as IP but is neither IPv4 nor IPv6 (e.g. a
348                            // future/odd `NetSlice` shape). These bytes are attacker-controlled
349                            // post-decrypt, so fail closed — drop it — rather than `unreachable!`,
350                            // which would panic the single-threaded dataplane on a crafted packet.
351                            // Go's filter `pre()` likewise returns Drop/"not-ip" here, never panics.
352                            tracing::trace!("parsed packet is neither IPv4 nor IPv6; dropping");
353                            return false;
354                        }
355                    };
356
357                    let (_src_port, dst_port) = match pkt.transport {
358                        Some(etherparse::TransportSlice::Udp(udp)) => {
359                            (udp.source_port(), udp.destination_port())
360                        }
361                        Some(etherparse::TransportSlice::Tcp(tcp)) => {
362                            (tcp.source_port(), tcp.destination_port())
363                        }
364                        _ => (0, 0),
365                    };
366
367                    // The inbound proto-switch (Go `runIn4`/`runIn6`): Go `pre()` multicast/link-local
368                    // drops, then the fragment classification (Go `decode4` + `pre()`), then
369                    // unconditional TSMP accept, then the control-derived ACL. Source attribution above
370                    // and `or_in.route` below bound this to attributable peers and local destinations
371                    // (Go's `local4`/`local6` precondition).
372                    inbound_filter_verdict(
373                        self.packet_filter.as_ref(),
374                        proto,
375                        src,
376                        dst,
377                        dst_port,
378                        frag,
379                    )
380                });
381
382                v
383            });
384
385        let to_peers = to_peers
386            .into_iter()
387            .map(|(k, v)| (ts_transport::PeerId(k.0), v));
388
389        let to_local = self.or_in.route(to_local.flatten());
390        let to_peers = self.ur_out.route(to_peers);
391
392        if let Some(next) = self.wireguard.next_event()
393            && let Some(prev) = self
394                .wg_next
395                .replace(self.events.add(next, Subsystem::Wireguard))
396        {
397            prev.cancel();
398        }
399
400        InboundResult { to_local, to_peers }
401    }
402
403    /// Return the next time at which [`DataPlane::process_events`] must be called.
404    ///
405    /// [`DataPlane::process_outbound`], [`DataPlane::process_inbound`] and
406    /// [`DataPlane::process_events`] may all update the next event time. Callers should prefer
407    /// calling `next_event` as needed to get a correct result, rather than store the returned
408    /// value.
409    pub fn next_event(&self) -> Option<Instant> {
410        self.events.next_dispatch()
411    }
412
413    /// Process all queued events that are due for processing.
414    ///
415    /// Must be called at least as often as dictated by [`DataPlane::next_event`] for the
416    /// data plane to function correctly. It is harmless to call it more frequently.
417    pub fn process_events(&mut self) -> EventResult {
418        let mut to_peers = HashMap::new();
419        let now = Instant::now();
420        for event in self.events.dispatch(now) {
421            match event {
422                Subsystem::Wireguard => {
423                    let res = self.wireguard.dispatch_events(now);
424                    to_peers.extend(
425                        res.to_peers
426                            .into_iter()
427                            .map(|(id, pkts)| (ts_transport::PeerId(id.0), pkts)),
428                    );
429                }
430            }
431        }
432        let to_peers = self.ur_out.route(to_peers);
433
434        if let Some(next) = self.wireguard.next_event()
435            && let Some(prev) = self
436                .wg_next
437                .replace(self.events.add(next, Subsystem::Wireguard))
438        {
439            prev.cancel();
440        }
441
442        EventResult { to_peers }
443    }
444}
445
446/// The result of processing outbound packets.
447pub struct OutboundResult {
448    /// Packets to be sent into underlay transports for transmission.
449    pub to_peers: HashMap<(UnderlayTransportId, PeerId), Vec<PacketMut>>,
450    /// Packets to be looped back and delivered to overlay transports.
451    pub loopback: HashMap<OverlayTransportId, Vec<PacketMut>>,
452}
453
454/// The result of processing inbound packets.
455pub struct InboundResult {
456    /// Decrypted packets to be delivered to overlay transports.
457    pub to_local: HashMap<OverlayTransportId, Vec<PacketMut>>,
458    /// Encrypted packets to be sent to wireguard peers by the underlay.
459    pub to_peers: HashMap<(UnderlayTransportId, PeerId), Vec<PacketMut>>,
460}
461
462/// The result of processing an event.
463#[derive(Default)]
464pub struct EventResult {
465    /// Encrypted packets to be sent to wireguard peers by the underlay.
466    pub to_peers: HashMap<(UnderlayTransportId, PeerId), Vec<PacketMut>>,
467}
468
#[cfg(test)]
mod tests {
    use std::sync::Mutex;

    use super::*;

    /// Records `(path, bytes)` for each capture-hook invocation in a test.
    type CaptureLog = Arc<Mutex<Vec<(CapturePath, Vec<u8>)>>>;

    /// The on-wire path codes are pinned to the values Go's `capture.Path` uses.
    #[test]
    fn capture_path_codes() {
        assert_eq!(CapturePath::FromLocal.code(), 0);
        assert_eq!(CapturePath::FromPeer.code(), 1);
        assert_eq!(CapturePath::SynthesizedToLocal.code(), 2);
        assert_eq!(CapturePath::SynthesizedToPeer.code(), 3);
    }

    /// The pre-rule destination screen (Go filter `pre()`): multicast and non-allowlisted link-local
    /// destinations are dropped before the ACL; ordinary unicast and the cloud-metadata link-local
    /// exception pass through to the rules.
    #[test]
    fn pre_rule_drop_matches_go() {
        let ip = |s: &str| s.parse::<std::net::IpAddr>().unwrap();
        // Dropped pre-rules:
        assert!(drop_before_rules(ip("224.0.0.1")), "IPv4 multicast dropped");
        assert!(
            drop_before_rules(ip("239.255.255.250")),
            "IPv4 multicast (SSDP) dropped"
        );
        assert!(
            drop_before_rules(ip("169.254.1.1")),
            "IPv4 link-local dropped"
        );
        assert!(drop_before_rules(ip("ff02::1")), "IPv6 multicast dropped");
        assert!(drop_before_rules(ip("fe80::1")), "IPv6 link-local dropped");
        assert!(
            drop_before_rules(ip("febf:ffff::1")),
            "top of fe80::/10 dropped (locks the 0xffc0/0xfe80 mask)"
        );
        // Passed through to the rules:
        assert!(
            !drop_before_rules(ip("fec0::1")),
            "just past fe80::/10 passes (locks the 0xffc0/0xfe80 mask)"
        );
        // IPv4-mapped-IPv6 destinations match NEITHER arm and fall through to the ACL, exactly as
        // Go's `netip.Addr` predicates do (no unmap/canonicalize). Pinning this guards against a
        // future "canonicalize to be safe" refactor silently diverging from Go.
        assert!(
            !drop_before_rules(ip("::ffff:224.0.0.1")),
            "4in6-mapped multicast falls through to the ACL, matching Go"
        );
        assert!(
            !drop_before_rules(ip("::ffff:169.254.1.1")),
            "4in6-mapped link-local falls through to the ACL, matching Go"
        );
        assert!(
            !drop_before_rules(ip("100.64.0.5")),
            "ordinary tailnet unicast passes"
        );
        assert!(
            !drop_before_rules(ip("8.8.8.8")),
            "ordinary public unicast passes"
        );
        assert!(
            !drop_before_rules(ip("169.254.169.254")),
            "the cloud-metadata link-local address is the Go-allowlisted exception"
        );
        assert!(
            !drop_before_rules(ip("fd7a:115c:a1e0::1")),
            "IPv6 ULA (tailnet) passes"
        );
    }

    /// A filter that drops everything (returns `None` for every packet). Lets a test prove that TSMP
    /// is admitted by bypassing the ACL — not by the ACL happening to allow it.
    struct DenyAll;
    impl ts_packetfilter::Filter for DenyAll {
        fn match_for(
            &self,
            _info: &ts_packetfilter::PacketInfo,
            _caps: ts_packetfilter::filter::CapIter,
        ) -> Option<&str> {
            None
        }
    }

    /// The inbound proto-switch (Go `runIn4`/`runIn6`): TSMP is always admitted, bypassing the ACL;
    /// `pre()` drops still win over TSMP; non-TSMP defers to the ACL.
    #[test]
    fn tsmp_bypasses_acl_matches_go() {
        let ip = |s: &str| s.parse::<std::net::IpAddr>().unwrap();
        let src = ip("100.64.0.9");
        let dst = ip("100.64.0.1");
        let tsmp = IpProto::new(99);

        // TSMP is accepted even though the ACL denies everything — Go `case TSMP: return Accept`.
        assert!(
            inbound_filter_verdict(&DenyAll, tsmp, src, dst, 0, None),
            "TSMP admitted by bypassing the (deny-all) ACL"
        );
        // A non-TSMP proto under the same deny-all ACL is dropped — proves the bypass is TSMP-specific.
        assert!(
            !inbound_filter_verdict(&DenyAll, IpProto::TCP, src, dst, 443, None),
            "TCP still consults the ACL (deny-all → dropped)"
        );
        // `pre()` drops outrank the TSMP accept: TSMP to a multicast/link-local dst is still dropped,
        // exactly as Go runs `pre()` before the proto switch.
        assert!(
            !inbound_filter_verdict(&DenyAll, tsmp, src, ip("224.0.0.1"), 0, None),
            "TSMP to a multicast dst is still dropped (pre() before the switch)"
        );
        assert!(
            !inbound_filter_verdict(&DenyAll, tsmp, src, ip("169.254.1.1"), 0, None),
            "TSMP to a link-local dst is still dropped (pre() before the switch)"
        );
        // IpProto::TSMP is the named constant for proto 99.
        assert_eq!(IpProto::TSMP, tsmp, "IpProto::TSMP == 99");
    }

    /// IPv4 fragment handling, mirroring Go `net/packet.decode4` + filter `pre()`:
    /// - a valid later fragment (offset ≥ `MIN_FRAG_BLKS`) is ACCEPTED ahead of the ACL (Go maps it
    ///   to `ipproto.Fragment`, which `pre()` admits) — even under a deny-all ACL and even though its
    ///   parsed port is 0, which a normal rule would never match;
    /// - a low-offset later fragment (offset < `MIN_FRAG_BLKS`) is DROPPED (RFC 1858);
    /// - a first fragment (offset 0) defers to the normal proto-switch/ACL on its real port;
    /// - a *fragmented* TSMP first fragment (offset 0, MF set) is DROPPED (Go disallows it), unlike a
    ///   non-fragmented TSMP which bypasses the ACL.
    #[test]
    fn ipv4_fragment_handling_matches_go_decode4() {
        let ip = |s: &str| s.parse::<std::net::IpAddr>().unwrap();
        let src = ip("100.64.0.9");
        let dst = ip("100.64.0.1");
        let frag = |offset_blocks: u16, more_fragments: bool| {
            Some(Ipv4Fragment {
                offset_blocks,
                more_fragments,
            })
        };

        // A valid later fragment is accepted under a DENY-ALL ACL with port 0 — proves the accept is
        // the Go `pre()` Fragment pass-through, not the ACL happening to allow it.
        assert!(
            inbound_filter_verdict(
                &DenyAll,
                IpProto::TCP,
                src,
                dst,
                0,
                frag(MIN_FRAG_BLKS, false)
            ),
            "a valid later fragment (offset >= MIN_FRAG_BLKS) is accepted ahead of the ACL"
        );
        assert!(
            inbound_filter_verdict(
                &DenyAll,
                IpProto::UDP,
                src,
                dst,
                0,
                frag(MIN_FRAG_BLKS + 50, true)
            ),
            "a later fragment well past the floor (MF set) is also accepted"
        );

        // A low-offset later fragment (could overlap a transport header) is dropped — RFC 1858.
        assert!(
            !inbound_filter_verdict(
                &DenyAll,
                IpProto::TCP,
                src,
                dst,
                0,
                frag(MIN_FRAG_BLKS - 1, false)
            ),
            "a low-offset later fragment is dropped (RFC 1858)"
        );
        assert!(
            !inbound_filter_verdict(&DenyAll, IpProto::TCP, src, dst, 0, frag(1, false)),
            "the smallest non-zero offset is dropped"
        );

        // A first fragment (offset 0) defers to the normal ACL on its real port: deny-all drops a
        // TCP first fragment, exactly as it drops a non-fragmented TCP packet.
        assert!(
            !inbound_filter_verdict(&DenyAll, IpProto::TCP, src, dst, 443, frag(0, true)),
            "a first fragment defers to the ACL (deny-all -> dropped) on its parsed port"
        );

        // A fragmented TSMP first fragment (offset 0, MF set) is dropped — Go disallows it — even
        // though a non-fragmented TSMP bypasses the ACL.
        assert!(
            !inbound_filter_verdict(&DenyAll, IpProto::TSMP, src, dst, 0, frag(0, true)),
            "a fragmented TSMP first fragment is dropped (Go parity)"
        );
        assert!(
            inbound_filter_verdict(&DenyAll, IpProto::TSMP, src, dst, 0, frag(0, false)),
            "a non-fragmented TSMP (offset 0, MF clear) still bypasses the ACL"
        );

        // A *later* TSMP fragment (offset >= MIN_FRAG_BLKS) is accepted via the offset-based
        // fragment pass-through, NOT dropped by the fragmented-TSMP rule — that rule is offset-0
        // only (a first fragment with MF). This proves the later-fragment branch is proto-independent
        // and wins over the TSMP-specific logic (Go maps any offset>=minFragBlks to ipproto.Fragment
        // regardless of the L4 proto byte), locking the branch ordering against regression.
        assert!(
            inbound_filter_verdict(
                &DenyAll,
                IpProto::TSMP,
                src,
                dst,
                0,
                frag(MIN_FRAG_BLKS, true)
            ),
            "a later TSMP fragment is accepted via the fragment path (proto-independent)"
        );
    }

    /// Behavioral guard: an installed capture hook MUST be invoked with `CapturePath::FromLocal`
    /// and the exact packet bytes for every outbound packet. The tee sits at the top of
    /// `process_outbound`, before `or_out.route` consumes the packets, so it fires regardless of
    /// whether a wireguard peer exists (an empty router just drops the routed packets afterward).
    /// This is the only end-to-end guard that the dataplane capture tee actually fires; a refactor
    /// that drops the tee would leave every byte-layout test green.
    #[test]
    fn capture_hook_fires_on_outbound() {
        let mut dp = DataPlane::new(NodeKeyPair::new());

        let recorded: CaptureLog = Arc::new(Mutex::new(Vec::new()));
        let sink = recorded.clone();
        dp.capture = Some(Arc::new(move |path: CapturePath, bytes: &[u8]| {
            sink.lock().unwrap().push((path, bytes.to_vec()));
        }));

        // The outbound tee passes `p.as_ref()` as-given; the bytes need not be a valid IP packet.
        let payload: Vec<u8> = vec![0xde, 0xad, 0xbe, 0xef];
        let packet = PacketMut::from(payload.clone());

        drop(dp.process_outbound(vec![packet]));

        let captured = recorded.lock().unwrap();
        assert_eq!(captured.len(), 1, "hook must fire exactly once per packet");
        assert_eq!(captured[0].0, CapturePath::FromLocal);
        assert_eq!(captured[0].1, payload);
    }
}