Skip to main content

ts_dataplane/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::{collections::HashMap, sync::Arc, time::Instant};
4
5use ts_bart::RoutingTable;
6use ts_overlay_router as or;
7use ts_packet::PacketMut;
8use ts_packetfilter::{FilterExt, IpProto};
9use ts_time::{Handle, Scheduler};
10use ts_transport::{OverlayTransportId, PeerId, UnderlayTransportId};
11use ts_tunnel::{Endpoint, NodeKeyPair};
12use ts_underlay_router as ur;
13
14pub mod async_tokio;
15
/// Cloud-metadata address `169.254.169.254` — the only link-local destination that Go's filter
/// `pre()` exempts from its link-local drop (Go `isAllowedLinkLocal`).
const ALLOWED_LINK_LOCAL_V4: std::net::Ipv4Addr = std::net::Ipv4Addr::new(169, 254, 169, 254);

/// Pre-ACL destination screen, modeled on Go's filter `pre()`.
///
/// Returns `true` when an inbound packet addressed to `dst` must be dropped without consulting the
/// ACL rules at all:
///
/// * any multicast destination (Go `ReasonMulticast`), or
/// * a link-local unicast destination other than the allowlisted cloud-metadata address
///   (Go `ReasonLinkLocalUnicast`).
///
/// Because this runs ahead of `can_access`, even a permissive ACL cannot admit the multicast /
/// link-local traffic that Go rejects unconditionally.
///
/// Parity notes:
///
/// * Go's `isAllowedLinkLocal` is `dst == gcpDNSAddr || any(LinkLocalAllowHooks)`. Only the static
///   `gcpDNSAddr` arm is modeled. The dynamic `LinkLocalAllowHooks` slice is empty in a plain
///   engine/tsnet embedding (its only upstream producer is the GCP metadata path), so leaving it
///   out is behaviorally equivalent for this fork; a feature needing a dynamic link-local
///   allowlist would have to extend this function.
/// * An IPv4-mapped-IPv6 destination (e.g. `::ffff:224.0.0.1`) matches NEITHER rule and falls
///   through to the ACL. The address is deliberately not canonicalized/unmapped, to stay
///   byte-faithful to Go's `netip.Addr` predicates (see the mapped-v6 test cases).
fn drop_before_rules(dst: std::net::IpAddr) -> bool {
    use std::net::IpAddr;

    let link_local_unicast = match dst {
        // IPv4 link-local is 169.254.0.0/16, minus the single allowlisted metadata address.
        IpAddr::V4(addr) => addr != ALLOWED_LINK_LOCAL_V4 && addr.is_link_local(),
        // IPv6 unicast link-local is fe80::/10: compare the top ten bits of the first segment
        // directly, so this does not depend on `Ipv6Addr::is_unicast_link_local` being available
        // on the toolchain. This fork is IPv4-only by default, but match Go for any v6.
        IpAddr::V6(addr) => {
            let [first, ..] = addr.segments();
            (first & 0xffc0) == 0xfe80
        }
    };

    dst.is_multicast() || link_local_unicast
}
45
46/// The inbound packet-filter verdict for an already-parsed packet (`true` = admit). This is the
47/// proto-switch of Go's filter `runIn4`/`runIn6`, applied after `pre()` and after this fork's
48/// source-attribution and local-destination routing (the analogues of Go's `local4`/`local6`
49/// precondition) have run:
50///
51/// 1. `drop_before_rules` — Go `pre()`'s unconditional multicast / link-local-unicast drops.
52/// 2. TSMP (proto 99) is always admitted, bypassing the ACL — Go `case ipproto.TSMP: return Accept`.
53///    TSMP carries in-band control messages between nodes, so it must reach the local stack
54///    regardless of the ACL rules.
55/// 3. Everything else consults the control-derived ACL via `can_access` — Go's `matches4.match`.
56fn inbound_filter_verdict(
57    filter: &(dyn ts_packetfilter::Filter + Send + Sync),
58    proto: IpProto,
59    src: std::net::IpAddr,
60    dst: std::net::IpAddr,
61    dst_port: u16,
62) -> bool {
63    if drop_before_rules(dst) {
64        tracing::trace!(?dst, "dropping multicast/link-local dst (pre-rule)");
65        return false;
66    }
67
68    if proto == IpProto::TSMP {
69        tracing::trace!(?dst, "accepting TSMP inbound (bypasses ACL, Go parity)");
70        return true;
71    }
72
73    let info = ts_packetfilter::PacketInfo {
74        ip_proto: proto,
75        port: dst_port,
76        src,
77        dst,
78    };
79    // TODO(npry): wire in nodecaps
80    let caps = [];
81    let verdict = filter.can_access(&info, caps);
82    tracing::trace!(?info, ?caps, verdict);
83    verdict
84}
85
/// A data plane subsystem that can be the subject of timer events.
///
/// Values of this type are what the data plane queues in its scheduler; when a queued event comes
/// due, the variant says which subsystem has to be serviced.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Subsystem {
    /// The wireguard component.
    Wireguard,
}
91
/// Which way a captured packet was travelling, mirroring Go Tailscale's `capture.Path`. Each
/// variant's numeric value is the on-wire path code written into a pcap record's Tailscale
/// preamble.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CapturePath {
    /// A packet from the local device, heading out to a peer (pre-encrypt).
    FromLocal = 0,
    /// A packet received from a peer, decrypted, heading to the local device.
    FromPeer = 1,
    /// A packet synthesized by us toward the local device. Kept so the on-wire codes stay in step
    /// with Go's `capture.Path` (captured pcap path codes match Go's, and a future
    /// synthesized-packet tee point can emit it); not currently emitted — the tee only produces
    /// `FromLocal`/`FromPeer`.
    SynthesizedToLocal = 2,
    /// A packet synthesized by us toward a peer. Kept for Go `capture.Path` on-wire code parity
    /// (see [`Self::SynthesizedToLocal`]); not currently emitted.
    SynthesizedToPeer = 3,
}

impl CapturePath {
    /// The on-wire path code (the `uint16` written into the pcap record preamble).
    pub fn code(self) -> u16 {
        // Spelled out per variant so the wire mapping is visible at the point of use; the values
        // are the same as the enum discriminants above.
        match self {
            Self::FromLocal => 0,
            Self::FromPeer => 1,
            Self::SynthesizedToLocal => 2,
            Self::SynthesizedToPeer => 3,
        }
    }
}
115
116/// A debug packet-capture hook. When installed on a [`DataPlane`], it is invoked with the path and
117/// the raw IP packet bytes for every plaintext packet crossing the datapath. It must be cheap and
118/// non-blocking — it runs inline on the single-threaded dataplane step, so a slow hook backs up the
119/// datapath. Wrapped in `Arc` so it is cheap to clone and `Send + Sync` for the actor that installs
120/// it.
121pub type CaptureHook = std::sync::Arc<dyn Fn(CapturePath, &[u8]) + Send + Sync>;
122
123/// Transforms packets to make tailscale happen.
124pub struct DataPlane {
125    /// Wireguard encryption/decryption.
126    pub wireguard: Endpoint,
127
128    /// Outbound overlay router.
129    pub or_out: or::outbound::Router,
130    /// Outbound underlay router.
131    pub ur_out: ur::outbound::Router,
132
133    /// Inbound source filter.
134    pub src_filter_in: Arc<ts_bart::Table<PeerId>>,
135    /// Inbound overlay router.
136    pub or_in: or::inbound::Router,
137
138    /// The packet filter.
139    pub packet_filter: Arc<dyn ts_packetfilter::Filter + Send + Sync>,
140
141    /// Events queued for future processing.
142    pub events: Scheduler<Subsystem>,
143
144    /// Next event for the wireguard subsystem.
145    pub wg_next: Option<Handle<Subsystem>>,
146
147    /// Optional debug packet-capture hook (Go `tstun.Wrapper` capture hook). `None` (the default)
148    /// means no capture and zero datapath overhead. Installed/cleared at runtime by the dataplane
149    /// actor; see [`DataPlane::process_outbound`]/[`DataPlane::process_inbound`] for the tee points.
150    pub capture: Option<CaptureHook>,
151}
152
153impl DataPlane {
154    /// Creates a new data plane for a wireguard node key.
155    pub fn new(my_key: NodeKeyPair) -> Self {
156        DataPlane {
157            wireguard: Endpoint::new(my_key),
158            or_out: Default::default(),
159            ur_out: Default::default(),
160            src_filter_in: Default::default(),
161            or_in: Default::default(),
162            events: Default::default(),
163            packet_filter: Arc::new(ts_packetfilter::DropAllFilter),
164            wg_next: None,
165            capture: None,
166        }
167    }
168
169    /// Processes packets originating from the local device.
170    #[tracing::instrument(skip_all, fields(n_packets = packets.len()))]
171    pub fn process_outbound(&mut self, packets: Vec<PacketMut>) -> OutboundResult {
172        if let Some(hook) = &self.capture {
173            for p in &packets {
174                hook(CapturePath::FromLocal, p.as_ref());
175            }
176        }
177
178        let or::outbound::Result {
179            to_wireguard,
180            loopback,
181        } = self.or_out.route(packets);
182
183        let to_wireguard = to_wireguard
184            .into_iter()
185            .map(|(k, v)| (ts_tunnel::PeerId(k.0), v))
186            .collect::<Vec<_>>();
187
188        let ts_tunnel::SendResult {
189            to_peers: encrypted,
190        } = self.wireguard.send(to_wireguard);
191
192        let to_peers = self
193            .ur_out
194            .route(encrypted.into_iter().map(|(k, v)| (PeerId(k.0), v)));
195
196        if let Some(next) = self.wireguard.next_event()
197            && let Some(prev) = self
198                .wg_next
199                .replace(self.events.add(next, Subsystem::Wireguard))
200        {
201            prev.cancel();
202        }
203
204        OutboundResult { to_peers, loopback }
205    }
206
207    /// Processes packets received from elsewhere.
208    pub fn process_inbound(
209        &mut self,
210        packets: impl IntoIterator<Item = PacketMut>,
211    ) -> InboundResult {
212        let ts_tunnel::RecvResult { to_local, to_peers } = self.wireguard.recv(packets);
213
214        if let Some(hook) = &self.capture {
215            for packets in to_local.values() {
216                for p in packets {
217                    hook(CapturePath::FromPeer, p.as_ref());
218                }
219            }
220        }
221
222        let to_local = to_local
223            .into_iter()
224            .map(|(peer_id, mut packets)| -> Vec<PacketMut> {
225                let _span = tracing::trace_span!(
226                    "src_filter_inbound",
227                    peer_id = ?peer_id,
228                    n_packet = packets.len(),
229                )
230                .entered();
231
232                packets.retain(|packet| {
233                    let Some(src) = packet.get_src_addr() else {
234                        tracing::trace!("does not look like ip packet");
235                        return false;
236                    };
237                    let verdict = if let Some(allowed_peer) = self.src_filter_in.lookup(src) {
238                        *allowed_peer == PeerId(peer_id.0)
239                    } else {
240                        tracing::trace!(remote_ip = %src, "unknown peer address");
241                        false
242                    };
243                    tracing::trace!(?src, verdict);
244                    verdict
245                });
246
247                packets
248            })
249            .map(|mut v| {
250                let _span =
251                    tracing::trace_span!("packet_filter_inbound", n_packet = v.len()).entered();
252
253                v.retain(|pkt| {
254                    let Ok(pkt) = etherparse::SlicedPacket::from_ip(pkt.as_ref()) else {
255                        tracing::trace!("does not look like ip packet");
256                        return false;
257                    };
258
259                    let (proto, src, dst) = match pkt.net {
260                        Some(etherparse::NetSlice::Ipv4(ipv4)) => (
261                            IpProto::new(ipv4.payload().ip_number.0 as _),
262                            ipv4.header().source_addr().into(),
263                            ipv4.header().destination_addr().into(),
264                        ),
265                        Some(etherparse::NetSlice::Ipv6(ipv6)) => (
266                            IpProto::new(ipv6.payload().ip_number.0 as _),
267                            ipv6.header().source_addr().into(),
268                            ipv6.header().destination_addr().into(),
269                        ),
270                        _ => {
271                            // A packet that parsed as IP but is neither IPv4 nor IPv6 (e.g. a
272                            // future/odd `NetSlice` shape). These bytes are attacker-controlled
273                            // post-decrypt, so fail closed — drop it — rather than `unreachable!`,
274                            // which would panic the single-threaded dataplane on a crafted packet.
275                            // Go's filter `pre()` likewise returns Drop/"not-ip" here, never panics.
276                            tracing::trace!("parsed packet is neither IPv4 nor IPv6; dropping");
277                            return false;
278                        }
279                    };
280
281                    let (_src_port, dst_port) = match pkt.transport {
282                        Some(etherparse::TransportSlice::Udp(udp)) => {
283                            (udp.source_port(), udp.destination_port())
284                        }
285                        Some(etherparse::TransportSlice::Tcp(tcp)) => {
286                            (tcp.source_port(), tcp.destination_port())
287                        }
288                        _ => (0, 0),
289                    };
290
291                    // The inbound proto-switch (Go `runIn4`/`runIn6`): Go `pre()` multicast/link-local
292                    // drops, then unconditional TSMP accept, then the control-derived ACL. Source
293                    // attribution above and `or_in.route` below bound this to attributable peers and
294                    // local destinations (Go's `local4`/`local6` precondition).
295                    inbound_filter_verdict(self.packet_filter.as_ref(), proto, src, dst, dst_port)
296                });
297
298                v
299            });
300
301        let to_peers = to_peers
302            .into_iter()
303            .map(|(k, v)| (ts_transport::PeerId(k.0), v));
304
305        let to_local = self.or_in.route(to_local.flatten());
306        let to_peers = self.ur_out.route(to_peers);
307
308        if let Some(next) = self.wireguard.next_event()
309            && let Some(prev) = self
310                .wg_next
311                .replace(self.events.add(next, Subsystem::Wireguard))
312        {
313            prev.cancel();
314        }
315
316        InboundResult { to_local, to_peers }
317    }
318
319    /// Return the next time at which [`DataPlane::process_events`] must be called.
320    ///
321    /// [`DataPlane::process_outbound`], [`DataPlane::process_inbound`] and
322    /// [`DataPlane::process_events`] may all update the next event time. Callers should prefer
323    /// calling `next_event` as needed to get a correct result, rather than store the returned
324    /// value.
325    pub fn next_event(&self) -> Option<Instant> {
326        self.events.next_dispatch()
327    }
328
329    /// Process all queued events that are due for processing.
330    ///
331    /// Must be called at least as often as dictated by [`DataPlane::next_event`] for the
332    /// data plane to function correctly. It is harmless to call it more frequently.
333    pub fn process_events(&mut self) -> EventResult {
334        let mut to_peers = HashMap::new();
335        let now = Instant::now();
336        for event in self.events.dispatch(now) {
337            match event {
338                Subsystem::Wireguard => {
339                    let res = self.wireguard.dispatch_events(now);
340                    to_peers.extend(
341                        res.to_peers
342                            .into_iter()
343                            .map(|(id, pkts)| (ts_transport::PeerId(id.0), pkts)),
344                    );
345                }
346            }
347        }
348        let to_peers = self.ur_out.route(to_peers);
349
350        if let Some(next) = self.wireguard.next_event()
351            && let Some(prev) = self
352                .wg_next
353                .replace(self.events.add(next, Subsystem::Wireguard))
354        {
355            prev.cancel();
356        }
357
358        EventResult { to_peers }
359    }
360}
361
362/// The result of processing outbound packets.
363pub struct OutboundResult {
364    /// Packets to be sent into underlay transports for transmission.
365    pub to_peers: HashMap<(UnderlayTransportId, PeerId), Vec<PacketMut>>,
366    /// Packets to be looped back and delivered to overlay transports.
367    pub loopback: HashMap<OverlayTransportId, Vec<PacketMut>>,
368}
369
370/// The result of processing inbound packets.
371pub struct InboundResult {
372    /// Decrypted packets to be delivered to overlay transports.
373    pub to_local: HashMap<OverlayTransportId, Vec<PacketMut>>,
374    /// Encrypted packets to be sent to wireguard peers by the underlay.
375    pub to_peers: HashMap<(UnderlayTransportId, PeerId), Vec<PacketMut>>,
376}
377
378/// The result of processing an event.
379#[derive(Default)]
380pub struct EventResult {
381    /// Encrypted packets to be sent to wireguard peers by the underlay.
382    pub to_peers: HashMap<(UnderlayTransportId, PeerId), Vec<PacketMut>>,
383}
384
#[cfg(test)]
mod tests {
    use std::sync::Mutex;

    use super::*;

    /// Shared log of `(path, bytes)` pairs, one entry per capture-hook invocation.
    type CaptureLog = Arc<Mutex<Vec<(CapturePath, Vec<u8>)>>>;

    /// Parses a textual IP address, panicking on malformed test input.
    fn ip(text: &str) -> std::net::IpAddr {
        text.parse::<std::net::IpAddr>().unwrap()
    }

    /// Every capture path maps to its Go-compatible on-wire code.
    #[test]
    fn capture_path_codes() {
        let expected = [
            (CapturePath::FromLocal, 0),
            (CapturePath::FromPeer, 1),
            (CapturePath::SynthesizedToLocal, 2),
            (CapturePath::SynthesizedToPeer, 3),
        ];
        for (path, code) in expected {
            assert_eq!(path.code(), code);
        }
    }

    /// The pre-rule destination screen (Go filter `pre()`): multicast and non-allowlisted
    /// link-local destinations are dropped before the ACL; ordinary unicast and the
    /// cloud-metadata link-local exception pass through to the rules.
    #[test]
    fn pre_rule_drop_matches_go() {
        // Destinations that must be dropped ahead of the rules, with the reason.
        let dropped = [
            ("224.0.0.1", "IPv4 multicast dropped"),
            ("239.255.255.250", "IPv4 multicast (SSDP) dropped"),
            ("169.254.1.1", "IPv4 link-local dropped"),
            ("ff02::1", "IPv6 multicast dropped"),
            ("fe80::1", "IPv6 link-local dropped"),
            (
                "febf:ffff::1",
                "top of fe80::/10 dropped (locks the 0xffc0/0xfe80 mask)",
            ),
        ];
        for (dst, why) in dropped {
            assert!(drop_before_rules(ip(dst)), "{why}");
        }

        // Destinations that must be handed on to the rules, with the reason.
        //
        // The IPv4-mapped-IPv6 entries match NEITHER pre-rule and fall through to the ACL,
        // exactly as Go's `netip.Addr` predicates do (no unmap/canonicalize). Pinning this guards
        // against a future "canonicalize to be safe" refactor silently diverging from Go.
        let passed = [
            (
                "fec0::1",
                "just past fe80::/10 passes (locks the 0xffc0/0xfe80 mask)",
            ),
            (
                "::ffff:224.0.0.1",
                "4in6-mapped multicast falls through to the ACL, matching Go",
            ),
            (
                "::ffff:169.254.1.1",
                "4in6-mapped link-local falls through to the ACL, matching Go",
            ),
            ("100.64.0.5", "ordinary tailnet unicast passes"),
            ("8.8.8.8", "ordinary public unicast passes"),
            (
                "169.254.169.254",
                "the cloud-metadata link-local address is the Go-allowlisted exception",
            ),
            ("fd7a:115c:a1e0::1", "IPv6 ULA (tailnet) passes"),
        ];
        for (dst, why) in passed {
            assert!(!drop_before_rules(ip(dst)), "{why}");
        }
    }

    /// A filter whose `match_for` returns `None` for every packet. Lets a test prove that TSMP is
    /// admitted by bypassing the ACL — not by the ACL happening to allow it.
    struct DenyAll;
    impl ts_packetfilter::Filter for DenyAll {
        fn match_for(
            &self,
            _info: &ts_packetfilter::PacketInfo,
            _caps: ts_packetfilter::filter::CapIter,
        ) -> Option<&str> {
            None
        }
    }

    /// The inbound proto-switch (Go `runIn4`/`runIn6`): TSMP is always admitted, bypassing the
    /// ACL; `pre()` drops still win over TSMP; non-TSMP defers to the ACL.
    #[test]
    fn tsmp_bypasses_acl_matches_go() {
        let peer = ip("100.64.0.9");
        let local = ip("100.64.0.1");
        let tsmp = IpProto::new(99);

        // TSMP is accepted even though the ACL denies everything — Go `case TSMP: return Accept`.
        let tsmp_to_local = inbound_filter_verdict(&DenyAll, tsmp, peer, local, 0);
        assert!(
            tsmp_to_local,
            "TSMP admitted by bypassing the (deny-all) ACL"
        );

        // A non-TSMP proto under the same deny-all ACL is dropped — the bypass is TSMP-specific.
        let tcp_to_local = inbound_filter_verdict(&DenyAll, IpProto::TCP, peer, local, 443);
        assert!(
            !tcp_to_local,
            "TCP still consults the ACL (deny-all → dropped)"
        );

        // `pre()` drops outrank the TSMP accept: TSMP to a multicast/link-local dst is still
        // dropped, exactly as Go runs `pre()` before the proto switch.
        let tsmp_to_multicast = inbound_filter_verdict(&DenyAll, tsmp, peer, ip("224.0.0.1"), 0);
        assert!(
            !tsmp_to_multicast,
            "TSMP to a multicast dst is still dropped (pre() before the switch)"
        );
        let tsmp_to_link_local = inbound_filter_verdict(&DenyAll, tsmp, peer, ip("169.254.1.1"), 0);
        assert!(
            !tsmp_to_link_local,
            "TSMP to a link-local dst is still dropped (pre() before the switch)"
        );

        // IpProto::TSMP is the named constant for proto 99.
        assert_eq!(IpProto::TSMP, tsmp, "IpProto::TSMP == 99");
    }

    /// Behavioral guard: an installed capture hook MUST be invoked with `CapturePath::FromLocal`
    /// and the exact packet bytes for every outbound packet. The tee sits at the top of
    /// `process_outbound`, before `or_out.route` consumes the packets, so it fires regardless of
    /// whether a wireguard peer exists (an empty router just drops the routed packets afterward).
    /// This is the only end-to-end guard that the dataplane capture tee actually fires; a refactor
    /// that drops the tee would leave every byte-layout test green.
    #[test]
    fn capture_hook_fires_on_outbound() {
        let log: CaptureLog = Arc::new(Mutex::new(Vec::new()));

        let mut dp = DataPlane::new(NodeKeyPair::new());
        let hook: CaptureHook = {
            let log = Arc::clone(&log);
            Arc::new(move |path: CapturePath, bytes: &[u8]| {
                log.lock().unwrap().push((path, bytes.to_vec()));
            })
        };
        dp.capture = Some(hook);

        // The outbound tee passes `p.as_ref()` as-given; the bytes need not be a valid IP packet.
        let payload: Vec<u8> = vec![0xde, 0xad, 0xbe, 0xef];
        let _ = dp.process_outbound(vec![PacketMut::from(payload.clone())]);

        let captured = log.lock().unwrap();
        assert_eq!(captured.len(), 1, "hook must fire exactly once per packet");
        let (path, bytes) = &captured[0];
        assert_eq!(*path, CapturePath::FromLocal);
        assert_eq!(*bytes, payload);
    }
}