Skip to main content

ts_runtime/
lib.rs

1#![doc = include_str!("../README.md")]
2
3extern crate ts_netstack_smoltcp as netstack;
4
5use core::time::Duration;
6use std::sync::Arc;
7
8use kameo::{
9    actor::{ActorRef, Spawn, WeakActorRef},
10    mailbox::Signal,
11};
12use netstack::netcore::Channel;
13use tokio::sync::watch;
14
15use crate::{
16    control_runner::ControlRunner, dataplane::DataplaneActor, direct::DirectManager,
17    forwarder_actor::ForwarderActor, multiderp::Multiderp, netstack_actor::NetstackActor,
18};
19
20/// Pcap stream framer for debug packet capture (`CapturePcap`).
21pub mod capture;
22/// Control runner.
23pub mod control_runner;
24mod dataplane;
25mod derp_latency;
26/// Device connection-state tracking ([`DeviceState`]) and typed registration outcome
27/// ([`RegistrationError`]).
28pub mod device_state;
29mod direct;
30mod env;
31mod error;
32/// Fallback TCP handler registry (`tsnet.Server.RegisterFallbackTCPHandler` parity).
33pub mod fallback_tcp;
34mod forwarder_actor;
35/// Client-side Funnel ingress termination (`tsnet`'s `ListenFunnel` data path).
36pub mod funnel;
37/// Unified IPN notification bus ([`Notify`] / [`watch_ipn_bus`](Runtime::watch_ipn_bus)), mirroring
38/// Go `ipn` `LocalBackend.WatchNotifications` / the `WatchIPNBus` LocalAPI.
39pub mod ipn_bus;
40mod magic_dns;
41mod multiderp;
42mod netstack_actor;
43mod packetfilter;
44pub mod peer_tracker;
45mod peerapi;
46mod peerapi_doh;
47mod route_updater;
48/// Stored Serve config + accept-loop runtime (`tsnet`'s `Get/SetServeConfig` + serving runtime).
49pub mod serve;
50mod src_filter;
51/// Netmap status snapshot, WhoIs, and watcher types.
52pub mod status;
53/// Taildrop peer-to-peer file transfer store.
54pub mod taildrop;
55pub mod taildrop_send;
56/// Tailnet-Lock (TKA) chain-sync orchestration: bootstrap + offer/send driver (the runtime layer
57/// that bridges the `ts_control` sync RPCs and the `ts_tka` chain logic).
58mod tka_sync;
59#[cfg(feature = "tun")]
60mod tun_actor;
61
62pub use device_state::{DeviceState, RegistrationError};
63pub(crate) use env::Env;
64pub use error::{Error, ErrorKind};
65pub use ipn_bus::{IpnBusWatcher, Notify, NotifyWatchOpt};
66pub use status::{FileTarget, NetcheckReport, RegionLatency, Status, StatusNode, WhoIs};
67pub use ts_dataplane::{CaptureHook, CapturePath};
68
69use crate::peer_tracker::PeerTracker;
70
/// The runtime for a tailscale device.
///
/// Holds the actor references and `watch` cells that [`Runtime::spawn`] creates, so the public
/// methods on this type can reach the running actors after startup.
pub struct Runtime {
    /// Reference to the control actor.
    pub control: ActorRef<ControlRunner>,
    /// Reference to the dataplane actor, retained so [`Runtime::install_capture`] can install or
    /// clear the debug packet-capture hook on the running dataplane.
    dataplane: ActorRef<DataplaneActor>,
    /// Reference to the direct (disco/UDP underlay) manager, retained so [`Runtime::rebind`] can
    /// ask it to re-bind the underlay socket on a network/link change.
    direct: ActorRef<DirectManager>,
    /// Reference to the application netstack actor. `None` in TUN transport mode, where there is
    /// no userspace application netstack (the application data path is a real kernel TUN device).
    netstack: Option<WeakActorRef<NetstackActor>>,
    /// Reference to the peer tracker for peer lookups.
    pub peer_tracker: WeakActorRef<PeerTracker>,
    /// Fallback TCP handler registry, bound to the application netstack. `None` in TUN transport
    /// mode (no application netstack exists to attach it to).
    fallback_tcp: Option<fallback_tcp::FallbackTcpManager>,
    /// Reference to the forwarder actor, retained so [`Runtime::set_advertise_routes`] can push a
    /// new accept/dial route table onto the running forwarder (the local half of advertising
    /// routes). Without this the strong ref would drop after the startup `GetChannel` and the
    /// forwarder would be reachable only via the message bus.
    forwarder: ActorRef<ForwarderActor>,
    /// Reference to the multiderp manager, retained so [`Runtime::status`] can resolve each
    /// relayed peer's DERP region id to its region **code** (`ipnstate.PeerStatus.Relay`). Without
    /// this the strong ref would drop after startup (it is cloned into the direct manager + route
    /// updater) and the region-code map would be unreachable.
    multiderp: ActorRef<Multiderp>,
    /// Shared runtime environment. [`Runtime::spawn`] clones it into each actor it starts; the
    /// accessors on this type read the Taildrop store, the Funnel ingress slot, and the
    /// ingress-active flag from this copy.
    env: Env,
    /// Sender side of the shutdown `watch` cell (initially `false`); the receiver side is handed
    /// to the `Env` at spawn time.
    // NOTE(review): presumably flipped to `true` by `graceful_shutdown` — that method is not
    // visible in this part of the file; confirm.
    shutdown: watch::Sender<bool>,
    /// Sender side of the exit-node selector `watch` cell. Held privately here (not on the cloned
    /// `Env`, which keeps only the read side) so that only `Runtime::set_exit_node` can mutate the
    /// selection; the route updater and source filter re-read it via [`Env::exit_node`].
    exit_node_tx: watch::Sender<Option<ts_control::ExitNodeSelector>>,
    /// Receiver mirroring the *active* (resolved + fail-closed) exit node's stable id, fed by the
    /// route updater. Read by [`Runtime::status`] / [`Runtime::active_exit_node`] to report which
    /// exit node traffic is actually egressing through (vs. the merely-configured selector).
    active_exit_rx: watch::Receiver<Option<ts_control::StableNodeId>>,
    /// Receiver for the device connection-state cell, fed by the control runner. Read by
    /// [`Runtime::watch_state`] and [`Runtime::wait_until_running`].
    state_rx: watch::Receiver<DeviceState>,
    /// Receiver for the retained peer-capability grants, fed by the packet-filter updater. Read by
    /// [`Runtime::whois`] to resolve the flow-scoped cap map (Go `apitype.WhoIsResponse.CapMap`).
    cap_grants_rx: watch::Receiver<packetfilter::CapGrants>,
    /// Live advertised-route preference (explicit subnet routes + the exit-node flag), seeded from
    /// the startup config. [`Runtime::set_advertise_routes`] and
    /// [`Runtime::set_advertise_exit_node`] each mutate their part under this lock then re-send the
    /// composed set, so the two compose.
    advertise: std::sync::Mutex<AdvertiseState>,
}
118
119impl Runtime {
    /// Spawn a new runtime with the given parameters for connecting to a tailnet.
    ///
    /// Starts the dataplane, multiderp, direct manager, forwarder, route/packet-filter/source-filter
    /// updaters, peer tracker, the application data path selected by `config.transport_mode`, and
    /// finally the control runner, then returns the handles retained on [`Runtime`].
    ///
    /// # Errors
    ///
    /// Returns an error if any startup `ask` fails (the dataplane's `NewOverlayTransport`, the
    /// forwarder's `GetChannel`, or the application netstack's `GetChannel`), or
    /// [`ErrorKind::TunUnavailable`] when TUN transport mode is requested but the crate was built
    /// without the `tun` feature.
    pub async fn spawn(
        config: ts_control::Config,
        auth_key: Option<String>,
        keys: ts_keys::NodeState,
    ) -> Result<Self, Error> {
        let (shutdown_tx, shutdown_rx) = watch::channel(false);

        // The exit-node selector is a live `watch` cell so `Device::set_exit_node` can change it at
        // runtime. `new_with_exit_tx` returns the `Sender` (mutation capability) separately so it is
        // retained privately on the `Runtime`, while only the `Receiver` (the readers' contract)
        // lives on the cloned `Env`. The initial value comes from `ForwarderConfig.exit_node`.
        let (env, exit_node_tx) = Env::new_with_exit_tx(
            keys,
            shutdown_rx,
            env::ForwarderConfig::from_control_config(&config),
        );

        // Both userspace netstacks (application + forwarder) share one netstack config. Honor the
        // per-deployment TCP buffer knob when set, otherwise fall back to the netstack default.
        let netstack_config = netstack_config_from(config.tcp_buffer_size);

        let dataplane = DataplaneActor::spawn(env.clone());

        let (netstack_id, netstack_up, netstack_down) =
            dataplane.ask(dataplane::NewOverlayTransport).await?;

        // A second overlay transport feeds the dedicated any-IP forwarder netstack. Inbound packets
        // for advertised subnet routes / the exit-node default route are routed here (see
        // `route_updater`), keeping forwarded flows off the application netstack.
        let (forwarder_id, forwarder_up, forwarder_down) =
            dataplane.ask(dataplane::NewOverlayTransport).await?;

        let multiderp = Multiderp::spawn((env.clone(), dataplane.clone()));

        // Spawn the direct (disco) underlay manager before the route updater. Its `on_start`
        // binds the UDP socket and registers its transport synchronously, so by the time the
        // route updater asks it for the direct transport id it is guaranteed to be available.
        let direct = DirectManager::spawn((env.clone(), dataplane.clone(), multiderp.clone()));

        // Spawn the forwarder before the route updater. Its `on_start` builds the forwarder
        // netstack, enables any-IP acceptance, and starts the per-port accept loops synchronously,
        // so by the time the route updater begins delivering advertised prefixes to
        // `forwarder_id` the netstack is already draining its transport.
        let forwarder = ForwarderActor::spawn((
            env.clone(),
            netstack_config.clone(),
            forwarder_up,
            forwarder_down,
        ));
        // Force `on_start` to finish (any-IP enabled, accept loops live) before the route updater
        // can route the first inbound flow to `forwarder_id`: an `ask` blocks until the actor has
        // started.
        //
        // The forwarder netstack's overlay `Channel` is reused by the TUN application path for
        // recursive / exit-node-DoH MagicDNS forwarding (TUN mode has no application netstack of its
        // own, but the forwarder netstack runs in both modes and egresses over the overlay — the
        // anti-leak property `forward_query`/`forward_doh` require). Only the `tun` Tun arm consumes
        // it, so it is unused when the `tun` feature is off — allow that without warn-as-error.
        #[cfg_attr(not(feature = "tun"), allow(unused_variables))]
        let (forwarder_channel,) = forwarder.ask(forwarder_actor::GetChannel).await?;

        // The route updater is the single authoritative resolver of the active (resolved,
        // fail-closed) exit node; it publishes the resolved stable id into this watch cell so
        // `Runtime::status` can report which exit is actually engaged (not just configured).
        let (active_exit_tx, active_exit_rx) = watch::channel(None);
        route_updater::RouteUpdater::spawn((
            multiderp.clone(),
            direct.clone(),
            env.clone(),
            netstack_id,
            forwarder_id,
            active_exit_tx,
        ));
        // The packet-filter updater also surfaces the retained cap-grants (for flow-scoped WhoIs)
        // through a `watch` cell whose receiver the `Runtime` holds — the bus has no replay, so a
        // `watch` is how `Runtime::whois` reads the current grants on demand.
        let (cap_grants_tx, cap_grants_rx) = watch::channel(Default::default());
        packetfilter::PacketfilterUpdater::spawn((env.clone(), cap_grants_tx));
        src_filter::SourceFilterUpdater::spawn(env.clone());
        // Only a weak ref to the peer tracker is kept on the `Runtime`; the strong ref returned by
        // `spawn` is dropped at the end of this statement.
        let peer_tracker = PeerTracker::spawn(env.clone()).downgrade();

        // Select the application data path from the transport mode. The forwarder/egress path
        // above is UNCHANGED in both modes — TUN mode only swaps the application data path, never
        // the forwarder. `config` is moved into `ControlRunner::spawn` below, so branch on a
        // borrow and clone the small `TunConfig` where needed before the move.
        //
        // - Netstack (the default, and the only reachable arm when the `tun` feature is off):
        //   spawn the application netstack + MagicDNS responder + fallback-TCP registry, all on
        //   the `netstack_up`/`netstack_down` overlay seam.
        // - Tun: spawn `TunActor` on that same overlay seam instead; no application netstack and
        //   no MagicDNS responder exist, and `netstack`/`fallback_tcp` are `None`.
        // - Tun requested but built without the `tun` feature: hard-error (a config/build
        //   mismatch knowable at spawn time). NEVER silently fall back to netstack.
        let (netstack, fallback_tcp) = match &config.transport_mode {
            ts_control::TransportMode::Netstack => {
                let netstack = NetstackActor::spawn((
                    env.clone(),
                    netstack_config,
                    netstack_up,
                    netstack_down,
                ));

                // Fetch the netstack channel while we still hold the strong ActorRef, then spawn
                // the MagicDNS responder on it. Fire-and-forget: like src_filter/route_updater,
                // it's owned by the message bus and isn't stored on `Runtime`.
                let (channel,) = netstack.ask(netstack_actor::GetChannel).await?;
                // The fallback-TCP registry attaches to the application netstack — the same one
                // that carries the embedder's explicit `Device::tcp_listen` sockets — so a
                // fallback handler sees exactly the inbound flows no explicit listener matched.
                let fallback_tcp = fallback_tcp::FallbackTcpManager::new(channel.clone());
                magic_dns::MagicDnsActor::spawn((env.clone(), channel));

                (Some(netstack.downgrade()), Some(fallback_tcp))
            }

            #[cfg(feature = "tun")]
            ts_control::TransportMode::Tun(tun_cfg) => {
                // Reuse the same `netstack_up`/`netstack_down` overlay-transport pair that would
                // have fed the netstack — it is just the application-side overlay seam (the name
                // is historical). No NetstackActor / MagicDnsActor is spawned.
                tun_actor::TunActor::spawn((
                    env.clone(),
                    tun_cfg.clone(),
                    netstack_up,
                    netstack_down,
                    // Host-route gating inputs derived from `Env`: subnet routes are only steered
                    // into the TUN when `--accept-routes` is set, and the host `/0` only when the
                    // embedder configured an exit node. See `tun_actor::host_routes_from_node`.
                    tun_actor::HostRouteGating {
                        accept_routes: env.accept_routes,
                        exit_node_configured: env.exit_node().is_some(),
                    },
                    // Reuse the forwarder netstack's overlay `Channel` for recursive / exit-node-DoH
                    // MagicDNS forwarding in the TUN datapath (TUN mode has no application netstack
                    // Channel of its own). Egresses over the overlay — anti-leak preserved.
                    forwarder_channel.clone(),
                ));

                (None, None)
            }

            #[cfg(not(feature = "tun"))]
            ts_control::TransportMode::Tun(_) => {
                return Err(Error {
                    kind: ErrorKind::TunUnavailable,
                    target_actor: None,
                    message_ty: None,
                });
            }
        };

        // Device connection-state cell. Created here (not inside the actor) so the control runner's
        // `on_start` can publish `Failed`/`NeedsLogin` and still return `Err` without the sender
        // being tied to a `Self` that never gets constructed on a hard registration failure.
        let (state_tx, state_rx) = watch::channel(DeviceState::Connecting);

        // Seed the live advertised-route preference from the startup config before `config` moves
        // into the control runner, so the runtime setters compose against the configured baseline.
        let advertise = std::sync::Mutex::new(AdvertiseState {
            routes: config.advertise_routes.clone(),
            exit_node: config.advertise_exit_node,
        });

        let control = ControlRunner::spawn(control_runner::Params {
            config,
            auth_key,
            env: env.clone(),
            state_tx,
        });

        Ok(Self {
            control,
            dataplane,
            direct,
            peer_tracker,
            fallback_tcp,
            forwarder,
            multiderp,
            netstack,
            env,
            shutdown: shutdown_tx,
            exit_node_tx,
            active_exit_rx,
            state_rx,
            cap_grants_rx,
            advertise,
        })
    }
309
310    /// Register a fallback TCP handler consulted for every inbound TCP flow that matches no
311    /// explicit listener (`tsnet.Server.RegisterFallbackTCPHandler` parity).
312    ///
313    /// The returned [`fallback_tcp::FallbackTcpHandle`] deregisters the handler when dropped. See
314    /// [`fallback_tcp`] for the dispatch contract and anti-leak guarantees.
315    ///
316    /// Returns [`ErrorKind::UnsupportedInTunMode`] in TUN transport mode, where there is no
317    /// application netstack to attach a fallback handler to.
318    pub fn register_fallback_tcp_handler(
319        &self,
320        cb: Arc<
321            dyn Fn(core::net::SocketAddr, core::net::SocketAddr) -> fallback_tcp::FallbackDecision
322                + Send
323                + Sync,
324        >,
325    ) -> Result<fallback_tcp::FallbackTcpHandle, Error> {
326        Ok(self
327            .fallback_tcp
328            .as_ref()
329            .ok_or(Error {
330                kind: ErrorKind::UnsupportedInTunMode,
331                target_actor: None,
332                message_ty: None,
333            })?
334            .register(cb))
335    }
336
337    /// Get a channel to send commands to the netstack.
338    ///
339    /// Returns [`ErrorKind::UnsupportedInTunMode`] in TUN transport mode, where there is no
340    /// application netstack.
341    pub async fn channel(&self) -> Result<Channel, Error> {
342        let (channel,) = self
343            .netstack
344            .as_ref()
345            .ok_or(Error {
346                kind: ErrorKind::UnsupportedInTunMode,
347                target_actor: None,
348                message_ty: None,
349            })?
350            .upgrade()
351            .ok_or(Error {
352                kind: ErrorKind::ActorGone,
353                target_actor: None,
354                message_ty: None,
355            })?
356            .ask(netstack_actor::GetChannel)
357            .await?;
358
359        Ok(channel)
360    }
361
362    /// The Taildrop file store, if Taildrop is enabled (`taildrop_dir` configured and the store
363    /// initialized). `None` when disabled — fail-closed. Shared with the peerAPI Taildrop server so
364    /// the embedder's read APIs and the receive path see the same on-disk store.
365    pub fn taildrop_store(&self) -> Option<Arc<crate::taildrop::TaildropStore>> {
366        self.env.taildrop_store.clone()
367    }
368
369    /// The shared Funnel ingress slot the peerAPI `/v0/ingress` route reads per connection.
370    ///
371    /// `Device::listen_funnel` installs a [`FunnelManager`](crate::funnel::FunnelManager)'s sink here
372    /// to make the route live (the peerAPI server is already running from startup). Returns a clone of
373    /// the runtime-lifetime `Arc` so the device can write the slot without restarting the server. See
374    /// [`crate::funnel`] for the ingress data path.
375    pub fn funnel_ingress_slot(&self) -> crate::funnel::FunnelIngressSlot {
376        self.env.funnel_ingress.clone()
377    }
378
379    /// The shared "Funnel ingress listener active" flag (the same `Arc` the control session reads to
380    /// set `HostInfo.IngressEnabled`). `Device::listen_funnel` flips it `true` while a funnel listener
381    /// is up so control routes Funnel traffic to this node; clearing it advertises no live endpoint.
382    pub fn ingress_active_flag(&self) -> std::sync::Arc<std::sync::atomic::AtomicBool> {
383        self.env.ingress_active.clone()
384    }
385
386    /// Install (`Some`) or clear (`None`) the debug packet-capture hook on the running dataplane.
387    /// `Some(hook)` tees every plaintext packet crossing the datapath to `hook` until it is cleared;
388    /// `None` stops capture. Mirrors Go `tstun.Wrapper.InstallCaptureHook` / `ClearCaptureSink`.
389    pub async fn install_capture(
390        &self,
391        hook: Option<ts_dataplane::CaptureHook>,
392    ) -> Result<(), Error> {
393        self.dataplane
394            .ask(dataplane::InstallCapture { hook })
395            .await
396            .map_err(Into::into)
397    }
398
399    /// Re-bind the underlay UDP socket after a network/link change (Wi-Fi switch, sleep/wake). The
400    /// embedder's own link monitor calls this (the engine owns the socket re-bind; the embedder owns
401    /// OS netmon). Re-binds the socket (same-port-preferred, IPv4-only invariant preserved) and
402    /// resets the now-stale local NAT mapping — clearing learned reflexive addresses and every
403    /// confirmed direct path while keeping candidate endpoints, so peers re-probe over the new socket
404    /// and relay over DERP (never a direct host dial) until a path re-confirms. Peers, control, the
405    /// netmap, disco state, and DERP are untouched. A no-op when the underlay is inert (bind failed
406    /// at startup, DERP-only). Mirrors Go magicsock `Conn.Rebind` + `resetEndpointStates`.
407    pub async fn rebind(&self) -> Result<(), Error> {
408        self.direct.ask(direct::Rebind).await.map_err(Error::from)
409    }
410
411    /// A snapshot of the local netmap: this node plus every known peer.
412    ///
413    /// Combines the self node held by the control runner with the peer set held by the peer
414    /// tracker. Mirrors tsnet's `LocalClient::Status`.
415    ///
416    /// `self_node` is `None` until the first netmap update has been received from control. Peer
417    /// entries carry no online/user/capability data (see the [`status`] module docs for that gap).
418    pub async fn status(&self) -> Result<Status, Error> {
419        let self_node_domain = self.control.ask(control_runner::SelfNode).await?;
420        // The MagicDNS suffix is the self node's FQDN minus its host label — already split into
421        // `Node.tailnet` at decode time (Go derives it the same way in `NetworkMap.MagicDNSSuffix`).
422        // Capture it before the domain `Node` is mapped away into a `StatusNode`.
423        let magic_dns_suffix = self_node_domain.as_ref().and_then(|n| n.tailnet.clone());
424        let self_node = self_node_domain.as_ref().map(StatusNode::from_node);
425
426        let peers_with_ids = self
427            .peer_tracker
428            .upgrade()
429            .ok_or(Error {
430                kind: ErrorKind::ActorGone,
431                target_actor: None,
432                message_ty: None,
433            })?
434            .ask(peer_tracker::GetStatus)
435            .await?;
436
437        // Join per-peer connectivity (Go `PeerStatus.CurAddr`): one batched query to the direct
438        // manager for every peer's current trusted direct endpoint, then fill `cur_addr` on each
439        // `StatusNode`. A peer absent from the map is relayed via DERP (`cur_addr = None`). This is a
440        // live snapshot — the direct path can expire/re-confirm between calls (matches Go's snapshot
441        // semantics). The `watch_netmap` stream intentionally carries no connectivity (it is a netmap
442        // watch, not a path-state watch, and does not re-fire on direct↔relay flips).
443        let ids: Vec<ts_transport::PeerId> = peers_with_ids.iter().map(|(id, _)| *id).collect();
444        let best_addrs = self
445            .direct
446            .ask(direct::BestAddrs { ids: ids.clone() })
447            .await
448            .unwrap_or_default();
449
450        // For the peers with NO direct path (relayed via DERP), resolve the region CODE they relay
451        // through (Go `PeerStatus.Relay`). One batched ask to multiderp; `cur_addr` and `relay` are
452        // mutually exclusive for a routed peer, mirroring Go's empty-vs-set strings.
453        let relay_ids: Vec<ts_transport::PeerId> = ids
454            .into_iter()
455            .filter(|id| !best_addrs.contains_key(id))
456            .collect();
457        let relay_codes = if relay_ids.is_empty() {
458            Default::default()
459        } else {
460            self.multiderp
461                .ask(multiderp::RelayCodesForPeers { ids: relay_ids })
462                .await
463                .unwrap_or_default()
464        };
465
466        let peers = peers_with_ids
467            .into_iter()
468            .map(|(id, mut node)| match best_addrs.get(&id).copied() {
469                Some(addr) => {
470                    node.cur_addr = Some(addr);
471                    node
472                }
473                None => {
474                    node.relay = relay_codes.get(&id).cloned();
475                    node
476                }
477            })
478            .collect();
479
480        Ok(Status {
481            self_node,
482            peers,
483            active_exit_node: self.active_exit_node(),
484            magic_dns_suffix,
485        })
486    }
487
488    /// List the tailnet peers this node can Taildrop a file *to* (Go LocalAPI `FileTargets`).
489    ///
490    /// Mirrors the upstream send-path filter (`feature/taildrop` `Extension::FileTargets`): a peer
491    /// qualifies when it advertises a reachable peerAPI **and** is either owned by the same user as
492    /// this node **or** explicitly granted the file-sharing-target capability. The whole list is
493    /// gated on this node holding the file-sharing capability (control sets it when the admin enables
494    /// Taildrop) — absent that, an empty list (fail-closed, not an error, matching how the receive
495    /// store returns empty when disabled). Results are sorted by the peer's MagicDNS name.
496    ///
497    /// Targets are listed regardless of current online state (upstream's `FileTargets` does not gate
498    /// on online either; an offline target's send will simply time out). The self node is never
499    /// included. Returns empty before the first netmap.
500    ///
501    /// Divergence from Go: the upstream filter also excludes `tvOS` peers, which this fork cannot
502    /// reproduce (the domain node carries no OS string); the impact is negligible — the actual send
503    /// fail-closes if such a peer refused the transfer.
504    pub async fn file_targets(&self) -> Result<Vec<FileTarget>, Error> {
505        // Node-level gate: this node must hold the file-sharing capability (Taildrop enabled by the
506        // admin). Read it off the self node's cap map, like Go's `hasCapFileSharing()`.
507        let self_node = self.control.ask(control_runner::SelfNode).await?;
508        let Some(self_node) = self_node else {
509            return Ok(Vec::new()); // no netmap yet
510        };
511        if !self_node.can_share_files() {
512            return Ok(Vec::new()); // Taildrop not enabled for the tailnet — fail-closed
513        }
514        let self_user_id = self_node.user_id;
515
516        let peers = self
517            .peer_tracker
518            .upgrade()
519            .ok_or(Error {
520                kind: ErrorKind::ActorGone,
521                target_actor: None,
522                message_ty: None,
523            })?
524            .ask(peer_tracker::AllPeers)
525            .await?;
526
527        // Eligibility + ordering live in `build_file_targets` (pure, unit-tested in `status`).
528        Ok(status::build_file_targets(peers, self_user_id))
529    }
530
531    /// The stable id of the exit node traffic is currently egressing through, or `None` if none is
532    /// engaged. This is the route updater's resolved + fail-closed answer (see
533    /// [`Status::active_exit_node`](crate::status::Status::active_exit_node)): it differs from the
534    /// configured [`exit_node`](Self::exit_node) selector, which may name a peer that is absent or
535    /// no longer advertising a default route (in which case egress is dropped and this returns
536    /// `None`).
537    pub fn active_exit_node(&self) -> Option<ts_control::StableNodeId> {
538        self.active_exit_rx.borrow().clone()
539    }
540
541    /// Request an OIDC ID token from control scoped to `audience` (workload-identity federation).
542    ///
543    /// Returns the signed JWT, or the token RPC's own [`ts_control::IdTokenError`]. The kameo
544    /// delegated-reply send error is flattened: a handler error carries the real `IdTokenError`,
545    /// any other send failure (actor shutdown / mailbox closed) is surfaced as
546    /// [`ts_control::IdTokenError::NetworkError`].
547    pub async fn fetch_id_token(
548        &self,
549        audience: String,
550    ) -> Result<String, ts_control::IdTokenError> {
551        self.control
552            .ask(control_runner::FetchIdToken { audience })
553            .await
554            .map_err(flatten_send_err)
555    }
556
557    /// Log this node out of the tailnet: deregister it by expiring its current node key.
558    ///
559    /// Forwards to the control runner, which re-POSTs `/machine/register` with a past expiry over a
560    /// fresh Noise channel. This is a control-plane state change only — it does NOT shut the runtime
561    /// down (the caller follows with [`graceful_shutdown`](Self::graceful_shutdown)) and does not
562    /// touch the on-disk node key. The kameo delegated-reply send error is flattened the same way as
563    /// [`fetch_id_token`](Self::fetch_id_token): a handler error carries the real
564    /// [`ts_control::LogoutError`]; any other send failure (actor shutdown / mailbox closed) is
565    /// surfaced as [`ts_control::LogoutError::NetworkError`].
566    pub async fn logout(&self) -> Result<(), ts_control::LogoutError> {
567        self.control
568            .ask(control_runner::Logout)
569            .await
570            .map_err(flatten_logout_send_err)
571    }
572
573    /// Publish a `TXT` DNS record for this node via control's `/machine/set-dns` (Go
574    /// `LocalClient.SetDNS`).
575    ///
576    /// Forwards to the control runner, which POSTs the record over a fresh Noise channel. The kameo
577    /// delegated-reply send error is flattened the same way as [`fetch_id_token`](Self::fetch_id_token):
578    /// a handler error carries the real [`ts_control::SetDnsError`]; any other send failure (actor
579    /// shutdown / mailbox closed) is surfaced as [`ts_control::SetDnsError::NetworkError`].
580    pub async fn set_dns(
581        &self,
582        name: String,
583        value: String,
584    ) -> Result<(), ts_control::SetDnsError> {
585        self.control
586            .ask(control_runner::SetDns { name, value })
587            .await
588            .map_err(flatten_set_dns_send_err)
589    }
590
591    /// Issue a real Let's Encrypt certificate for this node's MagicDNS `name` (`acme` feature).
592    ///
593    /// Mirrors [`fetch_id_token`](Self::fetch_id_token): forwards to the control runner, which runs
594    /// the client-side ACME DNS-01 flow on a spawned task and publishes the challenge TXT via the
595    /// node's set-dns RPC. The kameo delegated-reply send error is flattened — a handler error
596    /// carries the real [`ts_control::CertError`]; any other send failure (actor shutdown / mailbox
597    /// closed) is surfaced as a [`ts_control::CertError::Io`]. SaaS-only: a self-hosted control
598    /// plane 501s on set-dns.
599    #[cfg(feature = "acme")]
600    pub async fn get_certificate(
601        &self,
602        name: String,
603    ) -> Result<ts_control::tls::CertifiedKey, ts_control::CertError> {
604        self.control
605            .ask(control_runner::GetCertificate { name })
606            .await
607            .map_err(flatten_cert_send_err)
608    }
609
610    /// Resolve which node owns a tailnet source address.
611    ///
612    /// Maps the destination IP of `addr` to its owning node. Mirrors tsnet's `LocalClient::WhoIs`.
613    /// Returns `None` if no peer holds that tailnet IP.
614    ///
615    /// The returned [`WhoIs`] additionally carries the **flow-scoped** peer-capability grants
616    /// ([`WhoIs::cap_map`], Go `apitype.WhoIsResponse.CapMap`): the caps control's packet-filter
617    /// application rules authorize for traffic from THIS node (the flow source) to `addr` (the
618    /// destination). Empty when no grant matches. (The node-level cap map rides
619    /// [`WhoIs::capabilities`].)
620    pub async fn whois(&self, addr: core::net::SocketAddr) -> Result<Option<WhoIs>, Error> {
621        let whois = self
622            .peer_tracker
623            .upgrade()
624            .ok_or(Error {
625                kind: ErrorKind::ActorGone,
626                target_actor: None,
627                message_ty: None,
628            })?
629            .ask(peer_tracker::Whois { addr })
630            .await?;
631
632        let Some(mut whois) = whois else {
633            return Ok(None);
634        };
635
636        // Fill the flow-scoped cap map: src = this node's own tailnet IP (of the dst's family),
637        // dst = the queried address. A grant applies when its source matches the flow source — `src`
638        // ∈ its src prefixes OR this node holds one of its source node-caps — AND `dst` ∈ its dst
639        // prefixes (Go `Filter.CapsWithValues`). Resolve our own IP + cap map from the self node; if
640        // it isn't known yet, leave the map empty (no grants resolvable without a source).
641        let dst = addr.ip();
642        if let Some(self_node) = self.control.ask(control_runner::SelfNode).await? {
643            let src: core::net::IpAddr = if dst.is_ipv6() {
644                self_node.tailnet_address.ipv6.addr().into()
645            } else {
646                self_node.tailnet_address.ipv4.addr().into()
647            };
648            let grants = self.cap_grants_rx.borrow();
649            whois.cap_map = ts_packetfilter_state::caps_for(&grants, src, dst, |cap| {
650                self_node.has_node_attr(cap)
651            });
652        }
653
654        Ok(Some(whois))
655    }
656
657    /// The current direct-path status to the peer holding tailnet IP `dst`: its confirmed direct UDP
658    /// endpoint and that path's last-measured RTT, or `None` when there is no direct path right now
659    /// (the peer is relayed via DERP, is unknown, or has no disco key).
660    ///
661    /// The latency is the RTT of the most recent disco ping/pong that confirmed the path — a live
662    /// snapshot up to one probe interval stale, NOT a fresh on-demand round-trip (that is a separate,
663    /// heavier capability). Mirrors the direct-path latency Go surfaces for `ipnstate.PeerStatus`.
664    pub async fn direct_path(
665        &self,
666        dst: core::net::IpAddr,
667    ) -> Result<Option<(core::net::SocketAddr, Duration)>, Error> {
668        let peer_tracker = self.peer_tracker.upgrade().ok_or(Error {
669            kind: ErrorKind::ActorGone,
670            target_actor: None,
671            message_ty: None,
672        })?;
673
674        // Resolve the tailnet IP to its node, then to its disco key. No node / no disco key ⇒ no
675        // direct path is possible (a peer with no disco key can only be reached via DERP).
676        let Some(node) = peer_tracker
677            .ask(peer_tracker::PeerByTailnetIp { ip: dst })
678            .await?
679        else {
680            return Ok(None);
681        };
682        let Some(disco) = node.disco_key else {
683            return Ok(None);
684        };
685
686        self.direct
687            .ask(direct::DirectPathLatency { disco })
688            .await
689            .map_err(Into::into)
690    }
691
692    /// Send a disco ping to the peer holding tailnet IP `dst` **now** and await the pong, returning
693    /// the fresh round-trip latency and the endpoint that answered, or `None` if no pong arrives
694    /// within `timeout` (or the peer is unknown / has no disco key / no candidate path). This is the
695    /// true on-demand `PingType::Disco` (Go `tailscale ping`), as opposed to
696    /// [`direct_path`](Self::direct_path) which reports the last periodic probe's RTT.
697    ///
698    /// The ping round-trip is awaited OFF the direct manager's mailbox (we take a `MagicSock` handle
699    /// and await on it directly), so a slow/timing-out ping never blocks the actor.
700    pub async fn ping_disco(
701        &self,
702        dst: core::net::IpAddr,
703        timeout: Duration,
704    ) -> Result<Option<(core::net::SocketAddr, Duration)>, Error> {
705        let peer_tracker = self.peer_tracker.upgrade().ok_or(Error {
706            kind: ErrorKind::ActorGone,
707            target_actor: None,
708            message_ty: None,
709        })?;
710
711        let Some(node) = peer_tracker
712            .ask(peer_tracker::PeerByTailnetIp { ip: dst })
713            .await?
714        else {
715            return Ok(None);
716        };
717        let Some(disco) = node.disco_key else {
718            return Ok(None);
719        };
720
721        // Cheap synchronous handle fetch, then await the ping OFF the actor mailbox.
722        let Some(sock) = self.direct.ask(direct::SockHandle).await? else {
723            return Ok(None);
724        };
725        // A `ping_now` error is an underlay UDP send failure (not an actor problem); surface it as a
726        // reply-level error. A timed-out / unanswered ping is `Ok(None)`, not an error.
727        sock.ping_now(&disco, timeout).await.map_err(|_| Error {
728            kind: ErrorKind::ReplyErr,
729            target_actor: None,
730            message_ty: None,
731        })
732    }
733
734    /// Change the selected exit node at runtime (the equivalent of Go `tsnet`'s
735    /// `LocalClient.EditPrefs(ExitNodeID/ExitNodeIP)`), without recreating the device.
736    ///
737    /// Updates the live exit-node selector, then asks the peer tracker to re-broadcast the current
738    /// peer set so the route updater and source filter re-resolve the new selector immediately.
739    /// `None` clears the exit node (internet-bound traffic is then dropped, fail-closed, unless this
740    /// node egresses directly). The selection is re-resolved against the live peer set, so passing a
741    /// selector for a peer not yet in the netmap simply takes effect once that peer appears.
742    pub async fn set_exit_node(
743        &self,
744        selector: Option<ts_control::ExitNodeSelector>,
745    ) -> Result<(), Error> {
746        // Update the live cell every reader borrows from. `send_replace` keeps the value current
747        // even with no active receivers (none can have dropped while the runtime is up, but it is
748        // the right non-failing primitive here).
749        self.exit_node_tx.send_replace(selector);
750
751        // Trigger an immediate re-resolution: the route updater (outbound routes + DoH delegation)
752        // and the source filter (inbound validation) both recompute on an `Arc<PeerState>`, so a
753        // re-broadcast applies the new exit without waiting for the next netmap update.
754        self.peer_tracker
755            .upgrade()
756            .ok_or(Error {
757                kind: ErrorKind::ActorGone,
758                target_actor: None,
759                message_ty: None,
760            })?
761            .ask(peer_tracker::RepublishState)
762            .await
763            .map_err(Into::into)
764    }
765
766    /// The currently-selected exit node, or `None` if none is selected.
767    pub fn exit_node(&self) -> Option<ts_control::ExitNodeSelector> {
768        self.env.exit_node()
769    }
770
771    /// Change the set of subnet routes this node advertises at runtime (Go `tailscale set
772    /// --advertise-routes`). Applies BOTH halves together so the wire and the data path agree:
773    ///
774    /// 1. **Wire** — re-advertise `Hostinfo.RoutableIPs` to control on the live map-poll connection
775    ///    (so control grants the node the subnet-router role for exactly these prefixes).
776    /// 2. **Local** — swap the forwarder's accept/dial route table (so the node actually forwards the
777    ///    prefixes it advertises). New flows see the new set; in-flight flows keep their routing.
778    ///
779    /// `routes` is filtered to the IPv4-only, deduplicated set this fork can honor (IPv6 prefixes are
780    /// dropped under the IPv6-off posture — we never advertise a route we won't forward), so the wire
781    /// and forwarder are fed the identical final set. This sets the explicit subnet prefixes only; it
782    /// does NOT touch the exit-node `0.0.0.0/0` advertisement (a separate concern).
783    pub async fn set_advertise_routes(&self, routes: Vec<ipnet::IpNet>) -> Result<(), Error> {
784        // Update the explicit-subnet part of the live preference, keep the exit-node flag, and
785        // re-send the composed set. Composes with `set_advertise_exit_node` (neither clobbers the
786        // other's contribution to `Hostinfo.RoutableIPs`).
787        let composed = {
788            let mut adv = self.advertise.lock().unwrap_or_else(|p| p.into_inner());
789            adv.routes = routes;
790            compose_advertised_routes(adv.routes.clone(), adv.exit_node)
791        };
792        self.apply_advertised_routes(composed).await
793    }
794
795    /// Advertise (or stop advertising) this node as an **exit node** — the `0.0.0.0/0` default route
796    /// (Go `tailscale set --advertise-exit-node`). Composes with
797    /// [`set_advertise_routes`](Self::set_advertise_routes): toggling the exit node re-sends the
798    /// explicit subnet routes plus (when `enable`) `0.0.0.0/0`, so the two preferences are
799    /// independent. Like `set_advertise_routes`, this both re-advertises `Hostinfo.RoutableIPs` to
800    /// control AND updates the forwarder's accept/dial set, applied together. Control still gates
801    /// whether the advertised exit node is actually *usable* by peers (this only advertises it).
802    pub async fn set_advertise_exit_node(&self, enable: bool) -> Result<(), Error> {
803        let composed = {
804            let mut adv = self.advertise.lock().unwrap_or_else(|p| p.into_inner());
805            adv.exit_node = enable;
806            compose_advertised_routes(adv.routes.clone(), adv.exit_node)
807        };
808        self.apply_advertised_routes(composed).await
809    }
810
811    /// Push a freshly-composed advertised-route set to BOTH halves: the forwarder's accept/dial
812    /// table (local) FIRST — so the node forwards a prefix before control grants it, never the
813    /// reverse — then re-advertise `Hostinfo.RoutableIPs` to control on the live map-poll connection
814    /// (wire). `composed` is already filtered + exit-node-folded by [`compose_advertised_routes`].
815    async fn apply_advertised_routes(&self, composed: Vec<ipnet::IpNet>) -> Result<(), Error> {
816        self.forwarder
817            .ask(forwarder_actor::UpdateRoutes {
818                routes: composed.clone(),
819            })
820            .await?;
821        self.control
822            .ask(control_runner::SetAdvertiseRoutes { routes: composed })
823            .await
824            .map_err(Into::into)
825    }
826
827    /// Change this node's hostname at runtime (Go `tailscale set --hostname`), re-reporting
828    /// `Hostinfo.Hostname` to control on the live map-poll connection. Hostname is display-only
829    /// (control reflects it in the netmap), so there is no dataplane half. The new value is also
830    /// what a subsequent re-registration reports, so it persists across a reconnect.
831    pub async fn set_hostname(&self, hostname: String) -> Result<(), Error> {
832        self.control
833            .ask(control_runner::SetHostname { hostname })
834            .await
835            .map_err(Into::into)
836    }
837
838    /// Subscribe to netmap peer-change events: the **narrow** peer-set view.
839    ///
840    /// Returns a [`watch::Receiver`] whose value is the current set of peer [`StatusNode`]s,
841    /// updated on every netmap state update from control. Await
842    /// [`watch::Receiver::changed`](tokio::sync::watch::Receiver::changed) to react to peers
843    /// joining, leaving, or changing. For the unified Go-`WatchIPNBus` feed that merges this with
844    /// device-state and the interactive-login URL, see [`watch_ipn_bus`](Self::watch_ipn_bus); this
845    /// method is the peer-only projection of the same underlying cell.
846    pub async fn watch_netmap(&self) -> Result<watch::Receiver<Vec<StatusNode>>, Error> {
847        self.peer_tracker
848            .upgrade()
849            .ok_or(Error {
850                kind: ErrorKind::ActorGone,
851                target_actor: None,
852                message_ty: None,
853            })?
854            .ask(peer_tracker::WatchNetmap)
855            .await
856            .map_err(Into::into)
857    }
858
859    /// The current device connection-[`DeviceState`].
860    pub fn device_state(&self) -> DeviceState {
861        self.state_rx.borrow().clone()
862    }
863
864    /// Watch the device connection-[`DeviceState`] (`Connecting` → `Running` / `NeedsLogin` /
865    /// `Expired` / `Failed`).
866    ///
867    /// Returns a [`watch::Receiver`]; await
868    /// [`changed`](tokio::sync::watch::Receiver::changed) to react push-style to control connection
869    /// transitions instead of polling [`status`](Self::status). The initial value is the current
870    /// state. Note: a transient per-reconnect dip back to `Connecting` is **not** currently
871    /// emitted (control transparently reconnects below this layer); the state reflects registration
872    /// outcome and node-key expiry.
873    pub fn watch_state(&self) -> watch::Receiver<DeviceState> {
874        self.state_rx.clone()
875    }
876
877    /// Wait until the device finishes registering, returning a typed outcome.
878    ///
879    /// Resolves `Ok(())` once the device reaches [`DeviceState::Running`]. Returns a typed
880    /// [`RegistrationError`] otherwise — the actionable distinction between "retry", "re-pair", and
881    /// "drive interactive login" that replaces polling [`ipv4_addr`](Self::ipv4_addr) in a loop:
882    /// - `AuthRejected` — bad/expired/unknown auth key. **Permanent** (re-pair).
883    /// - `NeedsLogin(url)` — interactive authorization required (no usable auth key). **Not
884    ///   permanent**: the runtime keeps retrying and will reach `Running` once the user authorizes
885    ///   the URL. An **auth-key** caller should treat this as a failure; an **interactive** caller
886    ///   should ignore this return and instead drive the flow via [`watch_state`](Self::watch_state)
887    ///   (this method returns the URL eagerly rather than blocking for the whole login).
888    /// - `NetworkUnreachable` — control unreachable. **Transient** (retry).
889    /// - `Timeout` — no settled state within `timeout`.
890    ///
891    /// `KeyExpired` is not produced by this initial wait (a node key expires only *after* it has
892    /// come up); observe post-registration expiry via [`watch_state`](Self::watch_state).
893    /// `timeout` of `None` waits indefinitely for a settled state.
894    pub async fn wait_until_running(
895        &self,
896        timeout: Option<Duration>,
897    ) -> Result<(), RegistrationError> {
898        device_state::wait_for_running(self.state_rx.clone(), timeout).await
899    }
900
901    /// Subscribe to the unified IPN notification bus (Go `ipn` `WatchIPNBus` /
902    /// `LocalBackend.WatchNotifications`).
903    ///
904    /// Returns an [`IpnBusWatcher`]; await [`next`](IpnBusWatcher::next) to receive [`Notify`]
905    /// events that coalesce device-[`DeviceState`] changes (including the interactive-login URL as
906    /// `browse_to_url`) and netmap peer-set changes into one feed. `mask`
907    /// ([`NotifyWatchOpt`]) selects which current-state fields are front-loaded as an initial
908    /// snapshot on subscribe (`INITIAL_STATE` / `INITIAL_NETMAP`), exactly like Go's
909    /// `NotifyInitialState` / `NotifyInitialNetMap`.
910    ///
911    /// This composes the same `watch` cells as [`watch_state`](Self::watch_state),
912    /// [`watch_netmap`](Self::watch_netmap), and `pop_browser_url` — one source of truth, so the
913    /// merged feed cannot diverge from those narrow views. Besides the registration-time login URL
914    /// (carried by `NeedsLogin`), `browse_to_url` also streams the mid-session
915    /// `MapResponse.PopBrowserURL` (re-auth / consent on an already-running node). Delivery is
916    /// best-effort/lossy (a bounded per-watcher buffer; a notification is dropped rather than
917    /// blocking the runtime if a slow consumer's buffer fills), matching Go's bus. The stream ends
918    /// (`next` returns `None`) on runtime shutdown or when the watcher is dropped.
919    pub async fn watch_ipn_bus(&self, mask: NotifyWatchOpt) -> Result<IpnBusWatcher, Error> {
920        // The peer-set cell lives on the peer-tracker actor; obtain a receiver the same way
921        // `watch_netmap` does. State + shutdown cells are held here.
922        let peer_rx = self
923            .peer_tracker
924            .upgrade()
925            .ok_or(Error {
926                kind: ErrorKind::ActorGone,
927                target_actor: None,
928                message_ty: None,
929            })?
930            .ask(peer_tracker::WatchNetmap)
931            .await?;
932        // The running-node consent-URL cell lives on the control runner; obtain its receiver the
933        // same way (the control actor ref is strong, so no upgrade needed).
934        let browser_rx = self.control.ask(control_runner::WatchBrowserUrl).await?;
935        Ok(ipn_bus::spawn_watcher(
936            mask,
937            self.state_rx.clone(),
938            peer_rx,
939            browser_rx,
940            self.shutdown.subscribe(),
941        ))
942    }
943
944    /// Attempt to shut down the runtime gracefully.
945    ///
946    /// Returns false if the shutdown timed out. It is still shut down if it timed out, just
947    /// more violently and with possible resource leaks.
948    pub async fn graceful_shutdown(self, timeout: Option<Duration>) -> bool {
949        self.shutdown.send_replace(true);
950
951        async fn _shutdown_all(runtime: Runtime) {
952            // See the note in `Drop` for why we only need to stop these actors to bring down the
953            // whole runtime.
954
955            let _ignore = runtime.control.stop_gracefully().await;
956            let _ignore = runtime.dataplane.stop_gracefully().await;
957            let _ignore = runtime.env.bus.stop_gracefully().await;
958
959            tokio::join![
960                runtime.control.wait_for_shutdown(),
961                runtime.dataplane.wait_for_shutdown(),
962                runtime.env.bus.wait_for_shutdown(),
963            ];
964        }
965
966        let fut = _shutdown_all(self);
967
968        match timeout {
969            Some(timeout) => tokio::time::timeout(timeout, fut).await.is_ok(),
970            None => {
971                fut.await;
972                true
973            }
974        }
975    }
976}
977
978impl Drop for Runtime {
979    fn drop(&mut self) {
980        // We must have already run `graceful_shutdown`: on the happy path, this does nothing, but
981        // if it timed out, we need to make sure the actors are dead so we don't leak them and their
982        // dependents.
983        if *self.shutdown.borrow() {
984            self.control.kill();
985            self.dataplane.kill();
986            self.env.bus.kill();
987            return;
988        }
989
990        self.shutdown.send_replace(true);
991
992        // Actors shut down when the last ActorRef to them is dropped (as nothing can send them
993        // messages anymore). If we don't hold an ActorRef in Runtime, in general the only thing
994        // that has one is the MessageBus, which each actor subscribes to for a subset of messages.
995        // Hence, if we shut down the bus, most actors die as well.
996
997        // First shut down the actors we have an ActorRef to:
998        try_shutdown(&self.control);
999        try_shutdown(&self.dataplane);
1000
1001        // Then shutdown the message bus, stopping the rest of the actors:
1002        try_shutdown(&self.env.bus);
1003    }
1004}
1005
1006fn try_shutdown(a: &ActorRef<impl kameo::Actor>) {
1007    if let Err(e) = a.mailbox_sender().try_send(Signal::Stop) {
1008        tracing::error!(error = %e, "graceful shutdown failed, killing actor");
1009        a.kill();
1010    }
1011}
1012
1013/// Build the netstack config shared by both userspace netstacks (application + forwarder) from the
1014/// per-deployment `tcp_buffer_size` knob.
1015///
1016/// `None` keeps the netstack default (256 KiB/direction); `Some(n)` overrides it (e.g. a smaller
1017/// window on a memory-constrained exit node forwarding many concurrent flows — see
1018/// [`netstack::netcore::Config::tcp_buffer_size`]). Factored out of [`Runtime::spawn`] so the
1019/// None-default / Some-override mapping is unit-testable without standing up the actor system.
1020fn netstack_config_from(tcp_buffer_size: Option<usize>) -> netstack::netcore::Config {
1021    let mut c = netstack::netcore::Config::default();
1022    if let Some(tcp_buffer_size) = tcp_buffer_size {
1023        c.tcp_buffer_size = tcp_buffer_size;
1024    }
1025    c
1026}
1027
1028/// Filter a requested advertise-route set to the IPv4-only, deduplicated set this fork can honor,
1029/// mirroring [`ts_control::Config::advertised_routes`] so a runtime `set_advertise_routes` feeds the
1030/// wire (control grant) and the forwarder (accept/dial table) the identical final set. IPv6 prefixes
1031/// are dropped under the IPv6-off posture — we never advertise a route we won't forward. Order is
1032/// preserved (first occurrence wins). Factored out so the filter is unit-testable without an actor.
1033fn filter_advertise_routes(routes: Vec<ipnet::IpNet>) -> Vec<ipnet::IpNet> {
1034    let mut filtered: Vec<ipnet::IpNet> = Vec::new();
1035    for net in routes {
1036        if matches!(net, ipnet::IpNet::V4(_)) {
1037            if !filtered.contains(&net) {
1038                filtered.push(net);
1039            }
1040        } else {
1041            tracing::warn!(prefix = %net, "dropping IPv6 advertise route (IPv6-off posture)");
1042        }
1043    }
1044    filtered
1045}
1046
1047/// Compose the final advertised-route set from the explicit subnet `routes` and the exit-node flag,
1048/// mirroring [`ts_control::Config::advertised_routes`]: the IPv4-only, deduplicated subnet prefixes,
1049/// plus `0.0.0.0/0` appended when `exit_node` is set. This is the single source of truth both
1050/// runtime advertise mutators (`set_advertise_routes`, `set_advertise_exit_node`) feed, so the two
1051/// compose instead of clobbering. Factored out so the composition is unit-testable without an actor.
1052fn compose_advertised_routes(routes: Vec<ipnet::IpNet>, exit_node: bool) -> Vec<ipnet::IpNet> {
1053    let mut filtered = filter_advertise_routes(routes);
1054    if exit_node {
1055        let default_v4 = ipnet::IpNet::V4(
1056            ipnet::Ipv4Net::new(core::net::Ipv4Addr::UNSPECIFIED, 0)
1057                .expect("0.0.0.0/0 is a valid prefix"),
1058        );
1059        if !filtered.contains(&default_v4) {
1060            filtered.push(default_v4);
1061        }
1062    }
1063    filtered
1064}
1065
1066/// The runtime's live advertised-route preference: the explicit subnet routes plus whether this node
1067/// advertises itself as an exit node. Held behind a `Mutex` on the [`Runtime`] so
1068/// [`Runtime::set_advertise_routes`] and [`Runtime::set_advertise_exit_node`] each mutate their own
1069/// part and re-send the composed set — they compose rather than clobber (Go `EditPrefs` keeps
1070/// `AdvertiseRoutes` and the exit-node advertisement as independent prefs that both feed
1071/// `Hostinfo.RoutableIPs`).
1072#[derive(Debug, Default, Clone)]
1073struct AdvertiseState {
1074    /// The explicit subnet prefixes (pre-filter; the last value passed to `set_advertise_routes`).
1075    routes: Vec<ipnet::IpNet>,
1076    /// Whether this node advertises the exit-node default route (`0.0.0.0/0`).
1077    exit_node: bool,
1078}
1079
1080/// Flatten a kameo delegated-reply [`SendError`] for the id-token RPC into the RPC's own
1081/// [`ts_control::IdTokenError`].
1082///
1083/// A [`SendError::HandlerError`](kameo::error::SendError::HandlerError) carries the real
1084/// `IdTokenError` produced by the handler and is surfaced verbatim. Any other send failure (actor
1085/// not running / stopped, mailbox full, send timeout) is a delivery problem rather than an RPC
1086/// result, so it collapses to a transient [`ts_control::IdTokenError::NetworkError`]. Factored out
1087/// of [`Runtime::fetch_id_token`] so this mapping is unit-testable without standing up an actor.
1088fn flatten_send_err<M>(
1089    e: kameo::error::SendError<M, ts_control::IdTokenError>,
1090) -> ts_control::IdTokenError {
1091    match e {
1092        kameo::error::SendError::HandlerError(err) => err,
1093        _ => ts_control::IdTokenError::NetworkError,
1094    }
1095}
1096
1097/// Flatten a kameo `SendError` from the `Logout` ask into a [`ts_control::LogoutError`].
1098///
1099/// A `HandlerError` carries the real `LogoutError` from the control RPC and is surfaced verbatim;
1100/// any other send failure (actor not running / stopped, mailbox full, send timeout) — a delivery
1101/// problem, not a logout result — collapses to the transient [`ts_control::LogoutError::NetworkError`]
1102/// (logout is idempotent, so a retry after a delivery failure is safe). Factored out of
1103/// [`Runtime::logout`] so the mapping is unit-testable without standing up an actor.
1104fn flatten_logout_send_err<M>(
1105    e: kameo::error::SendError<M, ts_control::LogoutError>,
1106) -> ts_control::LogoutError {
1107    match e {
1108        kameo::error::SendError::HandlerError(err) => err,
1109        _ => ts_control::LogoutError::NetworkError,
1110    }
1111}
1112
1113/// Flatten a kameo `SendError` from the `SetDns` ask into a [`ts_control::SetDnsError`].
1114///
1115/// A `HandlerError` carries the real `SetDnsError` from the set-dns RPC and is surfaced verbatim;
1116/// any other send failure (actor not running / stopped, mailbox full, send timeout) — a delivery
1117/// problem, not a publish result — collapses to the transient
1118/// [`ts_control::SetDnsError::NetworkError`]. Factored out of [`Runtime::set_dns`] so the mapping is
1119/// unit-testable without standing up an actor.
1120fn flatten_set_dns_send_err<M>(
1121    e: kameo::error::SendError<M, ts_control::SetDnsError>,
1122) -> ts_control::SetDnsError {
1123    match e {
1124        kameo::error::SendError::HandlerError(err) => err,
1125        _ => ts_control::SetDnsError::NetworkError,
1126    }
1127}
1128
/// Collapses a kameo `SendError` from the `GetCertificate` ask into a [`ts_control::CertError`].
///
/// A `HandlerError` holds the real `CertError` produced by the ACME issuance and is returned
/// unchanged. `CertError` has no transient-network variant, so every other send failure (actor
/// not running / stopped, mailbox full, send timeout) — a delivery problem rather than an
/// issuance result — is reported as a [`ts_control::CertError::Io`]. Kept outside
/// [`Runtime::get_certificate`] so the mapping can be unit-tested without standing up an actor.
#[cfg(feature = "acme")]
fn flatten_cert_send_err<M>(
    e: kameo::error::SendError<M, ts_control::CertError>,
) -> ts_control::CertError {
    if let kameo::error::SendError::HandlerError(issuance_err) = e {
        return issuance_err;
    }
    let unavailable = std::io::Error::other("control runner unavailable for certificate issuance");
    ts_control::CertError::Io(unavailable)
}
1147
1148#[cfg(test)]
1149mod tests {
1150    use super::*;
1151
/// With `None`, the netstack's own default TCP window (the 256 KiB throughput default) has to
/// survive untouched; it must not be silently coerced to some other value.
#[test]
fn netstack_config_none_uses_netstack_default() {
    let expected = netstack::netcore::Config::default().tcp_buffer_size;
    let actual = netstack_config_from(None).tcp_buffer_size;
    assert_eq!(
        actual, expected,
        "None must inherit the netstack default TCP buffer size"
    );
}
1163
/// With `Some(n)`, the TCP window (the memory-vs-throughput knob exit-node operators reach for)
/// has to be overridden in the config that both netstacks are built from.
#[test]
fn netstack_config_some_overrides_buffer() {
    let config = netstack_config_from(Some(64 * 1024));
    assert_eq!(
        config.tcp_buffer_size,
        64 * 1024,
        "Some(n) must override the TCP buffer size that both netstacks use"
    );
}
1175
/// The set `set_advertise_routes` feeds to the wire and to the forwarder must be the IDENTICAL
/// filtered one: IPv4-only (IPv6 dropped under the IPv6-off posture), deduplicated, and in the
/// original order.
#[test]
fn filter_advertise_routes_keeps_v4_dedups_drops_v6() {
    let prefix = |text: &str| -> ipnet::IpNet { text.parse().unwrap() };
    let first_v4 = prefix("10.0.0.0/24");
    let second_v4 = prefix("192.168.1.0/24");
    let only_v6 = prefix("2001:db8::/32");

    // Mixed request: one v6 prefix in the middle and the first v4 prefix repeated at the end.
    let requested = vec![first_v4, only_v6, second_v4, first_v4];
    let filtered = filter_advertise_routes(requested);

    assert_eq!(
        filtered,
        vec![first_v4, second_v4],
        "v6 dropped, duplicate v4 collapsed, first-occurrence order preserved"
    );
}
1193
/// A request made up solely of IPv6 prefixes filters down to the empty set (a route that won't
/// be forwarded is never advertised) instead of erroring — clearing the advertised set is a
/// legitimate outcome.
#[test]
fn filter_advertise_routes_all_v6_is_empty() {
    let only_v6: ipnet::IpNet = "2001:db8::/32".parse().unwrap();
    let filtered = filter_advertise_routes(vec![only_v6]);
    assert!(filtered.is_empty());
}
1201
/// `compose_advertised_routes` appends the exit-node `0.0.0.0/0` to the filtered subnet routes
/// when (and only when) the exit-node flag is set, which is what makes `set_advertise_routes`
/// and `set_advertise_exit_node` compose as two independent preferences.
#[test]
fn compose_advertised_routes_folds_exit_node() {
    let subnet: ipnet::IpNet = "10.0.0.0/24".parse().unwrap();
    let default_v4: ipnet::IpNet = "0.0.0.0/0".parse().unwrap();

    // Flag off: only the (filtered) subnet routes come back.
    let without_exit = compose_advertised_routes(vec![subnet], false);
    assert_eq!(without_exit, vec![subnet], "exit-node off ⇒ no default route");

    // Flag on: the subnet routes followed by 0.0.0.0/0.
    let with_exit = compose_advertised_routes(vec![subnet], true);
    assert_eq!(
        with_exit,
        vec![subnet, default_v4],
        "exit-node on ⇒ 0.0.0.0/0 appended"
    );

    // Flag on and no subnet routes at all: the default route alone.
    let exit_only = compose_advertised_routes(vec![], true);
    assert_eq!(
        exit_only,
        vec![default_v4],
        "exit-node alone advertises only 0.0.0.0/0"
    );

    // Idempotence: an explicit 0.0.0.0/0 among the routes is not duplicated by the fold.
    let explicit_default = compose_advertised_routes(vec![default_v4], true);
    assert_eq!(
        explicit_default,
        vec![default_v4],
        "the exit-node fold dedups against an explicit default route"
    );
}
1235
1236    /// A `HandlerError` carries the real `IdTokenError` from the RPC handler and must pass through
1237    /// verbatim, not be flattened to a generic network error. Using an `Internal(_)` payload (not
1238    /// `NetworkError`) makes the passthrough observable: a buggy flatten that always returned
1239    /// `NetworkError` would fail this assertion.
1240    #[test]
1241    fn flatten_send_err_handler_error_passes_through() {
1242        // Build an `Internal(_)` payload via the public `From<Utf8Error>` conversion (no extra
1243        // deps): it is distinct from the `_ => NetworkError` fallback, so a buggy flatten that
1244        // always returned `NetworkError` would fail this assertion.
1245        // Route the invalid bytes through a runtime Vec so the `invalid_from_utf8` lint (which only
1246        // fires on compile-time-known literals) doesn't flag this intentional bad input.
1247        let bytes = vec![0xffu8, 0xfe];
1248        let utf8_err = core::str::from_utf8(&bytes).unwrap_err();
1249        let inner = ts_control::IdTokenError::from(utf8_err);
1250        assert!(matches!(inner, ts_control::IdTokenError::Internal(_)));
1251        let e: kameo::error::SendError<control_runner::FetchIdToken, ts_control::IdTokenError> =
1252            kameo::error::SendError::HandlerError(inner.clone());
1253        assert_eq!(flatten_send_err(e), inner);
1254    }
1255
1256    /// A non-handler send failure (actor stopped) is a delivery problem, not an RPC result, so it
1257    /// must collapse to a transient `NetworkError`.
1258    #[test]
1259    fn flatten_send_err_actor_stopped_is_network_error() {
1260        let e: kameo::error::SendError<control_runner::FetchIdToken, ts_control::IdTokenError> =
1261            kameo::error::SendError::ActorStopped;
1262        assert_eq!(flatten_send_err(e), ts_control::IdTokenError::NetworkError);
1263    }
1264
1265    /// `ActorNotRunning` (the message bounces back undelivered) is likewise a delivery failure and
1266    /// must map to a transient `NetworkError`.
1267    #[test]
1268    fn flatten_send_err_actor_not_running_is_network_error() {
1269        let e: kameo::error::SendError<control_runner::FetchIdToken, ts_control::IdTokenError> =
1270            kameo::error::SendError::ActorNotRunning(control_runner::FetchIdToken {
1271                audience: "sts.amazonaws.com".to_string(),
1272            });
1273        assert_eq!(flatten_send_err(e), ts_control::IdTokenError::NetworkError);
1274    }
1275
1276    /// A `HandlerError` from the logout RPC carries the real `LogoutError` and must pass through
1277    /// verbatim. An `Internal(_)` payload (distinct from the `_ => NetworkError` fallback) makes the
1278    /// passthrough observable.
1279    #[test]
1280    fn flatten_logout_send_err_handler_error_passes_through() {
1281        let inner = ts_control::LogoutError::Internal(ts_control::LogoutInternalErrorKind::Http);
1282        assert!(matches!(inner, ts_control::LogoutError::Internal(_)));
1283        let e: kameo::error::SendError<control_runner::Logout, ts_control::LogoutError> =
1284            kameo::error::SendError::HandlerError(inner.clone());
1285        assert_eq!(flatten_logout_send_err(e), inner);
1286    }
1287
1288    /// A non-handler send failure (actor stopped) is a delivery problem, not a logout result, and
1289    /// collapses to a transient `NetworkError` (logout is idempotent, so a retry is safe).
1290    #[test]
1291    fn flatten_logout_send_err_actor_stopped_is_network_error() {
1292        let e: kameo::error::SendError<control_runner::Logout, ts_control::LogoutError> =
1293            kameo::error::SendError::ActorStopped;
1294        assert_eq!(
1295            flatten_logout_send_err(e),
1296            ts_control::LogoutError::NetworkError
1297        );
1298    }
1299
1300    /// A `HandlerError` from the set-dns RPC carries the real `SetDnsError` and must pass through
1301    /// verbatim. An `Internal(_)` payload (distinct from the `_ => NetworkError` fallback) makes the
1302    /// passthrough observable.
1303    #[test]
1304    fn flatten_set_dns_send_err_handler_error_passes_through() {
1305        let inner = ts_control::SetDnsError::Internal(ts_control::SetDnsInternalErrorKind::Http);
1306        assert!(matches!(inner, ts_control::SetDnsError::Internal(_)));
1307        let e: kameo::error::SendError<control_runner::SetDns, ts_control::SetDnsError> =
1308            kameo::error::SendError::HandlerError(inner.clone());
1309        assert_eq!(flatten_set_dns_send_err(e), inner);
1310    }
1311
1312    /// A non-handler send failure (actor stopped) is a delivery problem, not a publish result, and
1313    /// collapses to a transient `NetworkError`.
1314    #[test]
1315    fn flatten_set_dns_send_err_actor_stopped_is_network_error() {
1316        let e: kameo::error::SendError<control_runner::SetDns, ts_control::SetDnsError> =
1317            kameo::error::SendError::ActorStopped;
1318        assert_eq!(
1319            flatten_set_dns_send_err(e),
1320            ts_control::SetDnsError::NetworkError
1321        );
1322    }
1323}