ts_runtime/lib.rs
1#![doc = include_str!("../README.md")]
2
3extern crate ts_netstack_smoltcp as netstack;
4
5use core::time::Duration;
6use std::sync::Arc;
7
8use kameo::{
9 actor::{ActorRef, Spawn, WeakActorRef},
10 mailbox::Signal,
11};
12use netstack::netcore::Channel;
13use tokio::sync::watch;
14
15use crate::{
16 control_runner::ControlRunner, dataplane::DataplaneActor, direct::DirectManager,
17 forwarder_actor::ForwarderActor, multiderp::Multiderp, netstack_actor::NetstackActor,
18};
19
20/// Pcap stream framer for debug packet capture (`CapturePcap`).
21pub mod capture;
22/// Control runner.
23pub mod control_runner;
24mod dataplane;
25mod derp_latency;
26/// Device connection-state tracking ([`DeviceState`]) and typed registration outcome
27/// ([`RegistrationError`]).
28pub mod device_state;
29mod direct;
30mod env;
31mod error;
32/// Fallback TCP handler registry (`tsnet.Server.RegisterFallbackTCPHandler` parity).
33pub mod fallback_tcp;
34mod forwarder_actor;
35/// Client-side Funnel ingress termination (`tsnet`'s `ListenFunnel` data path).
36pub mod funnel;
37/// Unified IPN notification bus ([`Notify`] / [`watch_ipn_bus`](Runtime::watch_ipn_bus)), mirroring
38/// Go `ipn` `LocalBackend.WatchNotifications` / the `WatchIPNBus` LocalAPI.
39pub mod ipn_bus;
40mod magic_dns;
41pub use magic_dns::DnsQueryResult;
42mod multiderp;
43/// OS network-link-change supervisor (opt-in `network-monitor` feature): re-binds + re-probes
44/// connectivity on a link change. Compiled out entirely when the feature is off.
45#[cfg(feature = "network-monitor")]
46mod netmon;
47mod netstack_actor;
48mod packetfilter;
49pub mod peer_tracker;
50mod peerapi;
51mod peerapi_doh;
52mod route_updater;
53/// Stored Serve config + accept-loop runtime (`tsnet`'s `Get/SetServeConfig` + serving runtime).
54pub mod serve;
55mod src_filter;
56/// Netmap status snapshot, WhoIs, and watcher types.
57pub mod status;
58/// Taildrop peer-to-peer file transfer store.
59pub mod taildrop;
60pub mod taildrop_send;
61/// Tailnet-Lock (TKA) chain-sync orchestration: bootstrap + offer/send driver (the runtime layer
62/// that bridges the `ts_control` sync RPCs and the `ts_tka` chain logic).
63mod tka_sync;
64#[cfg(feature = "tun")]
65mod tun_actor;
66
67pub use device_state::{DeviceState, RegistrationError};
68pub(crate) use env::Env;
69pub use error::{Error, ErrorKind};
70pub use ipn_bus::{IpnBusWatcher, Notify, NotifyWatchOpt};
71pub use status::{FileTarget, NetcheckReport, RegionLatency, Status, StatusNode, WhoIs};
72pub use ts_dataplane::{CaptureHook, CapturePath};
73
74use crate::peer_tracker::PeerTracker;
75
/// The runtime for a tailscale device.
///
/// Holds the actor references, `watch` cells, and background-task handle created by
/// [`Runtime::spawn`]. Fields are private unless an embedder needs direct access to the actor.
pub struct Runtime {
    /// Reference to the control actor.
    pub control: ActorRef<ControlRunner>,
    /// Reference to the dataplane actor. Used by [`Runtime::install_capture`] to install or clear
    /// the debug capture hook; clones are also handed to the multiderp and direct managers at
    /// spawn.
    dataplane: ActorRef<DataplaneActor>,
    /// Reference to the direct (disco/UDP underlay) manager, retained so [`Runtime::rebind`] can
    /// ask it to re-bind the underlay socket on a network/link change.
    direct: ActorRef<DirectManager>,
    /// Reference to the application netstack actor. `None` in TUN transport mode, where there is
    /// no userspace application netstack (the application data path is a real kernel TUN device).
    netstack: Option<WeakActorRef<NetstackActor>>,
    /// Reference to the peer tracker for peer lookups.
    pub peer_tracker: WeakActorRef<PeerTracker>,
    /// Fallback TCP handler registry, bound to the application netstack. `None` in TUN transport
    /// mode (no application netstack exists to attach it to).
    fallback_tcp: Option<fallback_tcp::FallbackTcpManager>,
    /// Reference to the MagicDNS responder, retained so [`Runtime::query_dns`] can run a query
    /// through the live `100.100.100.100` forward path. `None` in TUN transport mode (no
    /// `MagicDnsActor` is spawned there — TUN-mode MagicDNS is an in-packet intercept, not an actor).
    magic_dns: Option<ActorRef<magic_dns::MagicDnsActor>>,
    /// Reference to the forwarder actor, retained so [`Runtime::set_advertise_routes`] can push a
    /// new accept/dial route table onto the running forwarder (the local half of advertising
    /// routes). Without this the strong ref would drop after the startup `GetChannel` and the
    /// forwarder would be reachable only via the message bus.
    forwarder: ActorRef<ForwarderActor>,
    /// Reference to the multiderp manager, retained so [`Runtime::status`] can resolve each
    /// relayed peer's DERP region id to its region **code** (`ipnstate.PeerStatus.Relay`). Without
    /// this the strong ref would drop after startup (it is cloned into the direct manager + route
    /// updater) and the region-code map would be unreachable.
    multiderp: ActorRef<Multiderp>,
    /// Shared runtime environment. A clone is handed to each actor at spawn; the runtime keeps its
    /// own copy to serve the accessors that read from it ([`Runtime::taildrop_store`],
    /// [`Runtime::funnel_ingress_slot`], [`Runtime::ingress_active_flag`]).
    env: Env,
    /// Sender side of the shutdown `watch` cell (initially `false`). The matching receiver is
    /// passed into `Env` at construction, and the taildrop partial-reaper subscribes to it.
    shutdown: watch::Sender<bool>,
    /// Sender side of the exit-node selector `watch` cell. Held privately here (not on the cloned
    /// `Env`, which keeps only the read side) so that only `Runtime::set_exit_node` can mutate the
    /// selection; the route updater and source filter re-read it via [`Env::exit_node`].
    exit_node_tx: watch::Sender<Option<ts_control::ExitNodeSelector>>,
    /// Sender side of the accept-routes preference `watch` cell. Held privately here (same rationale
    /// as [`exit_node_tx`](Self::exit_node_tx)) so that only [`Runtime::set_accept_routes`] can
    /// toggle it; the route updater and source filter re-read it via [`Env::accept_routes`].
    accept_routes_tx: watch::Sender<bool>,
    /// Sender side of the accept-dns preference `watch` cell. Held privately here (same rationale as
    /// [`accept_routes_tx`](Self::accept_routes_tx)) so that only [`Runtime::set_accept_dns`] can
    /// toggle it; the MagicDNS responder re-reads it via [`Env::accept_dns`] when it rebuilds its
    /// view (the republish that `set_accept_dns` triggers).
    accept_dns_tx: watch::Sender<bool>,
    /// Receiver mirroring the *active* (resolved + fail-closed) exit node's stable id, fed by the
    /// route updater. Read by [`Runtime::status`] / [`Runtime::active_exit_node`] to report which
    /// exit node traffic is actually egressing through (vs. the merely-configured selector).
    active_exit_rx: watch::Receiver<Option<ts_control::StableNodeId>>,
    /// Receiver for the device connection-state cell, fed by the control runner. Read by
    /// [`Runtime::watch_state`] and [`Runtime::wait_until_running`].
    state_rx: watch::Receiver<DeviceState>,
    /// Receiver for the retained peer-capability grants, fed by the packet-filter updater. Read by
    /// [`Runtime::whois`] to resolve the flow-scoped cap map (Go `apitype.WhoIsResponse.CapMap`).
    cap_grants_rx: watch::Receiver<packetfilter::CapGrants>,
    /// Live advertised-route preference (explicit subnet routes + the exit-node flag), seeded from
    /// the startup config. [`Runtime::set_advertise_routes`] and [`set_advertise_exit_node`] each
    /// mutate their part under this lock then re-send the composed set, so the two compose.
    advertise: std::sync::Mutex<AdvertiseState>,
    /// Background task that periodically reaps abandoned taildrop `.partial` files (Go
    /// `feature/taildrop/delete.go` `fileDeleter`). `None` when no taildrop store is configured.
    /// Aborted on [`Drop`] so it cannot outlive the runtime (the `reauth_bridge` pattern).
    taildrop_reaper: Option<tokio::task::JoinHandle<()>>,
    /// The opt-in OS network-link-change supervisor (`network-monitor` feature + the
    /// `Config::network_monitor` flag). Retained so the actor — and thus the
    /// `LinkMonitorHandle` it holds, which aborts the monitor's watcher task on drop — lives for the
    /// device's life and is torn down when the runtime drops. `None` when the flag is off. Only
    /// present when built with the `network-monitor` feature.
    #[cfg(feature = "network-monitor")]
    // Never read in the code visible here: the field exists only to keep the supervisor's
    // `ActorRef` alive, hence the `dead_code` allowance.
    #[allow(dead_code)]
    netmon_supervisor: Option<ActorRef<netmon::NetmonSupervisor>>,
}
148
149impl Runtime {
150 /// Spawn a new runtime with the given parameters for connecting to a tailnet.
151 pub async fn spawn(
152 config: ts_control::Config,
153 auth_key: Option<String>,
154 keys: ts_keys::NodeState,
155 ) -> Result<Self, Error> {
156 let (shutdown_tx, shutdown_rx) = watch::channel(false);
157
158 // The exit-node selector, accept-routes, and accept-dns preferences are live `watch` cells so
159 // `Device::set_exit_node` / `set_accept_routes` / `set_accept_dns` can change them at runtime.
160 // `new_with_runtime_txs` returns each `Sender` (mutation capability) grouped in `pref_cells`
161 // so they are retained privately on the `Runtime`, while only the `Receiver`s (the readers'
162 // contract) live on the cloned `Env`. Initial values come from `ForwarderConfig`.
163 let (env, pref_cells) = Env::new_with_runtime_txs(
164 keys,
165 shutdown_rx,
166 env::ForwarderConfig::from_control_config(&config),
167 );
168
169 // Both userspace netstacks (application + forwarder) share one netstack config. Honor the
170 // per-deployment TCP buffer knob, and set the netstack MTU to the overlay/tunnel MTU so the
171 // advertised MSS fits the tunnel — leaving it at the netstack's generic 1500 default would
172 // emit over-1280 segments into the WireGuard path. The MTU comes from a `Tun` transport's
173 // `TunConfig` when one is configured (so the netstack and the TUN agree), else the 1280
174 // overlay default (the `Netstack` userspace mode — the common case — has no per-OS MTU knob,
175 // but the tailnet overlay MTU is still 1280).
176 let configured_mtu = match &config.transport_mode {
177 ts_control::TransportMode::Tun(tun_cfg) => tun_cfg.mtu,
178 ts_control::TransportMode::Netstack => None,
179 };
180 let netstack_config = netstack_config_from(config.tcp_buffer_size, configured_mtu);
181
182 let dataplane = DataplaneActor::spawn(env.clone());
183
184 let (netstack_id, netstack_up, netstack_down) =
185 dataplane.ask(dataplane::NewOverlayTransport).await?;
186
187 // A second overlay transport feeds the dedicated any-IP forwarder netstack. Inbound packets
188 // for advertised subnet routes / the exit-node default route are routed here (see
189 // `route_updater`), keeping forwarded flows off the application netstack.
190 let (forwarder_id, forwarder_up, forwarder_down) =
191 dataplane.ask(dataplane::NewOverlayTransport).await?;
192
193 // The selected DERP home region (Go `report.PreferredDERP`): the control runner is the sole
194 // writer (it applies the netcheck `bestRecent` + hysteresis smoothing), and `Multiderp`
195 // reads it to drive the local home relay — so the relay follows the SAME smoothed home the
196 // runner advertises to control, instead of picking it from the raw per-cycle latency minimum
197 // (which flapped on jitter and could disagree with the advertised home). Created here so it
198 // outlives both actors; `None` until the first home is chosen.
199 let (home_region_tx, home_region_rx) = watch::channel::<Option<ts_derp::RegionId>>(None);
200
201 let multiderp = Multiderp::spawn((env.clone(), dataplane.clone(), home_region_rx));
202
203 // Spawn the direct (disco) underlay manager before the route updater. Its `on_start`
204 // binds the UDP socket and registers its transport synchronously, so by the time the
205 // route updater asks it for the direct transport id it is guaranteed to be available.
206 let direct = DirectManager::spawn((env.clone(), dataplane.clone(), multiderp.clone()));
207
208 // Spawn the forwarder before the route updater. Its `on_start` builds the forwarder
209 // netstack, enables any-IP acceptance, and starts the per-port accept loops synchronously,
210 // so by the time the route updater begins delivering advertised prefixes to
211 // `forwarder_id` the netstack is already draining its transport.
212 let forwarder = ForwarderActor::spawn((
213 env.clone(),
214 netstack_config.clone(),
215 forwarder_up,
216 forwarder_down,
217 ));
218 // Force `on_start` to finish (any-IP enabled, accept loops live) before the route updater
219 // can route the first inbound flow to `forwarder_id`: an `ask` blocks until the actor has
220 // started.
221 //
222 // The forwarder netstack's overlay `Channel` is reused by the TUN application path for
223 // recursive / exit-node-DoH MagicDNS forwarding (TUN mode has no application netstack of its
224 // own, but the forwarder netstack runs in both modes and egresses over the overlay — the
225 // anti-leak property `forward_query`/`forward_doh` require). Only the `tun` Tun arm consumes
226 // it, so it is unused when the `tun` feature is off — allow that without warn-as-error.
227 #[cfg_attr(not(feature = "tun"), allow(unused_variables))]
228 let (forwarder_channel,) = forwarder.ask(forwarder_actor::GetChannel).await?;
229
230 // The route updater is the single authoritative resolver of the active (resolved,
231 // fail-closed) exit node; it publishes the resolved stable id into this watch cell so
232 // `Runtime::status` can report which exit is actually engaged (not just configured).
233 let (active_exit_tx, active_exit_rx) = watch::channel(None);
234 route_updater::RouteUpdater::spawn((
235 multiderp.clone(),
236 direct.clone(),
237 env.clone(),
238 netstack_id,
239 forwarder_id,
240 active_exit_tx,
241 ));
242 // The packet-filter updater also surfaces the retained cap-grants (for flow-scoped WhoIs)
243 // through a `watch` cell whose receiver the `Runtime` holds — the bus has no replay, so a
244 // `watch` is how `Runtime::whois` reads the current grants on demand.
245 let (cap_grants_tx, cap_grants_rx) = watch::channel(Default::default());
246 packetfilter::PacketfilterUpdater::spawn((env.clone(), cap_grants_tx));
247 src_filter::SourceFilterUpdater::spawn(env.clone());
248 // TKA enforcement-authority cell (Go `tkaFilterNetmapLocked`). Created here — before both
249 // actors spawn — so the control runner (sole writer, `Sender`) and the peer tracker (reader,
250 // `Receiver`) share one `watch` cell. A `watch` (not a bus message) is the transport for this
251 // security-critical state: last-write-wins, never dropped under load, ordered by the control
252 // runner's writes, so a disable (`None`) can never be reordered behind or dropped before a
253 // stale `Some`. `None` = no lock synced / disabled (admit all).
254 let (tka_authority_tx, tka_authority_rx) =
255 watch::channel::<Option<std::sync::Arc<ts_tka::Authority>>>(None);
256 let peer_tracker = PeerTracker::spawn((env.clone(), tka_authority_rx)).downgrade();
257
258 // Select the application data path from the transport mode. The forwarder/egress path
259 // above is UNCHANGED in both modes — TUN mode only swaps the application data path, never
260 // the forwarder. `config` is moved into `ControlRunner::spawn` below, so branch on a
261 // borrow and clone the small `TunConfig` where needed before the move.
262 //
263 // - Netstack (the default, and the only reachable arm when the `tun` feature is off):
264 // spawn the application netstack + MagicDNS responder + fallback-TCP registry, all on
265 // the `netstack_up`/`netstack_down` overlay seam.
266 // - Tun: spawn `TunActor` on that same overlay seam instead; no application netstack and
267 // no MagicDNS responder exist, and `netstack`/`fallback_tcp` are `None`.
268 // - Tun requested but built without the `tun` feature: hard-error (a config/build
269 // mismatch knowable at spawn time). NEVER silently fall back to netstack.
270 let (netstack, fallback_tcp, magic_dns) = match &config.transport_mode {
271 ts_control::TransportMode::Netstack => {
272 let netstack = NetstackActor::spawn((
273 env.clone(),
274 netstack_config,
275 netstack_up,
276 netstack_down,
277 ));
278
279 // Fetch the netstack channel while we still hold the strong ActorRef, then spawn
280 // the MagicDNS responder on it. Its ActorRef is retained on `Runtime` so
281 // `query_dns` can drive the live forward path; the serve loop itself is owned by the
282 // actor's internal JoinSet.
283 let (channel,) = netstack.ask(netstack_actor::GetChannel).await?;
284 // The fallback-TCP registry attaches to the application netstack — the same one
285 // that carries the embedder's explicit `Device::tcp_listen` sockets — so a
286 // fallback handler sees exactly the inbound flows no explicit listener matched.
287 let fallback_tcp = fallback_tcp::FallbackTcpManager::new(channel.clone());
288 let magic_dns = magic_dns::MagicDnsActor::spawn((env.clone(), channel));
289
290 (
291 Some(netstack.downgrade()),
292 Some(fallback_tcp),
293 Some(magic_dns),
294 )
295 }
296
297 #[cfg(feature = "tun")]
298 ts_control::TransportMode::Tun(tun_cfg) => {
299 // Reuse the same `netstack_up`/`netstack_down` overlay-transport pair that would
300 // have fed the netstack — it is just the application-side overlay seam (the name
301 // is historical). No NetstackActor / MagicDnsActor is spawned.
302 tun_actor::TunActor::spawn((
303 env.clone(),
304 tun_cfg.clone(),
305 netstack_up,
306 netstack_down,
307 // Reuse the forwarder netstack's overlay `Channel` for recursive / exit-node-DoH
308 // MagicDNS forwarding in the TUN datapath (TUN mode has no application netstack
309 // Channel of its own). Egresses over the overlay — anti-leak preserved.
310 //
311 // Host-route gating (subnet routes gated on `--accept-routes`, the host `/0` from
312 // the selected exit peer) is no longer snapshotted here: `TunActor` reads the live
313 // `Env` cells (`accept_routes`/`exit_node`) on every host-FIB apply — both the
314 // device-build path and the `PeerState` re-apply path — and folds the union of
315 // peers' AllowedIPs (see `tun_actor::host_routes_from_node`). A runtime
316 // `set_accept_routes` / `set_exit_node` toggle re-broadcasts the peer state, so the
317 // host routing table is re-steered live (no device rebuild needed).
318 forwarder_channel.clone(),
319 ));
320
321 (None, None, None)
322 }
323
324 #[cfg(not(feature = "tun"))]
325 ts_control::TransportMode::Tun(_) => {
326 return Err(Error {
327 kind: ErrorKind::TunUnavailable,
328 target_actor: None,
329 message_ty: None,
330 });
331 }
332 };
333
334 // Device connection-state cell. Created here (not inside the actor) so the control runner's
335 // `on_start` can publish `Failed`/`NeedsLogin` and still return `Err` without the sender
336 // being tied to a `Self` that never gets constructed on a hard registration failure.
337 let (state_tx, state_rx) = watch::channel(DeviceState::Connecting);
338
339 // Seed the live advertised-route preference from the startup config before `config` moves
340 // into the control runner, so the runtime setters compose against the configured baseline.
341 let advertise = std::sync::Mutex::new(AdvertiseState {
342 routes: config.advertise_routes.clone(),
343 exit_node: config.advertise_exit_node,
344 });
345
346 // Unbounded mailbox (not the default bounded-64): the control runner SELF-messages — a
347 // spawned TKA sync task delivers its result back via `self_ref.tell(TkaSynced)`, and the
348 // netmap stream pump tells `StreamMessage::Next` onto the same mailbox. The stall path: the
349 // netmap handler ends by parking on `env.publish().await` into the bounded-64 *bus* (a slow
350 // bus subscriber, e.g. a busy TKA-enforcing peer tracker, holds the bus full); while it is
351 // parked, a concurrently-finishing sync task's `TkaSynced` self-tell queues behind a full
352 // *ControlRunner* mailbox and blocks waiting for capacity, delaying the verified-authority
353 // (or lock-disable) write to the enforcement cell — i.e. stale TKA enforcement under churn.
354 // kameo gates its self-tell deadlock warning on `is_current()`, which is false for the
355 // detached sync task, so the stall is silent. An unbounded mailbox lets the self-tell and the
356 // stream pump enqueue without ever awaiting capacity (kameo's documented choice for a
357 // self-messaging actor); the runner's inputs are control-paced (the netmap stream + a few RPC
358 // replies; the bus delivers best-effort and never backpressures this mailbox), not an attacker
359 // flood, so unbounded growth is not a practical exposure.
360 let control = ControlRunner::spawn_with_mailbox(
361 control_runner::Params {
362 config,
363 auth_key,
364 env: env.clone(),
365 state_tx,
366 tka_authority: tka_authority_tx,
367 home_region: home_region_tx,
368 },
369 kameo::mailbox::unbounded(),
370 );
371
372 // Spawn the taildrop partial-reaper if a store is configured; it sweeps abandoned `.partial`
373 // files every `DELETE_DELAY` and exits on shutdown (the handle is aborted in `Drop`).
374 let taildrop_reaper = env.taildrop_store.as_ref().map(|store| {
375 crate::taildrop::spawn_partial_reaper(store.clone(), shutdown_tx.subscribe())
376 });
377
378 // Opt-in OS network-link monitor (`Config::network_monitor`, default off). When enabled it
379 // spawns a `NetmonSupervisor` that, on a coalesced link change, asks the direct manager to
380 // rebind + re-probe and republishes `MeasureNow` for a re-netcheck — the auto-recovery a
381 // real `tailscaled` performs and the engine otherwise leaves to the embedder. When the flag
382 // is off this is a complete no-op: zero extra threads/sockets, byte-for-byte today's
383 // behavior. The manual `Device::rebind` path is unchanged either way.
384 //
385 // Feature gating is strict and never silent: with the `network-monitor` feature ON the
386 // supervisor (and its `ts_netmon` dep) compile in and spawn when the flag is set; with the
387 // feature OFF, setting the flag is a HARD error at spawn (mirrors the `TransportMode::Tun`
388 // without-`tun`-feature error above), so a build that cannot honor the request fails loudly
389 // rather than booting a node that silently won't auto-recover.
390 #[cfg(feature = "network-monitor")]
391 let netmon_supervisor = if env.network_monitor {
392 // Slice (a): no OS event-source backend is wired yet (the Linux netlink / macOS
393 // PF_ROUTE backends are later slices), so the supervisor runs against a `NoopLinkMonitor`
394 // — it is live and correctly shaped (it will react the moment a real backend feeds it),
395 // it just never sees a synthetic/OS event in this build. Production end-to-end reaction
396 // is proven in the integration test via a `ManualLinkMonitor`.
397 let monitor: std::sync::Arc<dyn ts_netmon::LinkMonitor> =
398 std::sync::Arc::new(ts_netmon::NoopLinkMonitor);
399 Some(netmon::NetmonSupervisor::spawn(
400 netmon::NetmonSupervisorArgs {
401 monitor,
402 direct: direct.clone(),
403 env: env.clone(),
404 },
405 ))
406 } else {
407 None
408 };
409
410 #[cfg(not(feature = "network-monitor"))]
411 if env.network_monitor {
412 // The flag is set but this build cannot honor it. Fail loudly (never a silent no-op).
413 return Err(Error {
414 kind: ErrorKind::NetworkMonitorUnavailable,
415 target_actor: None,
416 message_ty: None,
417 });
418 }
419
420 Ok(Self {
421 control,
422 dataplane,
423 direct,
424 peer_tracker,
425 fallback_tcp,
426 magic_dns,
427 forwarder,
428 multiderp,
429 netstack,
430 env,
431 shutdown: shutdown_tx,
432 exit_node_tx: pref_cells.exit_node,
433 accept_routes_tx: pref_cells.accept_routes,
434 accept_dns_tx: pref_cells.accept_dns,
435 active_exit_rx,
436 state_rx,
437 cap_grants_rx,
438 advertise,
439 taildrop_reaper,
440 #[cfg(feature = "network-monitor")]
441 netmon_supervisor,
442 })
443 }
444
445 /// Register a fallback TCP handler consulted for every inbound TCP flow that matches no
446 /// explicit listener (`tsnet.Server.RegisterFallbackTCPHandler` parity).
447 ///
448 /// The returned [`fallback_tcp::FallbackTcpHandle`] deregisters the handler when dropped. See
449 /// [`fallback_tcp`] for the dispatch contract and anti-leak guarantees.
450 ///
451 /// Returns [`ErrorKind::UnsupportedInTunMode`] in TUN transport mode, where there is no
452 /// application netstack to attach a fallback handler to.
453 pub fn register_fallback_tcp_handler(
454 &self,
455 cb: Arc<
456 dyn Fn(core::net::SocketAddr, core::net::SocketAddr) -> fallback_tcp::FallbackDecision
457 + Send
458 + Sync,
459 >,
460 ) -> Result<fallback_tcp::FallbackTcpHandle, Error> {
461 Ok(self
462 .fallback_tcp
463 .as_ref()
464 .ok_or(Error {
465 kind: ErrorKind::UnsupportedInTunMode,
466 target_actor: None,
467 message_ty: None,
468 })?
469 .register(cb))
470 }
471
472 /// Get a channel to send commands to the netstack.
473 ///
474 /// Returns [`ErrorKind::UnsupportedInTunMode`] in TUN transport mode, where there is no
475 /// application netstack.
476 pub async fn channel(&self) -> Result<Channel, Error> {
477 let (channel,) = self
478 .netstack
479 .as_ref()
480 .ok_or(Error {
481 kind: ErrorKind::UnsupportedInTunMode,
482 target_actor: None,
483 message_ty: None,
484 })?
485 .upgrade()
486 .ok_or(Error {
487 kind: ErrorKind::ActorGone,
488 target_actor: None,
489 message_ty: None,
490 })?
491 .ask(netstack_actor::GetChannel)
492 .await?;
493
494 Ok(channel)
495 }
496
497 /// Resolve `name` for `qtype` through the live MagicDNS responder (the `100.100.100.100`
498 /// forward path), returning the raw DNS response, its RCODE, and the upstream resolver(s)
499 /// consulted (analogue of Go `LocalClient.QueryDNS`).
500 ///
501 /// This drives the *real* responder — the same `decide`/forward logic an on-the-wire query
502 /// hits — so the answer and its anti-leak posture (a tailnet-suffix name never egresses; a
503 /// recursive forward delegates to the active exit node's DoH; only IPv4 upstreams are dialed)
504 /// match exactly what a tailnet client observes. `qtype` is the raw RFC 1035 TYPE (`1`=A,
505 /// `28`=AAAA, `12`=PTR, or any other).
506 ///
507 /// Returns [`ErrorKind::UnsupportedInTunMode`] in TUN transport mode, where MagicDNS is an
508 /// in-packet intercept on the host's own resolver rather than an actor that can be queried, and
509 /// [`ErrorKind::ActorGone`] if the responder has shut down.
510 pub async fn query_dns(
511 &self,
512 name: &str,
513 qtype: u16,
514 ) -> Result<magic_dns::DnsQueryResult, Error> {
515 let result = self
516 .magic_dns
517 .as_ref()
518 .ok_or(Error {
519 kind: ErrorKind::UnsupportedInTunMode,
520 target_actor: None,
521 message_ty: None,
522 })?
523 .ask(magic_dns::Query {
524 name: name.to_owned(),
525 qtype,
526 })
527 .await?;
528
529 Ok(result)
530 }
531
532 /// The Taildrop file store, if Taildrop is enabled (`taildrop_dir` configured and the store
533 /// initialized). `None` when disabled — fail-closed. Shared with the peerAPI Taildrop server so
534 /// the embedder's read APIs and the receive path see the same on-disk store.
535 pub fn taildrop_store(&self) -> Option<Arc<crate::taildrop::TaildropStore>> {
536 self.env.taildrop_store.clone()
537 }
538
539 /// The shared Funnel ingress slot the peerAPI `/v0/ingress` route reads per connection.
540 ///
541 /// `Device::listen_funnel` installs a [`FunnelManager`](crate::funnel::FunnelManager)'s sink here
542 /// to make the route live (the peerAPI server is already running from startup). Returns a clone of
543 /// the runtime-lifetime `Arc` so the device can write the slot without restarting the server. See
544 /// [`crate::funnel`] for the ingress data path.
545 pub fn funnel_ingress_slot(&self) -> crate::funnel::FunnelIngressSlot {
546 self.env.funnel_ingress.clone()
547 }
548
549 /// The shared "Funnel ingress listener active" flag (the same `Arc` the control session reads to
550 /// set `HostInfo.IngressEnabled`). `Device::listen_funnel` flips it `true` while a funnel listener
551 /// is up so control routes Funnel traffic to this node; clearing it advertises no live endpoint.
552 pub fn ingress_active_flag(&self) -> std::sync::Arc<std::sync::atomic::AtomicBool> {
553 self.env.ingress_active.clone()
554 }
555
556 /// Install (`Some`) or clear (`None`) the debug packet-capture hook on the running dataplane.
557 /// `Some(hook)` tees every plaintext packet crossing the datapath to `hook` until it is cleared;
558 /// `None` stops capture. Mirrors Go `tstun.Wrapper.InstallCaptureHook` / `ClearCaptureSink`.
559 pub async fn install_capture(
560 &self,
561 hook: Option<ts_dataplane::CaptureHook>,
562 ) -> Result<(), Error> {
563 self.dataplane
564 .ask(dataplane::InstallCapture { hook })
565 .await
566 .map_err(Into::into)
567 }
568
569 /// Re-bind the underlay UDP socket after a network/link change (Wi-Fi switch, sleep/wake). The
570 /// embedder's own link monitor calls this (the engine owns the socket re-bind; the embedder owns
571 /// OS netmon). Re-binds the socket (same-port-preferred, IPv4-only invariant preserved) and
572 /// resets the now-stale local NAT mapping — clearing learned reflexive addresses and every
573 /// confirmed direct path while keeping candidate endpoints, so peers re-probe over the new socket
574 /// and relay over DERP (never a direct host dial) until a path re-confirms. Peers, control, the
575 /// netmap, disco state, and DERP are untouched. A no-op when the underlay is inert (bind failed
576 /// at startup, DERP-only). Mirrors Go magicsock `Conn.Rebind` + `resetEndpointStates`.
577 pub async fn rebind(&self) -> Result<(), Error> {
578 self.direct.ask(direct::Rebind).await.map_err(Error::from)
579 }
580
581 /// Force an immediate STUN / endpoint re-probe **without** rebinding the underlay socket —
582 /// Go magicsock's `Conn.ReSTUN`. Asks the `DirectManager` to run one STUN sweep now (re-learn
583 /// our reflexive/public address) while leaving the socket, its NAT mapping, learned paths, peers,
584 /// control, and DERP untouched. Lighter than [`rebind`](Self::rebind): no socket swap, no
585 /// re-ping. A no-op when the underlay is inert (bind failed at startup, DERP-only). No control
586 /// round-trip.
587 pub async fn re_stun(&self) -> Result<(), Error> {
588 self.direct.ask(direct::ReStun).await.map_err(Error::from)
589 }
590
591 /// A snapshot of the local netmap: this node plus every known peer.
592 ///
593 /// Combines the self node held by the control runner with the peer set held by the peer
594 /// tracker. Mirrors tsnet's `LocalClient::Status`.
595 ///
596 /// `self_node` is `None` until the first netmap update has been received from control. Peer
597 /// entries carry no online/user/capability data (see the [`status`] module docs for that gap).
598 pub async fn status(&self) -> Result<Status, Error> {
599 let self_node_domain = self.control.ask(control_runner::SelfNode).await?;
600 // The MagicDNS suffix is the self node's FQDN minus its host label — already split into
601 // `Node.tailnet` at decode time (Go derives it the same way in `NetworkMap.MagicDNSSuffix`).
602 // Capture it before the domain `Node` is mapped away into a `StatusNode`.
603 let magic_dns_suffix = self_node_domain.as_ref().and_then(|n| n.tailnet.clone());
604 let self_node = self_node_domain.as_ref().map(StatusNode::from_node);
605
606 let peers_with_ids = self
607 .peer_tracker
608 .upgrade()
609 .ok_or(Error {
610 kind: ErrorKind::ActorGone,
611 target_actor: None,
612 message_ty: None,
613 })?
614 .ask(peer_tracker::GetStatus)
615 .await?;
616
617 // Join per-peer connectivity (Go `PeerStatus.CurAddr`): one batched query to the direct
618 // manager for every peer's current trusted direct endpoint, then fill `cur_addr` on each
619 // `StatusNode`. A peer absent from the map is relayed via DERP (`cur_addr = None`). This is a
620 // live snapshot — the direct path can expire/re-confirm between calls (matches Go's snapshot
621 // semantics). The `watch_netmap` stream intentionally carries no connectivity (it is a netmap
622 // watch, not a path-state watch, and does not re-fire on direct↔relay flips).
623 let ids: Vec<ts_transport::PeerId> = peers_with_ids.iter().map(|(id, _)| *id).collect();
624 let best_addrs = self
625 .direct
626 .ask(direct::BestAddrs { ids: ids.clone() })
627 .await
628 .unwrap_or_default();
629
630 // For the peers with NO direct path (relayed via DERP), resolve the region CODE they relay
631 // through (Go `PeerStatus.Relay`). One batched ask to multiderp; `cur_addr` and `relay` are
632 // mutually exclusive for a routed peer, mirroring Go's empty-vs-set strings.
633 let relay_ids: Vec<ts_transport::PeerId> = ids
634 .into_iter()
635 .filter(|id| !best_addrs.contains_key(id))
636 .collect();
637 let relay_codes = if relay_ids.is_empty() {
638 Default::default()
639 } else {
640 self.multiderp
641 .ask(multiderp::RelayCodesForPeers { ids: relay_ids })
642 .await
643 .unwrap_or_default()
644 };
645
646 let peers = peers_with_ids
647 .into_iter()
648 .map(|(id, mut node)| match best_addrs.get(&id).copied() {
649 Some(addr) => {
650 node.cur_addr = Some(addr);
651 node
652 }
653 None => {
654 node.relay = relay_codes.get(&id).cloned();
655 node
656 }
657 })
658 .collect();
659
660 Ok(Status {
661 self_node,
662 peers,
663 active_exit_node: self.active_exit_node(),
664 magic_dns_suffix,
665 })
666 }
667
668 /// List the tailnet peers this node can Taildrop a file *to* (Go LocalAPI `FileTargets`).
669 ///
670 /// Mirrors the upstream send-path filter (`feature/taildrop` `Extension::FileTargets`): a peer
671 /// qualifies when it advertises a reachable peerAPI **and** is either owned by the same user as
672 /// this node **or** explicitly granted the file-sharing-target capability. The whole list is
673 /// gated on this node holding the file-sharing capability (control sets it when the admin enables
674 /// Taildrop) — absent that, an empty list (fail-closed, not an error, matching how the receive
675 /// store returns empty when disabled). Results are sorted by the peer's MagicDNS name.
676 ///
677 /// Targets are listed regardless of current online state (upstream's `FileTargets` does not gate
678 /// on online either; an offline target's send will simply time out). The self node is never
679 /// included. Returns empty before the first netmap.
680 ///
681 /// Divergence from Go: the upstream filter also excludes `tvOS` peers, which this fork cannot
682 /// reproduce (the domain node carries no OS string); the impact is negligible — the actual send
683 /// fail-closes if such a peer refused the transfer.
684 pub async fn file_targets(&self) -> Result<Vec<FileTarget>, Error> {
685 // Node-level gate: this node must hold the file-sharing capability (Taildrop enabled by the
686 // admin). Read it off the self node's cap map, like Go's `hasCapFileSharing()`.
687 let self_node = self.control.ask(control_runner::SelfNode).await?;
688 let Some(self_node) = self_node else {
689 return Ok(Vec::new()); // no netmap yet
690 };
691 if !self_node.can_share_files() {
692 return Ok(Vec::new()); // Taildrop not enabled for the tailnet — fail-closed
693 }
694 let self_user_id = self_node.user_id;
695
696 let peers = self
697 .peer_tracker
698 .upgrade()
699 .ok_or(Error {
700 kind: ErrorKind::ActorGone,
701 target_actor: None,
702 message_ty: None,
703 })?
704 .ask(peer_tracker::AllPeers)
705 .await?;
706
707 // Eligibility + ordering live in `build_file_targets` (pure, unit-tested in `status`).
708 Ok(status::build_file_targets(peers, self_user_id))
709 }
710
711 /// The stable id of the exit node traffic is currently egressing through, or `None` if none is
712 /// engaged. This is the route updater's resolved + fail-closed answer (see
713 /// [`Status::active_exit_node`](crate::status::Status::active_exit_node)): it differs from the
714 /// configured [`exit_node`](Self::exit_node) selector, which may name a peer that is absent or
715 /// no longer advertising a default route (in which case egress is dropped and this returns
716 /// `None`).
717 pub fn active_exit_node(&self) -> Option<ts_control::StableNodeId> {
718 self.active_exit_rx.borrow().clone()
719 }
720
721 /// Request an OIDC ID token from control scoped to `audience` (workload-identity federation).
722 ///
723 /// Returns the signed JWT, or the token RPC's own [`ts_control::IdTokenError`]. The kameo
724 /// delegated-reply send error is flattened: a handler error carries the real `IdTokenError`,
725 /// any other send failure (actor shutdown / mailbox closed) is surfaced as
726 /// [`ts_control::IdTokenError::NetworkError`].
727 pub async fn fetch_id_token(
728 &self,
729 audience: String,
730 ) -> Result<String, ts_control::IdTokenError> {
731 self.control
732 .ask(control_runner::FetchIdToken { audience })
733 .await
734 .map_err(flatten_send_err)
735 }
736
737 /// Log this node out of the tailnet: deregister it by expiring its current node key.
738 ///
739 /// Forwards to the control runner, which re-POSTs `/machine/register` with a past expiry over a
740 /// fresh Noise channel. This is a control-plane state change only — it does NOT shut the runtime
741 /// down (the caller follows with [`graceful_shutdown`](Self::graceful_shutdown)) and does not
742 /// touch the on-disk node key. The kameo delegated-reply send error is flattened the same way as
743 /// `fetch_id_token`: a handler error carries the real
744 /// [`ts_control::LogoutError`]; any other send failure (actor shutdown / mailbox closed) is
745 /// surfaced as [`ts_control::LogoutError::NetworkError`].
746 pub async fn logout(&self) -> Result<(), ts_control::LogoutError> {
747 self.control
748 .ask(control_runner::Logout)
749 .await
750 .map_err(flatten_logout_send_err)
751 }
752
753 /// Publish a `TXT` DNS record for this node via control's `/machine/set-dns` (Go
754 /// `LocalClient.SetDNS`).
755 ///
756 /// Forwards to the control runner, which POSTs the record over a fresh Noise channel. The kameo
757 /// delegated-reply send error is flattened the same way as `fetch_id_token`:
758 /// a handler error carries the real [`ts_control::SetDnsError`]; any other send failure (actor
759 /// shutdown / mailbox closed) is surfaced as [`ts_control::SetDnsError::NetworkError`].
760 pub async fn set_dns(
761 &self,
762 name: String,
763 value: String,
764 ) -> Result<(), ts_control::SetDnsError> {
765 self.control
766 .ask(control_runner::SetDns { name, value })
767 .await
768 .map_err(flatten_set_dns_send_err)
769 }
770
771 /// Sign `node_key` with this node's network-lock key and submit the signature to control
772 /// (Go `tka.sign` Direct case → `/machine/tka/sign`).
773 ///
774 /// Submits only — the local [`Authority`](ts_tka::Authority) is **not** mutated here; it advances
775 /// via the existing verified-sync path. A handler error carries the real [`ts_control::TkaSyncError`];
776 /// any other send failure (actor shutdown / mailbox closed) is surfaced as
777 /// [`ts_control::TkaSyncError::NetworkError`].
778 pub async fn tka_sign(&self, node_key: [u8; 32]) -> Result<(), ts_control::TkaSyncError> {
779 self.control
780 .ask(control_runner::TkaSign { node_key })
781 .await
782 .map_err(flatten_tka_send_err)
783 }
784
785 /// Disable Tailnet Lock by presenting the `disablement_secret` to control (Go `tka.disable` →
786 /// `/machine/tka/disable`), targeting the current authority head.
787 ///
788 /// Submits only — the local [`Authority`](ts_tka::Authority) is **not** mutated here. A handler
789 /// error carries the real [`ts_control::TkaSyncError`] (incl.
790 /// [`Unsupported`](ts_control::TkaSyncError::Unsupported) when there is no known TKA head to
791 /// disable); any other send failure collapses to
792 /// [`NetworkError`](ts_control::TkaSyncError::NetworkError).
793 pub async fn tka_disable(
794 &self,
795 disablement_secret: Vec<u8>,
796 ) -> Result<(), ts_control::TkaSyncError> {
797 self.control
798 .ask(control_runner::TkaDisable { disablement_secret })
799 .await
800 .map_err(flatten_tka_send_err)
801 }
802
803 /// Initialize Tailnet Lock with this node as the sole initial trusted key, gated by
804 /// `disablement_secret` (Go `tka` init → `/machine/tka/init/{begin,finish}`).
805 ///
806 /// Submits only — does not seed the local [`Authority`](ts_tka::Authority); the node picks up the
807 /// new lock via the existing verified netmap-sync. A handler error carries the real
808 /// [`ts_control::TkaSyncError`] ([`Unsupported`](ts_control::TkaSyncError::Unsupported) if
809 /// control needs other nodes re-signed — the single-node "lock yourself in" subset only); any
810 /// other send failure collapses to [`NetworkError`](ts_control::TkaSyncError::NetworkError).
811 pub async fn tka_init(
812 &self,
813 disablement_secret: Vec<u8>,
814 ) -> Result<(), ts_control::TkaSyncError> {
815 self.control
816 .ask(control_runner::TkaInit { disablement_secret })
817 .await
818 .map_err(flatten_tka_send_err)
819 }
820
/// Obtain a real Let's Encrypt certificate for this node's MagicDNS `name` (`acme` feature).
///
/// Works like `fetch_id_token`: the request is forwarded to the control runner, which runs the
/// client-side ACME DNS-01 flow on a spawned task and publishes the challenge TXT via the
/// node's set-dns RPC. The kameo delegated-reply send error is flattened — a handler error
/// carries the real [`ts_control::CertError`]; any other send failure (actor shutdown / mailbox
/// closed) is surfaced as a [`ts_control::CertError::Io`]. SaaS-only: a self-hosted control
/// plane 501s on set-dns.
#[cfg(feature = "acme")]
pub async fn get_certificate(
    &self,
    name: String,
) -> Result<ts_control::tls::CertifiedKey, ts_control::CertError> {
    let request = control_runner::GetCertificate { name };
    let reply = self.control.ask(request).await;
    reply.map_err(flatten_cert_send_err)
}
839
/// Obtain a real Let's Encrypt certificate for this node's MagicDNS `name` as a **PEM pair**
/// `(cert_chain_pem, key_pem)` — the analog of Go's `LocalClient.CertPairWithValidity`, meant
/// for writing the daemon's on-disk `.crt` + `.key` (`tnet cert`). `acme` feature.
///
/// The issuance is the same as [`get_certificate`](Self::get_certificate) (one client-side ACME
/// DNS-01 order, challenge published via the node's set-dns RPC); only the result shape
/// differs: the leaf+chain PEM and the leaf-key PEM are returned instead of the opaque
/// [`CertifiedKey`](ts_control::tls::CertifiedKey). The second element is the **leaf private
/// key** PEM; it is never logged anywhere on this path.
///
/// **`min_validity` (honest "always fresh").** Go's `CertPairWithValidity` reuses a cached cert
/// that still has at least `min_validity` of lifetime left and re-issues otherwise. This fork
/// has **no cert cache** — every call performs a fresh issuance — so `min_validity` is accepted
/// for signature compatibility but does not change behavior: a freshly issued, full-lifetime
/// cert trivially satisfies any `min_validity`. A reuse cache is separate future work; this
/// does NOT fake one.
///
/// Error handling matches [`get_certificate`](Self::get_certificate): the kameo
/// delegated-reply send error is flattened — a handler error carries the real
/// [`ts_control::CertError`]; any other send failure (actor shutdown / mailbox closed) collapses
/// to a [`ts_control::CertError::Io`]. SaaS-only: a self-hosted control plane 501s on set-dns.
#[cfg(feature = "acme")]
pub async fn cert_pair(
    &self,
    name: String,
    min_validity: Option<Duration>,
) -> Result<(String, String), ts_control::CertError> {
    // With no cert cache, every issuance is fresh and full-lifetime, which satisfies any
    // `min_validity`. The binding below only marks the parameter as deliberately unused.
    let _ = min_validity;
    let request = control_runner::GetCertPair { name };
    let reply = self.control.ask(request).await;
    reply.map_err(flatten_cert_send_err)
}
877
878 /// Resolve which node owns a tailnet source address.
879 ///
880 /// Maps the destination IP of `addr` to its owning node. Mirrors tsnet's `LocalClient::WhoIs`.
881 /// Returns `None` if no peer holds that tailnet IP.
882 ///
883 /// The returned [`WhoIs`] additionally carries the **flow-scoped** peer-capability grants
884 /// ([`WhoIs::cap_map`], Go `apitype.WhoIsResponse.CapMap`): the caps control's packet-filter
885 /// application rules authorize for traffic from THIS node (the flow source) to `addr` (the
886 /// destination). Empty when no grant matches. (The node-level cap map rides
887 /// [`WhoIs::capabilities`].)
888 pub async fn whois(&self, addr: core::net::SocketAddr) -> Result<Option<WhoIs>, Error> {
889 let whois = self
890 .peer_tracker
891 .upgrade()
892 .ok_or(Error {
893 kind: ErrorKind::ActorGone,
894 target_actor: None,
895 message_ty: None,
896 })?
897 .ask(peer_tracker::Whois { addr })
898 .await?;
899
900 let Some(mut whois) = whois else {
901 return Ok(None);
902 };
903
904 // Fill the flow-scoped cap map: src = this node's own tailnet IP (of the dst's family),
905 // dst = the queried address. A grant applies when its source matches the flow source — `src`
906 // ∈ its src prefixes OR this node holds one of its source node-caps — AND `dst` ∈ its dst
907 // prefixes (Go `Filter.CapsWithValues`). Resolve our own IP + cap map from the self node; if
908 // it isn't known yet, leave the map empty (no grants resolvable without a source).
909 let dst = addr.ip();
910 if let Some(self_node) = self.control.ask(control_runner::SelfNode).await? {
911 let src: core::net::IpAddr = if dst.is_ipv6() {
912 self_node.tailnet_address.ipv6.addr().into()
913 } else {
914 self_node.tailnet_address.ipv4.addr().into()
915 };
916 let grants = self.cap_grants_rx.borrow();
917 whois.cap_map = ts_packetfilter_state::caps_for(&grants, src, dst, |cap| {
918 self_node.has_node_attr(cap)
919 });
920 }
921
922 Ok(Some(whois))
923 }
924
925 /// The current direct-path status to the peer holding tailnet IP `dst`: its confirmed direct UDP
926 /// endpoint and that path's last-measured RTT, or `None` when there is no direct path right now
927 /// (the peer is relayed via DERP, is unknown, or has no disco key).
928 ///
929 /// The latency is the RTT of the most recent disco ping/pong that confirmed the path — a live
930 /// snapshot up to one probe interval stale, NOT a fresh on-demand round-trip (that is a separate,
931 /// heavier capability). Mirrors the direct-path latency Go surfaces for `ipnstate.PeerStatus`.
932 pub async fn direct_path(
933 &self,
934 dst: core::net::IpAddr,
935 ) -> Result<Option<(core::net::SocketAddr, Duration)>, Error> {
936 let peer_tracker = self.peer_tracker.upgrade().ok_or(Error {
937 kind: ErrorKind::ActorGone,
938 target_actor: None,
939 message_ty: None,
940 })?;
941
942 // Resolve the tailnet IP to its node, then to its disco key. No node / no disco key ⇒ no
943 // direct path is possible (a peer with no disco key can only be reached via DERP).
944 let Some(node) = peer_tracker
945 .ask(peer_tracker::PeerByTailnetIp { ip: dst })
946 .await?
947 else {
948 return Ok(None);
949 };
950 let Some(disco) = node.disco_key else {
951 return Ok(None);
952 };
953
954 self.direct
955 .ask(direct::DirectPathLatency { disco })
956 .await
957 .map_err(Into::into)
958 }
959
960 /// Send a disco ping to the peer holding tailnet IP `dst` **now** and await the pong, returning
961 /// the fresh round-trip latency and the endpoint that answered, or `None` if no pong arrives
962 /// within `timeout` (or the peer is unknown / has no disco key / no candidate path). This is the
963 /// true on-demand `PingType::Disco` (Go `tailscale ping`), as opposed to
964 /// [`direct_path`](Self::direct_path) which reports the last periodic probe's RTT.
965 ///
966 /// The ping round-trip is awaited OFF the direct manager's mailbox (we take a `MagicSock` handle
967 /// and await on it directly), so a slow/timing-out ping never blocks the actor.
968 pub async fn ping_disco(
969 &self,
970 dst: core::net::IpAddr,
971 timeout: Duration,
972 ) -> Result<Option<(core::net::SocketAddr, Duration)>, Error> {
973 let peer_tracker = self.peer_tracker.upgrade().ok_or(Error {
974 kind: ErrorKind::ActorGone,
975 target_actor: None,
976 message_ty: None,
977 })?;
978
979 let Some(node) = peer_tracker
980 .ask(peer_tracker::PeerByTailnetIp { ip: dst })
981 .await?
982 else {
983 return Ok(None);
984 };
985 let Some(disco) = node.disco_key else {
986 return Ok(None);
987 };
988
989 // Cheap synchronous handle fetch, then await the ping OFF the actor mailbox.
990 let Some(sock) = self.direct.ask(direct::SockHandle).await? else {
991 return Ok(None);
992 };
993 // A `ping_now` error is an underlay UDP send failure (not an actor problem); surface it as a
994 // reply-level error. A timed-out / unanswered ping is `Ok(None)`, not an error.
995 sock.ping_now(&disco, timeout).await.map_err(|_| Error {
996 kind: ErrorKind::ReplyErr,
997 target_actor: None,
998 message_ty: None,
999 })
1000 }
1001
1002 /// Change the selected exit node at runtime (the equivalent of Go `tsnet`'s
1003 /// `LocalClient.EditPrefs(ExitNodeID/ExitNodeIP)`), without recreating the device.
1004 ///
1005 /// Updates the live exit-node selector, then asks the peer tracker to re-broadcast the current
1006 /// peer set so the route updater and source filter re-resolve the new selector immediately.
1007 /// `None` clears the exit node (internet-bound traffic is then dropped, fail-closed, unless this
1008 /// node egresses directly). The selection is re-resolved against the live peer set, so passing a
1009 /// selector for a peer not yet in the netmap simply takes effect once that peer appears.
1010 pub async fn set_exit_node(
1011 &self,
1012 selector: Option<ts_control::ExitNodeSelector>,
1013 ) -> Result<(), Error> {
1014 // Update the live cell every reader borrows from. `send_replace` keeps the value current
1015 // even with no active receivers (none can have dropped while the runtime is up, but it is
1016 // the right non-failing primitive here).
1017 self.exit_node_tx.send_replace(selector);
1018
1019 // Trigger an immediate re-resolution: the route updater (outbound routes + DoH delegation)
1020 // and the source filter (inbound validation) both recompute on an `Arc<PeerState>`, so a
1021 // re-broadcast applies the new exit without waiting for the next netmap update.
1022 self.peer_tracker
1023 .upgrade()
1024 .ok_or(Error {
1025 kind: ErrorKind::ActorGone,
1026 target_actor: None,
1027 message_ty: None,
1028 })?
1029 .ask(peer_tracker::RepublishState)
1030 .await
1031 .map_err(Into::into)
1032 }
1033
1034 /// The currently-selected exit node, or `None` if none is selected.
1035 pub fn exit_node(&self) -> Option<ts_control::ExitNodeSelector> {
1036 self.env.exit_node()
1037 }
1038
1039 /// Toggle whether this node accepts peer-advertised subnet routes at runtime (the equivalent of
1040 /// Go `tsnet`'s `LocalClient.EditPrefs(RouteAll)` / `tailscale set --accept-routes`), without
1041 /// recreating the device.
1042 ///
1043 /// `accept-routes` is a purely **local** preference — unlike advertised routes it is never
1044 /// reported to control (no `Hostinfo` / MapRequest side), so this only re-runs the local
1045 /// route/source-filter recompute, mirroring [`set_exit_node`](Self::set_exit_node) rather than
1046 /// [`set_advertise_routes`](Self::set_advertise_routes). Updates the live cell, then asks the peer
1047 /// tracker to re-broadcast the current peer set so the route updater (outbound routes) and the
1048 /// source filter (inbound validation) re-filter against the new value immediately: turning it on
1049 /// installs newly-accepted subnet routes (and widens the source filter to match); turning it off
1050 /// removes them from BOTH in lock-step (never accepting a source for a route no longer installed).
1051 /// Self routes and the exit-node default `/0` are unaffected (the latter is gated by the exit-node
1052 /// selection, not this flag).
1053 ///
1054 /// In TUN transport mode the host routing table is also re-steered live: the `RepublishState`
1055 /// kicked below re-broadcasts the peer set to the `TunActor`, whose `PeerState` handler re-reads
1056 /// `accept_routes` (and the exit selection) from `Env` and re-applies the host routes — so the
1057 /// toggle takes effect without rebuilding the device (the apply is an idempotent add-new/
1058 /// remove-gone diff). The exit-node default `/0` is still keyed on the exit selection, not this flag.
1059 pub async fn set_accept_routes(&self, accept: bool) -> Result<(), Error> {
1060 // Update the live cell every reader borrows from (same primitive/rationale as set_exit_node).
1061 self.accept_routes_tx.send_replace(accept);
1062
1063 // Trigger an immediate re-filter: the route updater and source filter both recompute on an
1064 // `Arc<PeerState>`, so a re-broadcast applies the new preference without waiting for the next
1065 // netmap update. Both re-read the same live cell, so the outbound route set and the inbound
1066 // source filter stay coupled (the anti-leak invariant).
1067 self.peer_tracker
1068 .upgrade()
1069 .ok_or(Error {
1070 kind: ErrorKind::ActorGone,
1071 target_actor: None,
1072 message_ty: None,
1073 })?
1074 .ask(peer_tracker::RepublishState)
1075 .await
1076 .map_err(Into::into)
1077 }
1078
1079 /// Whether this node currently accepts peer-advertised subnet routes (`--accept-routes`).
1080 pub fn accept_routes(&self) -> bool {
1081 self.env.accept_routes()
1082 }
1083
1084 /// Toggle whether this node accepts the tailnet's DNS configuration at runtime (the equivalent of
1085 /// Go `tsnet`'s `LocalClient.EditPrefs(CorpDNS)` / `tailscale set --accept-dns`), without
1086 /// recreating the device.
1087 ///
1088 /// Like [`set_accept_routes`](Self::set_accept_routes), `accept-dns` is a purely **local**
1089 /// preference — it is never reported to control (no `Hostinfo` / MapRequest side), so this only
1090 /// re-runs the local MagicDNS view rebuild. Updates the live cell, then asks the peer tracker to
1091 /// re-broadcast the current peer set; the resulting `PeerState` rebuild re-applies the gate on the
1092 /// MagicDNS responder (and the peerAPI DoH server that shares its view). When `false`, the
1093 /// responder ignores the control-pushed DNS config and answers every query `REFUSED`, mirroring Go
1094 /// applying an empty `dns.Config` when `CorpDNS` is off; flipping it back to `true` restores
1095 /// serving from the still-current config (the real config is never destroyed — only gated at the
1096 /// read site), so the OFF→ON restore is automatic.
1097 pub async fn set_accept_dns(&self, accept: bool) -> Result<(), Error> {
1098 // Update the live cell every reader borrows from (same primitive/rationale as set_accept_routes).
1099 self.accept_dns_tx.send_replace(accept);
1100
1101 // Trigger an immediate view rebuild: the MagicDNS responder re-reads `Env::accept_dns()` when
1102 // it handles a `PeerState`, so a re-broadcast re-applies the gate on both the netstack
1103 // responder and the peerAPI DoH server (which share the view) without waiting for the next
1104 // control/peer update. Mirrors `set_accept_routes`'s republish.
1105 self.peer_tracker
1106 .upgrade()
1107 .ok_or(Error {
1108 kind: ErrorKind::ActorGone,
1109 target_actor: None,
1110 message_ty: None,
1111 })?
1112 .ask(peer_tracker::RepublishState)
1113 .await
1114 .map_err(Into::into)
1115 }
1116
1117 /// Whether this node currently accepts the tailnet's DNS configuration (`--accept-dns` / `CorpDNS`).
1118 pub fn accept_dns(&self) -> bool {
1119 self.env.accept_dns()
1120 }
1121
1122 /// Change the set of subnet routes this node advertises at runtime (Go `tailscale set
1123 /// --advertise-routes`). Applies BOTH halves together so the wire and the data path agree:
1124 ///
1125 /// 1. **Wire** — re-advertise `Hostinfo.RoutableIPs` to control on the live map-poll connection
1126 /// (so control grants the node the subnet-router role for exactly these prefixes).
1127 /// 2. **Local** — swap the forwarder's accept/dial route table (so the node actually forwards the
1128 /// prefixes it advertises). New flows see the new set; in-flight flows keep their routing.
1129 ///
1130 /// `routes` is filtered to the IPv4-only, deduplicated set this fork can honor (IPv6 prefixes are
1131 /// dropped under the IPv6-off posture — we never advertise a route we won't forward), so the wire
1132 /// and forwarder are fed the identical final set. This sets the explicit subnet prefixes only; it
1133 /// does NOT touch the exit-node `0.0.0.0/0` advertisement (a separate concern).
1134 pub async fn set_advertise_routes(&self, routes: Vec<ipnet::IpNet>) -> Result<(), Error> {
1135 // Update the explicit-subnet part of the live preference, keep the exit-node flag, and
1136 // re-send the composed set. Composes with `set_advertise_exit_node` (neither clobbers the
1137 // other's contribution to `Hostinfo.RoutableIPs`).
1138 let composed = {
1139 let mut adv = self.advertise.lock().unwrap_or_else(|p| p.into_inner());
1140 adv.routes = routes;
1141 compose_advertised_routes(adv.routes.clone(), adv.exit_node)
1142 };
1143 self.apply_advertised_routes(composed).await
1144 }
1145
1146 /// Advertise (or stop advertising) this node as an **exit node** — the `0.0.0.0/0` default route
1147 /// (Go `tailscale set --advertise-exit-node`). Composes with
1148 /// [`set_advertise_routes`](Self::set_advertise_routes): toggling the exit node re-sends the
1149 /// explicit subnet routes plus (when `enable`) `0.0.0.0/0`, so the two preferences are
1150 /// independent. Like `set_advertise_routes`, this both re-advertises `Hostinfo.RoutableIPs` to
1151 /// control AND updates the forwarder's accept/dial set, applied together. Control still gates
1152 /// whether the advertised exit node is actually *usable* by peers (this only advertises it).
1153 pub async fn set_advertise_exit_node(&self, enable: bool) -> Result<(), Error> {
1154 let composed = {
1155 let mut adv = self.advertise.lock().unwrap_or_else(|p| p.into_inner());
1156 adv.exit_node = enable;
1157 compose_advertised_routes(adv.routes.clone(), adv.exit_node)
1158 };
1159 self.apply_advertised_routes(composed).await
1160 }
1161
1162 /// Push a freshly-composed advertised-route set to BOTH halves: the forwarder's accept/dial
1163 /// table (local) FIRST — so the node forwards a prefix before control grants it, never the
1164 /// reverse — then re-advertise `Hostinfo.RoutableIPs` to control on the live map-poll connection
1165 /// (wire). `composed` is already filtered + exit-node-folded by [`compose_advertised_routes`].
1166 async fn apply_advertised_routes(&self, composed: Vec<ipnet::IpNet>) -> Result<(), Error> {
1167 self.forwarder
1168 .ask(forwarder_actor::UpdateRoutes {
1169 routes: composed.clone(),
1170 })
1171 .await?;
1172 self.control
1173 .ask(control_runner::SetAdvertiseRoutes { routes: composed })
1174 .await
1175 .map_err(Into::into)
1176 }
1177
1178 /// Change this node's hostname at runtime (Go `tailscale set --hostname`), re-reporting
1179 /// `Hostinfo.Hostname` to control on the live map-poll connection. Hostname is display-only
1180 /// (control reflects it in the netmap), so there is no dataplane half. The new value is also
1181 /// what a subsequent re-registration reports, so it persists across a reconnect.
1182 pub async fn set_hostname(&self, hostname: String) -> Result<(), Error> {
1183 self.control
1184 .ask(control_runner::SetHostname { hostname })
1185 .await
1186 .map_err(Into::into)
1187 }
1188
1189 /// Subscribe to netmap peer-change events: the **narrow** peer-set view.
1190 ///
1191 /// Returns a [`watch::Receiver`] whose value is the current set of peer [`StatusNode`]s,
1192 /// updated on every netmap state update from control. Await
1193 /// [`watch::Receiver::changed`](tokio::sync::watch::Receiver::changed) to react to peers
1194 /// joining, leaving, or changing. For the unified Go-`WatchIPNBus` feed that merges this with
1195 /// device-state and the interactive-login URL, see [`watch_ipn_bus`](Self::watch_ipn_bus); this
1196 /// method is the peer-only projection of the same underlying cell.
1197 pub async fn watch_netmap(&self) -> Result<watch::Receiver<Vec<StatusNode>>, Error> {
1198 self.peer_tracker
1199 .upgrade()
1200 .ok_or(Error {
1201 kind: ErrorKind::ActorGone,
1202 target_actor: None,
1203 message_ty: None,
1204 })?
1205 .ask(peer_tracker::WatchNetmap)
1206 .await
1207 .map_err(Into::into)
1208 }
1209
1210 /// The current device connection-[`DeviceState`].
1211 pub fn device_state(&self) -> DeviceState {
1212 self.state_rx.borrow().clone()
1213 }
1214
1215 /// Watch the device connection-[`DeviceState`] (`Connecting` → `Running` / `NeedsLogin` /
1216 /// `Expired` / `Failed`).
1217 ///
1218 /// Returns a [`watch::Receiver`]; await
1219 /// [`changed`](tokio::sync::watch::Receiver::changed) to react push-style to control connection
1220 /// transitions instead of polling [`status`](Self::status). The initial value is the current
1221 /// state. Note: a transient per-reconnect dip back to `Connecting` is **not** currently
1222 /// emitted (control transparently reconnects below this layer); the state reflects registration
1223 /// outcome and node-key expiry.
1224 pub fn watch_state(&self) -> watch::Receiver<DeviceState> {
1225 self.state_rx.clone()
1226 }
1227
1228 /// Wait until the device finishes registering, returning a typed outcome.
1229 ///
1230 /// Resolves `Ok(())` once the device reaches [`DeviceState::Running`]. Returns a typed
1231 /// [`RegistrationError`] otherwise — the actionable distinction between "retry", "re-pair", and
1232 /// "drive interactive login" that replaces polling the device's `ipv4_addr` in a loop:
1233 /// - `AuthRejected` — bad/expired/unknown auth key. **Permanent** (re-pair).
1234 /// - `NeedsLogin(url)` — interactive authorization required (no usable auth key). **Not
1235 /// permanent**: the runtime keeps retrying and will reach `Running` once the user authorizes
1236 /// the URL. An **auth-key** caller should treat this as a failure; an **interactive** caller
1237 /// should ignore this return and instead drive the flow via [`watch_state`](Self::watch_state)
1238 /// (this method returns the URL eagerly rather than blocking for the whole login).
1239 /// - `NetworkUnreachable` — control unreachable. **Transient** (retry).
1240 /// - `Timeout` — no settled state within `timeout`.
1241 ///
1242 /// `KeyExpired` is not produced by this initial wait (a node key expires only *after* it has
1243 /// come up); observe post-registration expiry via [`watch_state`](Self::watch_state).
1244 /// `timeout` of `None` waits indefinitely for a settled state.
1245 pub async fn wait_until_running(
1246 &self,
1247 timeout: Option<Duration>,
1248 ) -> Result<(), RegistrationError> {
1249 device_state::wait_for_running(self.state_rx.clone(), timeout).await
1250 }
1251
1252 /// Subscribe to the unified IPN notification bus (Go `ipn` `WatchIPNBus` /
1253 /// `LocalBackend.WatchNotifications`).
1254 ///
1255 /// Returns an [`IpnBusWatcher`]; await [`next`](IpnBusWatcher::next) to receive [`Notify`]
1256 /// events that coalesce device-[`DeviceState`] changes (including the interactive-login URL as
1257 /// `browse_to_url`) and netmap peer-set changes into one feed. `mask`
1258 /// ([`NotifyWatchOpt`]) selects which current-state fields are front-loaded as an initial
1259 /// snapshot on subscribe (`INITIAL_STATE` / `INITIAL_NETMAP`), exactly like Go's
1260 /// `NotifyInitialState` / `NotifyInitialNetMap`.
1261 ///
1262 /// This composes the same `watch` cells as [`watch_state`](Self::watch_state),
1263 /// [`watch_netmap`](Self::watch_netmap), and `pop_browser_url` — one source of truth, so the
1264 /// merged feed cannot diverge from those narrow views. Besides the registration-time login URL
1265 /// (carried by `NeedsLogin`), `browse_to_url` also streams the mid-session
1266 /// `MapResponse.PopBrowserURL` (re-auth / consent on an already-running node). Delivery is
1267 /// best-effort/lossy (a bounded per-watcher buffer; a notification is dropped rather than
1268 /// blocking the runtime if a slow consumer's buffer fills), matching Go's bus. The stream ends
1269 /// (`next` returns `None`) on runtime shutdown or when the watcher is dropped.
1270 pub async fn watch_ipn_bus(&self, mask: NotifyWatchOpt) -> Result<IpnBusWatcher, Error> {
1271 // The peer-set cell lives on the peer-tracker actor; obtain a receiver the same way
1272 // `watch_netmap` does. State + shutdown cells are held here.
1273 let peer_rx = self
1274 .peer_tracker
1275 .upgrade()
1276 .ok_or(Error {
1277 kind: ErrorKind::ActorGone,
1278 target_actor: None,
1279 message_ty: None,
1280 })?
1281 .ask(peer_tracker::WatchNetmap)
1282 .await?;
1283 // The running-node consent-URL cell lives on the control runner; obtain its receiver the
1284 // same way (the control actor ref is strong, so no upgrade needed).
1285 let browser_rx = self.control.ask(control_runner::WatchBrowserUrl).await?;
1286 Ok(ipn_bus::spawn_watcher(
1287 mask,
1288 self.state_rx.clone(),
1289 peer_rx,
1290 browser_rx,
1291 self.shutdown.subscribe(),
1292 ))
1293 }
1294
1295 /// Attempt to shut down the runtime gracefully.
1296 ///
1297 /// Returns false if the shutdown timed out. It is still shut down if it timed out, just
1298 /// more violently and with possible resource leaks.
1299 pub async fn graceful_shutdown(self, timeout: Option<Duration>) -> bool {
1300 self.shutdown.send_replace(true);
1301
1302 async fn _shutdown_all(runtime: Runtime) {
1303 // See the note in `Drop` for why we only need to stop these actors to bring down the
1304 // whole runtime.
1305
1306 let _ignore = runtime.control.stop_gracefully().await;
1307 let _ignore = runtime.dataplane.stop_gracefully().await;
1308 let _ignore = runtime.env.bus.stop_gracefully().await;
1309
1310 tokio::join![
1311 runtime.control.wait_for_shutdown(),
1312 runtime.dataplane.wait_for_shutdown(),
1313 runtime.env.bus.wait_for_shutdown(),
1314 ];
1315 }
1316
1317 let fut = _shutdown_all(self);
1318
1319 match timeout {
1320 Some(timeout) => tokio::time::timeout(timeout, fut).await.is_ok(),
1321 None => {
1322 fut.await;
1323 true
1324 }
1325 }
1326 }
1327}
1328
1329impl Drop for Runtime {
1330 fn drop(&mut self) {
1331 // Stop the taildrop reaper so it cannot outlive the runtime (the `reauth_bridge` pattern). It
1332 // also self-exits when `shutdown` flips below, but aborting is immediate and covers the
1333 // already-shutdown early-return path too.
1334 if let Some(reaper) = self.taildrop_reaper.take() {
1335 reaper.abort();
1336 }
1337
1338 // We must have already run `graceful_shutdown`: on the happy path, this does nothing, but
1339 // if it timed out, we need to make sure the actors are dead so we don't leak them and their
1340 // dependents.
1341 if *self.shutdown.borrow() {
1342 self.control.kill();
1343 self.dataplane.kill();
1344 self.env.bus.kill();
1345 return;
1346 }
1347
1348 self.shutdown.send_replace(true);
1349
1350 // Actors shut down when the last ActorRef to them is dropped (as nothing can send them
1351 // messages anymore). If we don't hold an ActorRef in Runtime, in general the only thing
1352 // that has one is the MessageBus, which each actor subscribes to for a subset of messages.
1353 // Hence, if we shut down the bus, most actors die as well.
1354
1355 // First shut down the actors we have an ActorRef to:
1356 try_shutdown(&self.control);
1357 try_shutdown(&self.dataplane);
1358
1359 // Then shutdown the message bus, stopping the rest of the actors:
1360 try_shutdown(&self.env.bus);
1361 }
1362}
1363
1364fn try_shutdown(a: &ActorRef<impl kameo::Actor>) {
1365 if let Err(e) = a.mailbox_sender().try_send(Signal::Stop) {
1366 tracing::error!(error = %e, "graceful shutdown failed, killing actor");
1367 a.kill();
1368 }
1369}
1370
/// The Tailscale overlay MTU. Both userspace netstacks MUST advertise an MSS that fits within it,
/// so the WireGuard encrypt path is never handed an IP packet bigger than the tunnel can carry
/// (the netstack does no PMTU discovery, and nothing re-segments between it and the 1280-MTU TUN).
/// The TUN device uses the same default (`tun_config_from_control`); both derive from this value,
/// which keeps the netstack and the TUN in agreement.
///
/// This value is the **inner** IP-packet budget. The WireGuard transport header (a 16-byte
/// `TransportDataHeader` plus the 16-byte AEAD tag, 32 bytes in total) is added by
/// `TransmitSession::encrypt` *after* the netstack has produced the inner packet, and the outer
/// UDP/IP headers go on top of that. Do NOT subtract the WireGuard overhead here: that would be a
/// double-subtraction that under-fills the tunnel and diverges from the TUN's MTU. The
/// compile-time assert below documents that the resulting outer datagram still fits a
/// conventional 1500-byte physical path with margin (1280 + 32 WG + 8 UDP + 20 outer-IP = 1340).
const DEFAULT_OVERLAY_MTU: u16 = 1280;

const _: () = {
    // Inner packet + WireGuard transport overhead + UDP header + outer IPv4 header.
    let outer_datagram = DEFAULT_OVERLAY_MTU as usize + 32 + 8 + 20;
    assert!(
        outer_datagram <= 1500,
        "inner overlay MTU + WireGuard(32) + UDP(8) + outer-IP(20) must fit a 1500-byte physical path"
    );
};
1390
1391/// Build the netstack config shared by both userspace netstacks (application + forwarder) from the
1392/// per-deployment `tcp_buffer_size` and `mtu` knobs.
1393///
1394/// `tcp_buffer_size`: `None` keeps the netstack default (256 KiB/direction); `Some(n)` overrides it
1395/// (e.g. a smaller window on a memory-constrained exit node forwarding many concurrent flows — see
1396/// [`netstack::netcore::Config::tcp_buffer_size`]).
1397///
1398/// `mtu`: the overlay/tunnel MTU. `None` (and a stray `0`) falls back to [`DEFAULT_OVERLAY_MTU`]
1399/// (1280), exactly as the TUN device does, so the netstack's advertised MSS fits the tunnel. Leaving
1400/// this at the netstack's generic 1500 default (the prior behavior) made smoltcp advertise MSS ~1460
1401/// and segment to ~1500 B, which then overflowed the 1280 TUN — a PMTU black-hole / throughput cliff.
1402///
1403/// Factored out of [`Runtime::spawn`] so the mapping is unit-testable without standing up the actors.
1404fn netstack_config_from(
1405 tcp_buffer_size: Option<usize>,
1406 mtu: Option<u16>,
1407) -> netstack::netcore::Config {
1408 let mut c = netstack::netcore::Config::default();
1409 if let Some(tcp_buffer_size) = tcp_buffer_size {
1410 c.tcp_buffer_size = tcp_buffer_size;
1411 }
1412 // `0` is not a usable MTU; treat it like `None` and fall back to the overlay default, mirroring
1413 // the TUN's `and_then(NonZeroU16::new).unwrap_or(1280)`.
1414 let mtu = mtu.filter(|&m| m != 0).unwrap_or(DEFAULT_OVERLAY_MTU);
1415 c.mtu = usize::from(mtu);
1416 c
1417}
1418
1419/// Filter a requested advertise-route set to the IPv4-only, deduplicated set this fork can honor,
1420/// mirroring [`ts_control::Config::advertised_routes`] so a runtime `set_advertise_routes` feeds the
1421/// wire (control grant) and the forwarder (accept/dial table) the identical final set. IPv6 prefixes
1422/// are dropped under the IPv6-off posture — we never advertise a route we won't forward. Order is
1423/// preserved (first occurrence wins). Factored out so the filter is unit-testable without an actor.
1424fn filter_advertise_routes(routes: Vec<ipnet::IpNet>) -> Vec<ipnet::IpNet> {
1425 let mut filtered: Vec<ipnet::IpNet> = Vec::new();
1426 for net in routes {
1427 if matches!(net, ipnet::IpNet::V4(_)) {
1428 if !filtered.contains(&net) {
1429 filtered.push(net);
1430 }
1431 } else {
1432 tracing::warn!(prefix = %net, "dropping IPv6 advertise route (IPv6-off posture)");
1433 }
1434 }
1435 filtered
1436}
1437
1438/// Compose the final advertised-route set from the explicit subnet `routes` and the exit-node flag,
1439/// mirroring [`ts_control::Config::advertised_routes`]: the IPv4-only, deduplicated subnet prefixes,
1440/// plus `0.0.0.0/0` appended when `exit_node` is set. This is the single source of truth both
1441/// runtime advertise mutators (`set_advertise_routes`, `set_advertise_exit_node`) feed, so the two
1442/// compose instead of clobbering. Factored out so the composition is unit-testable without an actor.
1443fn compose_advertised_routes(routes: Vec<ipnet::IpNet>, exit_node: bool) -> Vec<ipnet::IpNet> {
1444 let mut filtered = filter_advertise_routes(routes);
1445 if exit_node {
1446 let default_v4 = ipnet::IpNet::V4(
1447 ipnet::Ipv4Net::new(core::net::Ipv4Addr::UNSPECIFIED, 0)
1448 .expect("0.0.0.0/0 is a valid prefix"),
1449 );
1450 if !filtered.contains(&default_v4) {
1451 filtered.push(default_v4);
1452 }
1453 }
1454 filtered
1455}
1456
1457/// The runtime's live advertised-route preference: the explicit subnet routes plus whether this node
1458/// advertises itself as an exit node. Held behind a `Mutex` on the [`Runtime`] so
1459/// [`Runtime::set_advertise_routes`] and [`Runtime::set_advertise_exit_node`] each mutate their own
1460/// part and re-send the composed set — they compose rather than clobber (Go `EditPrefs` keeps
1461/// `AdvertiseRoutes` and the exit-node advertisement as independent prefs that both feed
1462/// `Hostinfo.RoutableIPs`).
1463#[derive(Debug, Default, Clone)]
1464struct AdvertiseState {
1465 /// The explicit subnet prefixes (pre-filter; the last value passed to `set_advertise_routes`).
1466 routes: Vec<ipnet::IpNet>,
1467 /// Whether this node advertises the exit-node default route (`0.0.0.0/0`).
1468 exit_node: bool,
1469}
1470
1471/// Flatten a kameo delegated-reply [`SendError`] for the id-token RPC into the RPC's own
1472/// [`ts_control::IdTokenError`].
1473///
1474/// A [`SendError::HandlerError`](kameo::error::SendError::HandlerError) carries the real
1475/// `IdTokenError` produced by the handler and is surfaced verbatim. Any other send failure (actor
1476/// not running / stopped, mailbox full, send timeout) is a delivery problem rather than an RPC
1477/// result, so it collapses to a transient [`ts_control::IdTokenError::NetworkError`]. Factored out
1478/// of [`Runtime::fetch_id_token`] so this mapping is unit-testable without standing up an actor.
1479fn flatten_send_err<M>(
1480 e: kameo::error::SendError<M, ts_control::IdTokenError>,
1481) -> ts_control::IdTokenError {
1482 match e {
1483 kameo::error::SendError::HandlerError(err) => err,
1484 _ => ts_control::IdTokenError::NetworkError,
1485 }
1486}
1487
1488/// Flatten a kameo `SendError` from the `Logout` ask into a [`ts_control::LogoutError`].
1489///
1490/// A `HandlerError` carries the real `LogoutError` from the control RPC and is surfaced verbatim;
1491/// any other send failure (actor not running / stopped, mailbox full, send timeout) — a delivery
1492/// problem, not a logout result — collapses to the transient [`ts_control::LogoutError::NetworkError`]
1493/// (logout is idempotent, so a retry after a delivery failure is safe). Factored out of
1494/// [`Runtime::logout`] so the mapping is unit-testable without standing up an actor.
1495fn flatten_logout_send_err<M>(
1496 e: kameo::error::SendError<M, ts_control::LogoutError>,
1497) -> ts_control::LogoutError {
1498 match e {
1499 kameo::error::SendError::HandlerError(err) => err,
1500 _ => ts_control::LogoutError::NetworkError,
1501 }
1502}
1503
1504/// Flatten a kameo `SendError` from the `SetDns` ask into a [`ts_control::SetDnsError`].
1505///
1506/// A `HandlerError` carries the real `SetDnsError` from the set-dns RPC and is surfaced verbatim;
1507/// any other send failure (actor not running / stopped, mailbox full, send timeout) — a delivery
1508/// problem, not a publish result — collapses to the transient
1509/// [`ts_control::SetDnsError::NetworkError`]. Factored out of [`Runtime::set_dns`] so the mapping is
1510/// unit-testable without standing up an actor.
1511fn flatten_set_dns_send_err<M>(
1512 e: kameo::error::SendError<M, ts_control::SetDnsError>,
1513) -> ts_control::SetDnsError {
1514 match e {
1515 kameo::error::SendError::HandlerError(err) => err,
1516 _ => ts_control::SetDnsError::NetworkError,
1517 }
1518}
1519
1520/// Flatten a kameo `SendError` from a TKA mutation ask (`TkaSign`/`TkaDisable`) into a
1521/// [`ts_control::TkaSyncError`]. A `HandlerError` carries the real RPC error; any other send failure
1522/// (actor shutdown / mailbox closed) is surfaced as the transient
1523/// [`ts_control::TkaSyncError::NetworkError`]. Generic over the message type so both share it.
1524fn flatten_tka_send_err<M>(
1525 e: kameo::error::SendError<M, ts_control::TkaSyncError>,
1526) -> ts_control::TkaSyncError {
1527 match e {
1528 kameo::error::SendError::HandlerError(err) => err,
1529 _ => ts_control::TkaSyncError::NetworkError,
1530 }
1531}
1532
/// Collapse a kameo `SendError` from the `GetCertificate` / `GetCertPair` ask into a
/// [`ts_control::CertError`].
///
/// A `HandlerError` holds the genuine `CertError` produced by the ACME issuance and is returned
/// verbatim. `CertError` has no transient-network variant, so every other send failure (actor
/// not running / stopped, mailbox full, send timeout) — a delivery problem rather than an
/// issuance result — becomes a [`ts_control::CertError::Io`]. Generic over the message type, so
/// it serves both [`Runtime::get_certificate`] and [`Runtime::cert_pair`]; factored out so the
/// mapping is unit-testable without standing up an actor.
#[cfg(feature = "acme")]
fn flatten_cert_send_err<M>(
    e: kameo::error::SendError<M, ts_control::CertError>,
) -> ts_control::CertError {
    let kameo::error::SendError::HandlerError(issuance_err) = e else {
        // Delivery failed: synthesize an I/O error, since there is no network variant to use.
        let unavailable =
            std::io::Error::other("control runner unavailable for certificate issuance");
        return ts_control::CertError::Io(unavailable);
    };
    issuance_err
}
1553
/// Unit tests for the pure helpers factored out of [`Runtime`]: the netstack-config mapping, the
/// advertise-route filter/composition, and the `SendError` flatteners. None of them spawns an
/// actor.
#[cfg(test)]
mod tests {
    use super::*;

    /// `None` must leave the netstack's own default TCP window in place (the 256 KiB throughput
    /// default), and must not silently coerce to some other value.
    #[test]
    fn netstack_config_none_uses_netstack_default() {
        let default = netstack::netcore::Config::default();
        let built = netstack_config_from(None, None);
        assert_eq!(
            built.tcp_buffer_size, default.tcp_buffer_size,
            "None must inherit the netstack default TCP buffer size"
        );
    }

    #[test]
    fn netstack_config_mtu_defaults_to_overlay_not_generic_1500() {
        // The crux of the fix: with no explicit MTU, the netstack must use the 1280 overlay MTU,
        // NOT smoltcp's generic 1500 default — otherwise it advertises an MSS that overflows the
        // tunnel.
        let built = netstack_config_from(None, None);
        assert_eq!(
            built.mtu,
            usize::from(DEFAULT_OVERLAY_MTU),
            "netstack MTU must default to the 1280 overlay MTU, not the 1500 netstack default"
        );
        assert_ne!(built.mtu, 1500, "must not leave the generic 1500 default");
    }

    #[test]
    fn netstack_config_honors_explicit_mtu_and_rejects_zero() {
        // An explicit (control-supplied) MTU is honored verbatim.
        assert_eq!(netstack_config_from(None, Some(1400)).mtu, 1400);
        // A stray 0 is not a usable MTU; fall back to the overlay default (mirrors the TUN).
        assert_eq!(
            netstack_config_from(None, Some(0)).mtu,
            usize::from(DEFAULT_OVERLAY_MTU)
        );
    }

    #[test]
    fn netstack_config_overlay_mtu_matches_tun_default() {
        // The netstack MTU default and the TUN MTU default must be the same value, or the two
        // netstacks and the TUN would disagree on the segment size budget.
        assert_eq!(
            DEFAULT_OVERLAY_MTU, 1280,
            "overlay MTU must match the TUN device default (tun_config_from_control)"
        );
    }

    /// `Some(n)` must override the TCP window (the memory-vs-throughput knob exit-node operators
    /// reach for), reaching the config that both netstacks are built from.
    #[test]
    fn netstack_config_some_overrides_buffer() {
        let built = netstack_config_from(Some(64 * 1024), None);
        assert_eq!(
            built.tcp_buffer_size,
            64 * 1024,
            "Some(n) must override the TCP buffer size that both netstacks use"
        );
    }

    /// `set_advertise_routes` must feed the wire and the forwarder the IDENTICAL filtered set:
    /// IPv4-only (IPv6 dropped under the IPv6-off posture), deduplicated, order preserved.
    #[test]
    fn filter_advertise_routes_keeps_v4_dedups_drops_v6() {
        let v4a: ipnet::IpNet = "10.0.0.0/24".parse().unwrap();
        let v4b: ipnet::IpNet = "192.168.1.0/24".parse().unwrap();
        let v6: ipnet::IpNet = "2001:db8::/32".parse().unwrap();

        // Mixed input with a duplicate v4 and a v6 prefix.
        let out = filter_advertise_routes(vec![v4a, v6, v4b, v4a]);

        assert_eq!(
            out,
            vec![v4a, v4b],
            "v6 dropped, duplicate v4 collapsed, first-occurrence order preserved"
        );
    }

    /// An all-IPv6 request filters to empty (we never advertise a route we won't forward) rather
    /// than erroring — clearing the advertised set is a legitimate outcome.
    #[test]
    fn filter_advertise_routes_all_v6_is_empty() {
        let v6: ipnet::IpNet = "2001:db8::/32".parse().unwrap();
        assert!(filter_advertise_routes(vec![v6]).is_empty());
    }

    /// `compose_advertised_routes` folds the exit-node `0.0.0.0/0` onto the filtered subnet routes
    /// when (and only when) the exit-node flag is set — so `set_advertise_routes` and
    /// `set_advertise_exit_node` compose. The two preferences are independent.
    #[test]
    fn compose_advertised_routes_folds_exit_node() {
        let subnet: ipnet::IpNet = "10.0.0.0/24".parse().unwrap();
        let default_v4: ipnet::IpNet = "0.0.0.0/0".parse().unwrap();

        // Exit node off: just the (filtered) subnet routes.
        assert_eq!(
            compose_advertised_routes(vec![subnet], false),
            vec![subnet],
            "exit-node off ⇒ no default route"
        );
        // Exit node on: subnet routes PLUS 0.0.0.0/0.
        assert_eq!(
            compose_advertised_routes(vec![subnet], true),
            vec![subnet, default_v4],
            "exit-node on ⇒ 0.0.0.0/0 appended"
        );
        // Exit node on with NO subnet routes: just the default route.
        assert_eq!(
            compose_advertised_routes(vec![], true),
            vec![default_v4],
            "exit-node alone advertises only 0.0.0.0/0"
        );
        // Idempotent: an explicit 0.0.0.0/0 already in the routes isn't duplicated by the fold.
        assert_eq!(
            compose_advertised_routes(vec![default_v4], true),
            vec![default_v4],
            "the exit-node fold dedups against an explicit default route"
        );
    }

    /// A `HandlerError` carries the real `IdTokenError` from the RPC handler and must pass through
    /// verbatim, not be flattened to a generic network error. Using an `Internal(_)` payload (not
    /// `NetworkError`) makes the passthrough observable: a buggy flatten that always returned
    /// `NetworkError` would fail this assertion.
    #[test]
    fn flatten_send_err_handler_error_passes_through() {
        // Build an `Internal(_)` payload via the public `From<Utf8Error>` conversion (no extra
        // deps): it is distinct from the `_ => NetworkError` fallback, so a buggy flatten that
        // always returned `NetworkError` would fail this assertion.
        // Route the invalid bytes through a runtime Vec so the `invalid_from_utf8` lint (which
        // only fires on compile-time-known literals) doesn't flag this intentional bad input.
        let bytes = vec![0xffu8, 0xfe];
        let utf8_err = core::str::from_utf8(&bytes).unwrap_err();
        let inner = ts_control::IdTokenError::from(utf8_err);
        assert!(matches!(inner, ts_control::IdTokenError::Internal(_)));
        let e: kameo::error::SendError<control_runner::FetchIdToken, ts_control::IdTokenError> =
            kameo::error::SendError::HandlerError(inner.clone());
        assert_eq!(flatten_send_err(e), inner);
    }

    /// A non-handler send failure (actor stopped) is a delivery problem, not an RPC result, so it
    /// must collapse to a transient `NetworkError`.
    #[test]
    fn flatten_send_err_actor_stopped_is_network_error() {
        let e: kameo::error::SendError<control_runner::FetchIdToken, ts_control::IdTokenError> =
            kameo::error::SendError::ActorStopped;
        assert_eq!(flatten_send_err(e), ts_control::IdTokenError::NetworkError);
    }

    /// `ActorNotRunning` (the message bounces back undelivered) is likewise a delivery failure and
    /// must map to a transient `NetworkError`.
    #[test]
    fn flatten_send_err_actor_not_running_is_network_error() {
        let e: kameo::error::SendError<control_runner::FetchIdToken, ts_control::IdTokenError> =
            kameo::error::SendError::ActorNotRunning(control_runner::FetchIdToken {
                audience: "sts.amazonaws.com".to_string(),
            });
        assert_eq!(flatten_send_err(e), ts_control::IdTokenError::NetworkError);
    }

    /// A `HandlerError` from the logout RPC carries the real `LogoutError` and must pass through
    /// verbatim. An `Internal(_)` payload (distinct from the `_ => NetworkError` fallback) makes
    /// the passthrough observable.
    #[test]
    fn flatten_logout_send_err_handler_error_passes_through() {
        let inner = ts_control::LogoutError::Internal(ts_control::LogoutInternalErrorKind::Http);
        assert!(matches!(inner, ts_control::LogoutError::Internal(_)));
        let e: kameo::error::SendError<control_runner::Logout, ts_control::LogoutError> =
            kameo::error::SendError::HandlerError(inner.clone());
        assert_eq!(flatten_logout_send_err(e), inner);
    }

    /// A non-handler send failure (actor stopped) is a delivery problem, not a logout result, and
    /// collapses to a transient `NetworkError` (logout is idempotent, so a retry is safe).
    #[test]
    fn flatten_logout_send_err_actor_stopped_is_network_error() {
        let e: kameo::error::SendError<control_runner::Logout, ts_control::LogoutError> =
            kameo::error::SendError::ActorStopped;
        assert_eq!(
            flatten_logout_send_err(e),
            ts_control::LogoutError::NetworkError
        );
    }

    /// A `HandlerError` from the set-dns RPC carries the real `SetDnsError` and must pass through
    /// verbatim. An `Internal(_)` payload (distinct from the `_ => NetworkError` fallback) makes
    /// the passthrough observable.
    #[test]
    fn flatten_set_dns_send_err_handler_error_passes_through() {
        let inner = ts_control::SetDnsError::Internal(ts_control::SetDnsInternalErrorKind::Http);
        assert!(matches!(inner, ts_control::SetDnsError::Internal(_)));
        let e: kameo::error::SendError<control_runner::SetDns, ts_control::SetDnsError> =
            kameo::error::SendError::HandlerError(inner.clone());
        assert_eq!(flatten_set_dns_send_err(e), inner);
    }

    /// A non-handler send failure (actor stopped) is a delivery problem, not a publish result, and
    /// collapses to a transient `NetworkError`.
    #[test]
    fn flatten_set_dns_send_err_actor_stopped_is_network_error() {
        let e: kameo::error::SendError<control_runner::SetDns, ts_control::SetDnsError> =
            kameo::error::SendError::ActorStopped;
        assert_eq!(
            flatten_set_dns_send_err(e),
            ts_control::SetDnsError::NetworkError
        );
    }

    /// A `HandlerError` from a TKA mutation RPC carries the real `TkaSyncError` and must pass
    /// through verbatim (an `Unsupported` payload makes the passthrough observable, distinct from
    /// the `_ => NetworkError` fallback).
    #[test]
    fn flatten_tka_send_err_handler_error_passes_through() {
        let e: kameo::error::SendError<control_runner::TkaSign, ts_control::TkaSyncError> =
            kameo::error::SendError::HandlerError(ts_control::TkaSyncError::Unsupported);
        assert_eq!(
            flatten_tka_send_err(e),
            ts_control::TkaSyncError::Unsupported
        );
    }

    /// A non-handler send failure (actor stopped) collapses to a transient `NetworkError`.
    #[test]
    fn flatten_tka_send_err_actor_stopped_is_network_error() {
        let e: kameo::error::SendError<control_runner::TkaSign, ts_control::TkaSyncError> =
            kameo::error::SendError::ActorStopped;
        assert_eq!(
            flatten_tka_send_err(e),
            ts_control::TkaSyncError::NetworkError
        );
    }

    /// The same flatten works for the `TkaDisable` message type (the helper is generic over `M`).
    #[test]
    fn flatten_tka_send_err_works_for_disable() {
        let e: kameo::error::SendError<control_runner::TkaDisable, ts_control::TkaSyncError> =
            kameo::error::SendError::HandlerError(ts_control::TkaSyncError::Unsupported);
        assert_eq!(
            flatten_tka_send_err(e),
            ts_control::TkaSyncError::Unsupported
        );
    }

    /// A `HandlerError` from the certificate ask carries the real `CertError` and must pass
    /// through verbatim. The payload uses a message different from the fallback's, so a flatten
    /// that always synthesized the "control runner unavailable" error would fail here. The helper
    /// is generic over `M`, so `()` stands in for the message type.
    #[cfg(feature = "acme")]
    #[test]
    fn flatten_cert_send_err_handler_error_passes_through() {
        let inner = ts_control::CertError::Io(std::io::Error::other("acme issuance failed"));
        let e: kameo::error::SendError<(), ts_control::CertError> =
            kameo::error::SendError::HandlerError(inner);
        assert!(
            matches!(
                flatten_cert_send_err(e),
                ts_control::CertError::Io(err) if err.to_string() == "acme issuance failed"
            ),
            "the handler's own CertError must be surfaced unchanged"
        );
    }

    /// A non-handler send failure (actor stopped) is a delivery problem, not an issuance result.
    /// `CertError` has no network variant, so it must collapse to the synthesized `Io` error.
    #[cfg(feature = "acme")]
    #[test]
    fn flatten_cert_send_err_actor_stopped_is_io_error() {
        let e: kameo::error::SendError<(), ts_control::CertError> =
            kameo::error::SendError::ActorStopped;
        assert!(
            matches!(
                flatten_cert_send_err(e),
                ts_control::CertError::Io(err)
                    if err.to_string() == "control runner unavailable for certificate issuance"
            ),
            "a delivery failure must map to the synthesized Io error"
        );
    }
}