ts_runtime/lib.rs
1#![doc = include_str!("../README.md")]
2
3extern crate ts_netstack_smoltcp as netstack;
4
5use core::time::Duration;
6use std::sync::Arc;
7
8use kameo::{
9 actor::{ActorRef, Spawn, WeakActorRef},
10 mailbox::Signal,
11};
12use netstack::netcore::Channel;
13use tokio::sync::watch;
14
15use crate::{
16 control_runner::ControlRunner, dataplane::DataplaneActor, direct::DirectManager,
17 forwarder_actor::ForwarderActor, multiderp::Multiderp, netstack_actor::NetstackActor,
18};
19
/// Pcap stream framer for debug packet capture (`CapturePcap`).
pub mod capture;
/// Control runner.
pub mod control_runner;
mod dataplane;
mod derp_latency;
/// Device connection-state tracking ([`DeviceState`]) and typed registration outcome
/// ([`RegistrationError`]).
pub mod device_state;
mod direct;
mod env;
mod error;
/// Fallback TCP handler registry (`tsnet.Server.RegisterFallbackTCPHandler` parity).
pub mod fallback_tcp;
mod forwarder_actor;
/// Client-side Funnel ingress termination (`tsnet`'s `ListenFunnel` data path).
pub mod funnel;
/// Unified IPN notification bus ([`Notify`] / [`watch_ipn_bus`](Runtime::watch_ipn_bus)), mirroring
/// Go `ipn` `LocalBackend.WatchNotifications` / the `WatchIPNBus` LocalAPI.
pub mod ipn_bus;
mod magic_dns;
mod multiderp;
mod netstack_actor;
mod packetfilter;
/// Peer tracker actor ([`PeerTracker`](peer_tracker::PeerTracker)), queried by [`Runtime`] for
/// peer lookups (status snapshot, WhoIs, peer-by-tailnet-IP).
pub mod peer_tracker;
mod peerapi;
mod peerapi_doh;
mod route_updater;
/// Stored Serve config + accept-loop runtime (`tsnet`'s `Get/SetServeConfig` + serving runtime).
pub mod serve;
mod src_filter;
/// Netmap status snapshot, WhoIs, and watcher types.
pub mod status;
/// Taildrop peer-to-peer file transfer store.
pub mod taildrop;
// NOTE(review): public module without a doc comment — presumably the Taildrop send path; confirm
// and add a `///` summary.
pub mod taildrop_send;
/// Tailnet-Lock (TKA) chain-sync orchestration: bootstrap + offer/send driver (the runtime layer
/// that bridges the `ts_control` sync RPCs and the `ts_tka` chain logic).
mod tka_sync;
#[cfg(feature = "tun")]
mod tun_actor;
61
62pub use device_state::{DeviceState, RegistrationError};
63pub(crate) use env::Env;
64pub use error::{Error, ErrorKind};
65pub use ipn_bus::{IpnBusWatcher, Notify, NotifyWatchOpt};
66pub use status::{FileTarget, NetcheckReport, RegionLatency, Status, StatusNode, WhoIs};
67pub use ts_dataplane::{CaptureHook, CapturePath};
68
69use crate::peer_tracker::PeerTracker;
70
/// The runtime for a tailscale device.
///
/// Built by [`Runtime::spawn`]; holds the actor references, `watch` cells, and shared environment
/// the public methods operate on.
pub struct Runtime {
    /// Reference to the control actor.
    pub control: ActorRef<ControlRunner>,
    /// Reference to the dataplane actor. [`Runtime::install_capture`] asks it to install or clear
    /// the debug packet-capture hook.
    dataplane: ActorRef<DataplaneActor>,
    /// Reference to the direct (disco/UDP underlay) manager, retained so [`Runtime::rebind`] can
    /// ask it to re-bind the underlay socket on a network/link change.
    direct: ActorRef<DirectManager>,
    /// Reference to the application netstack actor. `None` in TUN transport mode, where there is
    /// no userspace application netstack (the application data path is a real kernel TUN device).
    netstack: Option<WeakActorRef<NetstackActor>>,
    /// Reference to the peer tracker for peer lookups.
    pub peer_tracker: WeakActorRef<PeerTracker>,
    /// Fallback TCP handler registry, bound to the application netstack. `None` in TUN transport
    /// mode (no application netstack exists to attach it to).
    fallback_tcp: Option<fallback_tcp::FallbackTcpManager>,
    /// Reference to the forwarder actor, retained so [`Runtime::set_advertise_routes`] can push a
    /// new accept/dial route table onto the running forwarder (the local half of advertising
    /// routes). Without this the strong ref would drop after the startup `GetChannel` and the
    /// forwarder would be reachable only via the message bus.
    forwarder: ActorRef<ForwarderActor>,
    /// Reference to the multiderp manager, retained so [`Runtime::status`] can resolve each
    /// relayed peer's DERP region id to its region **code** (`ipnstate.PeerStatus.Relay`). Without
    /// this the strong ref would drop after startup (it is cloned into the direct manager + route
    /// updater) and the region-code map would be unreachable.
    multiderp: ActorRef<Multiderp>,
    /// Shared runtime environment. [`Runtime::spawn`] clones it into the actors it starts; the
    /// accessor methods here read shared handles (Taildrop store, Funnel ingress slot, ingress
    /// flag) off it.
    env: Env,
    /// Sender side of the shutdown `watch` cell (created as `false`); the receiver side is handed
    /// to [`Env`] at spawn time.
    shutdown: watch::Sender<bool>,
    /// Sender side of the exit-node selector `watch` cell. Held privately here (not on the cloned
    /// `Env`, which keeps only the read side) so that only `Runtime::set_exit_node` can mutate the
    /// selection; the route updater and source filter re-read it via [`Env::exit_node`].
    exit_node_tx: watch::Sender<Option<ts_control::ExitNodeSelector>>,
    /// Sender side of the accept-routes preference `watch` cell. Held privately here (same rationale
    /// as [`exit_node_tx`](Self::exit_node_tx)) so that only [`Runtime::set_accept_routes`] can
    /// toggle it; the route updater and source filter re-read it via [`Env::accept_routes`].
    accept_routes_tx: watch::Sender<bool>,
    /// Receiver mirroring the *active* (resolved + fail-closed) exit node's stable id, fed by the
    /// route updater. Read by [`Runtime::status`] / [`Runtime::active_exit_node`] to report which
    /// exit node traffic is actually egressing through (vs. the merely-configured selector).
    active_exit_rx: watch::Receiver<Option<ts_control::StableNodeId>>,
    /// Receiver for the device connection-state cell, fed by the control runner. Read by
    /// [`Runtime::watch_state`] and [`Runtime::wait_until_running`].
    state_rx: watch::Receiver<DeviceState>,
    /// Receiver for the retained peer-capability grants, fed by the packet-filter updater. Read by
    /// [`Runtime::whois`] to resolve the flow-scoped cap map (Go `apitype.WhoIsResponse.CapMap`).
    cap_grants_rx: watch::Receiver<packetfilter::CapGrants>,
    /// Live advertised-route preference (explicit subnet routes + the exit-node flag), seeded from
    /// the startup config. [`Runtime::set_advertise_routes`] and
    /// [`Runtime::set_advertise_exit_node`] each mutate their part under this lock then re-send the
    /// composed set, so the two compose.
    advertise: std::sync::Mutex<AdvertiseState>,
}
122
123impl Runtime {
124 /// Spawn a new runtime with the given parameters for connecting to a tailnet.
125 pub async fn spawn(
126 config: ts_control::Config,
127 auth_key: Option<String>,
128 keys: ts_keys::NodeState,
129 ) -> Result<Self, Error> {
130 let (shutdown_tx, shutdown_rx) = watch::channel(false);
131
132 // The exit-node selector and accept-routes preference are live `watch` cells so
133 // `Device::set_exit_node` / `set_accept_routes` can change them at runtime.
134 // `new_with_runtime_txs` returns each `Sender` (mutation capability) separately so they are
135 // retained privately on the `Runtime`, while only the `Receiver`s (the readers' contract)
136 // live on the cloned `Env`. Initial values come from `ForwarderConfig`.
137 let (env, exit_node_tx, accept_routes_tx) = Env::new_with_runtime_txs(
138 keys,
139 shutdown_rx,
140 env::ForwarderConfig::from_control_config(&config),
141 );
142
143 // Both userspace netstacks (application + forwarder) share one netstack config. Honor the
144 // per-deployment TCP buffer knob when set, otherwise fall back to the netstack default.
145 let netstack_config = netstack_config_from(config.tcp_buffer_size);
146
147 let dataplane = DataplaneActor::spawn(env.clone());
148
149 let (netstack_id, netstack_up, netstack_down) =
150 dataplane.ask(dataplane::NewOverlayTransport).await?;
151
152 // A second overlay transport feeds the dedicated any-IP forwarder netstack. Inbound packets
153 // for advertised subnet routes / the exit-node default route are routed here (see
154 // `route_updater`), keeping forwarded flows off the application netstack.
155 let (forwarder_id, forwarder_up, forwarder_down) =
156 dataplane.ask(dataplane::NewOverlayTransport).await?;
157
158 let multiderp = Multiderp::spawn((env.clone(), dataplane.clone()));
159
160 // Spawn the direct (disco) underlay manager before the route updater. Its `on_start`
161 // binds the UDP socket and registers its transport synchronously, so by the time the
162 // route updater asks it for the direct transport id it is guaranteed to be available.
163 let direct = DirectManager::spawn((env.clone(), dataplane.clone(), multiderp.clone()));
164
165 // Spawn the forwarder before the route updater. Its `on_start` builds the forwarder
166 // netstack, enables any-IP acceptance, and starts the per-port accept loops synchronously,
167 // so by the time the route updater begins delivering advertised prefixes to
168 // `forwarder_id` the netstack is already draining its transport.
169 let forwarder = ForwarderActor::spawn((
170 env.clone(),
171 netstack_config.clone(),
172 forwarder_up,
173 forwarder_down,
174 ));
175 // Force `on_start` to finish (any-IP enabled, accept loops live) before the route updater
176 // can route the first inbound flow to `forwarder_id`: an `ask` blocks until the actor has
177 // started.
178 //
179 // The forwarder netstack's overlay `Channel` is reused by the TUN application path for
180 // recursive / exit-node-DoH MagicDNS forwarding (TUN mode has no application netstack of its
181 // own, but the forwarder netstack runs in both modes and egresses over the overlay — the
182 // anti-leak property `forward_query`/`forward_doh` require). Only the `tun` Tun arm consumes
183 // it, so it is unused when the `tun` feature is off — allow that without warn-as-error.
184 #[cfg_attr(not(feature = "tun"), allow(unused_variables))]
185 let (forwarder_channel,) = forwarder.ask(forwarder_actor::GetChannel).await?;
186
187 // The route updater is the single authoritative resolver of the active (resolved,
188 // fail-closed) exit node; it publishes the resolved stable id into this watch cell so
189 // `Runtime::status` can report which exit is actually engaged (not just configured).
190 let (active_exit_tx, active_exit_rx) = watch::channel(None);
191 route_updater::RouteUpdater::spawn((
192 multiderp.clone(),
193 direct.clone(),
194 env.clone(),
195 netstack_id,
196 forwarder_id,
197 active_exit_tx,
198 ));
199 // The packet-filter updater also surfaces the retained cap-grants (for flow-scoped WhoIs)
200 // through a `watch` cell whose receiver the `Runtime` holds — the bus has no replay, so a
201 // `watch` is how `Runtime::whois` reads the current grants on demand.
202 let (cap_grants_tx, cap_grants_rx) = watch::channel(Default::default());
203 packetfilter::PacketfilterUpdater::spawn((env.clone(), cap_grants_tx));
204 src_filter::SourceFilterUpdater::spawn(env.clone());
205 let peer_tracker = PeerTracker::spawn(env.clone()).downgrade();
206
207 // Select the application data path from the transport mode. The forwarder/egress path
208 // above is UNCHANGED in both modes — TUN mode only swaps the application data path, never
209 // the forwarder. `config` is moved into `ControlRunner::spawn` below, so branch on a
210 // borrow and clone the small `TunConfig` where needed before the move.
211 //
212 // - Netstack (the default, and the only reachable arm when the `tun` feature is off):
213 // spawn the application netstack + MagicDNS responder + fallback-TCP registry, all on
214 // the `netstack_up`/`netstack_down` overlay seam.
215 // - Tun: spawn `TunActor` on that same overlay seam instead; no application netstack and
216 // no MagicDNS responder exist, and `netstack`/`fallback_tcp` are `None`.
217 // - Tun requested but built without the `tun` feature: hard-error (a config/build
218 // mismatch knowable at spawn time). NEVER silently fall back to netstack.
219 let (netstack, fallback_tcp) = match &config.transport_mode {
220 ts_control::TransportMode::Netstack => {
221 let netstack = NetstackActor::spawn((
222 env.clone(),
223 netstack_config,
224 netstack_up,
225 netstack_down,
226 ));
227
228 // Fetch the netstack channel while we still hold the strong ActorRef, then spawn
229 // the MagicDNS responder on it. Fire-and-forget: like src_filter/route_updater,
230 // it's owned by the message bus and isn't stored on `Runtime`.
231 let (channel,) = netstack.ask(netstack_actor::GetChannel).await?;
232 // The fallback-TCP registry attaches to the application netstack — the same one
233 // that carries the embedder's explicit `Device::tcp_listen` sockets — so a
234 // fallback handler sees exactly the inbound flows no explicit listener matched.
235 let fallback_tcp = fallback_tcp::FallbackTcpManager::new(channel.clone());
236 magic_dns::MagicDnsActor::spawn((env.clone(), channel));
237
238 (Some(netstack.downgrade()), Some(fallback_tcp))
239 }
240
241 #[cfg(feature = "tun")]
242 ts_control::TransportMode::Tun(tun_cfg) => {
243 // Reuse the same `netstack_up`/`netstack_down` overlay-transport pair that would
244 // have fed the netstack — it is just the application-side overlay seam (the name
245 // is historical). No NetstackActor / MagicDnsActor is spawned.
246 tun_actor::TunActor::spawn((
247 env.clone(),
248 tun_cfg.clone(),
249 netstack_up,
250 netstack_down,
251 // Host-route gating inputs read from `Env`'s live cells: subnet routes are only
252 // steered into the TUN when `--accept-routes` is set, and the host `/0` only when
253 // the embedder configured an exit node. See `tun_actor::host_routes_from_node`.
254 // NOTE: TUN mode programs the host FIB once at device build; a runtime
255 // `set_accept_routes` / `set_exit_node` toggle is honored by the netstack data
256 // path immediately, but does not re-steer the host routing table until the device
257 // is (re)built — re-steering is tracked as a follow-up. Reading the live cells
258 // here (rather than a frozen config snapshot) keeps this gating correct as of
259 // whenever the device is actually built.
260 tun_actor::HostRouteGating {
261 accept_routes: env.accept_routes(),
262 exit_node_configured: env.exit_node().is_some(),
263 },
264 // Reuse the forwarder netstack's overlay `Channel` for recursive / exit-node-DoH
265 // MagicDNS forwarding in the TUN datapath (TUN mode has no application netstack
266 // Channel of its own). Egresses over the overlay — anti-leak preserved.
267 forwarder_channel.clone(),
268 ));
269
270 (None, None)
271 }
272
273 #[cfg(not(feature = "tun"))]
274 ts_control::TransportMode::Tun(_) => {
275 return Err(Error {
276 kind: ErrorKind::TunUnavailable,
277 target_actor: None,
278 message_ty: None,
279 });
280 }
281 };
282
283 // Device connection-state cell. Created here (not inside the actor) so the control runner's
284 // `on_start` can publish `Failed`/`NeedsLogin` and still return `Err` without the sender
285 // being tied to a `Self` that never gets constructed on a hard registration failure.
286 let (state_tx, state_rx) = watch::channel(DeviceState::Connecting);
287
288 // Seed the live advertised-route preference from the startup config before `config` moves
289 // into the control runner, so the runtime setters compose against the configured baseline.
290 let advertise = std::sync::Mutex::new(AdvertiseState {
291 routes: config.advertise_routes.clone(),
292 exit_node: config.advertise_exit_node,
293 });
294
295 let control = ControlRunner::spawn(control_runner::Params {
296 config,
297 auth_key,
298 env: env.clone(),
299 state_tx,
300 });
301
302 Ok(Self {
303 control,
304 dataplane,
305 direct,
306 peer_tracker,
307 fallback_tcp,
308 forwarder,
309 multiderp,
310 netstack,
311 env,
312 shutdown: shutdown_tx,
313 exit_node_tx,
314 accept_routes_tx,
315 active_exit_rx,
316 state_rx,
317 cap_grants_rx,
318 advertise,
319 })
320 }
321
322 /// Register a fallback TCP handler consulted for every inbound TCP flow that matches no
323 /// explicit listener (`tsnet.Server.RegisterFallbackTCPHandler` parity).
324 ///
325 /// The returned [`fallback_tcp::FallbackTcpHandle`] deregisters the handler when dropped. See
326 /// [`fallback_tcp`] for the dispatch contract and anti-leak guarantees.
327 ///
328 /// Returns [`ErrorKind::UnsupportedInTunMode`] in TUN transport mode, where there is no
329 /// application netstack to attach a fallback handler to.
330 pub fn register_fallback_tcp_handler(
331 &self,
332 cb: Arc<
333 dyn Fn(core::net::SocketAddr, core::net::SocketAddr) -> fallback_tcp::FallbackDecision
334 + Send
335 + Sync,
336 >,
337 ) -> Result<fallback_tcp::FallbackTcpHandle, Error> {
338 Ok(self
339 .fallback_tcp
340 .as_ref()
341 .ok_or(Error {
342 kind: ErrorKind::UnsupportedInTunMode,
343 target_actor: None,
344 message_ty: None,
345 })?
346 .register(cb))
347 }
348
349 /// Get a channel to send commands to the netstack.
350 ///
351 /// Returns [`ErrorKind::UnsupportedInTunMode`] in TUN transport mode, where there is no
352 /// application netstack.
353 pub async fn channel(&self) -> Result<Channel, Error> {
354 let (channel,) = self
355 .netstack
356 .as_ref()
357 .ok_or(Error {
358 kind: ErrorKind::UnsupportedInTunMode,
359 target_actor: None,
360 message_ty: None,
361 })?
362 .upgrade()
363 .ok_or(Error {
364 kind: ErrorKind::ActorGone,
365 target_actor: None,
366 message_ty: None,
367 })?
368 .ask(netstack_actor::GetChannel)
369 .await?;
370
371 Ok(channel)
372 }
373
374 /// The Taildrop file store, if Taildrop is enabled (`taildrop_dir` configured and the store
375 /// initialized). `None` when disabled — fail-closed. Shared with the peerAPI Taildrop server so
376 /// the embedder's read APIs and the receive path see the same on-disk store.
377 pub fn taildrop_store(&self) -> Option<Arc<crate::taildrop::TaildropStore>> {
378 self.env.taildrop_store.clone()
379 }
380
381 /// The shared Funnel ingress slot the peerAPI `/v0/ingress` route reads per connection.
382 ///
383 /// `Device::listen_funnel` installs a [`FunnelManager`](crate::funnel::FunnelManager)'s sink here
384 /// to make the route live (the peerAPI server is already running from startup). Returns a clone of
385 /// the runtime-lifetime `Arc` so the device can write the slot without restarting the server. See
386 /// [`crate::funnel`] for the ingress data path.
387 pub fn funnel_ingress_slot(&self) -> crate::funnel::FunnelIngressSlot {
388 self.env.funnel_ingress.clone()
389 }
390
391 /// The shared "Funnel ingress listener active" flag (the same `Arc` the control session reads to
392 /// set `HostInfo.IngressEnabled`). `Device::listen_funnel` flips it `true` while a funnel listener
393 /// is up so control routes Funnel traffic to this node; clearing it advertises no live endpoint.
394 pub fn ingress_active_flag(&self) -> std::sync::Arc<std::sync::atomic::AtomicBool> {
395 self.env.ingress_active.clone()
396 }
397
398 /// Install (`Some`) or clear (`None`) the debug packet-capture hook on the running dataplane.
399 /// `Some(hook)` tees every plaintext packet crossing the datapath to `hook` until it is cleared;
400 /// `None` stops capture. Mirrors Go `tstun.Wrapper.InstallCaptureHook` / `ClearCaptureSink`.
401 pub async fn install_capture(
402 &self,
403 hook: Option<ts_dataplane::CaptureHook>,
404 ) -> Result<(), Error> {
405 self.dataplane
406 .ask(dataplane::InstallCapture { hook })
407 .await
408 .map_err(Into::into)
409 }
410
411 /// Re-bind the underlay UDP socket after a network/link change (Wi-Fi switch, sleep/wake). The
412 /// embedder's own link monitor calls this (the engine owns the socket re-bind; the embedder owns
413 /// OS netmon). Re-binds the socket (same-port-preferred, IPv4-only invariant preserved) and
414 /// resets the now-stale local NAT mapping — clearing learned reflexive addresses and every
415 /// confirmed direct path while keeping candidate endpoints, so peers re-probe over the new socket
416 /// and relay over DERP (never a direct host dial) until a path re-confirms. Peers, control, the
417 /// netmap, disco state, and DERP are untouched. A no-op when the underlay is inert (bind failed
418 /// at startup, DERP-only). Mirrors Go magicsock `Conn.Rebind` + `resetEndpointStates`.
419 pub async fn rebind(&self) -> Result<(), Error> {
420 self.direct.ask(direct::Rebind).await.map_err(Error::from)
421 }
422
423 /// A snapshot of the local netmap: this node plus every known peer.
424 ///
425 /// Combines the self node held by the control runner with the peer set held by the peer
426 /// tracker. Mirrors tsnet's `LocalClient::Status`.
427 ///
428 /// `self_node` is `None` until the first netmap update has been received from control. Peer
429 /// entries carry no online/user/capability data (see the [`status`] module docs for that gap).
430 pub async fn status(&self) -> Result<Status, Error> {
431 let self_node_domain = self.control.ask(control_runner::SelfNode).await?;
432 // The MagicDNS suffix is the self node's FQDN minus its host label — already split into
433 // `Node.tailnet` at decode time (Go derives it the same way in `NetworkMap.MagicDNSSuffix`).
434 // Capture it before the domain `Node` is mapped away into a `StatusNode`.
435 let magic_dns_suffix = self_node_domain.as_ref().and_then(|n| n.tailnet.clone());
436 let self_node = self_node_domain.as_ref().map(StatusNode::from_node);
437
438 let peers_with_ids = self
439 .peer_tracker
440 .upgrade()
441 .ok_or(Error {
442 kind: ErrorKind::ActorGone,
443 target_actor: None,
444 message_ty: None,
445 })?
446 .ask(peer_tracker::GetStatus)
447 .await?;
448
449 // Join per-peer connectivity (Go `PeerStatus.CurAddr`): one batched query to the direct
450 // manager for every peer's current trusted direct endpoint, then fill `cur_addr` on each
451 // `StatusNode`. A peer absent from the map is relayed via DERP (`cur_addr = None`). This is a
452 // live snapshot — the direct path can expire/re-confirm between calls (matches Go's snapshot
453 // semantics). The `watch_netmap` stream intentionally carries no connectivity (it is a netmap
454 // watch, not a path-state watch, and does not re-fire on direct↔relay flips).
455 let ids: Vec<ts_transport::PeerId> = peers_with_ids.iter().map(|(id, _)| *id).collect();
456 let best_addrs = self
457 .direct
458 .ask(direct::BestAddrs { ids: ids.clone() })
459 .await
460 .unwrap_or_default();
461
462 // For the peers with NO direct path (relayed via DERP), resolve the region CODE they relay
463 // through (Go `PeerStatus.Relay`). One batched ask to multiderp; `cur_addr` and `relay` are
464 // mutually exclusive for a routed peer, mirroring Go's empty-vs-set strings.
465 let relay_ids: Vec<ts_transport::PeerId> = ids
466 .into_iter()
467 .filter(|id| !best_addrs.contains_key(id))
468 .collect();
469 let relay_codes = if relay_ids.is_empty() {
470 Default::default()
471 } else {
472 self.multiderp
473 .ask(multiderp::RelayCodesForPeers { ids: relay_ids })
474 .await
475 .unwrap_or_default()
476 };
477
478 let peers = peers_with_ids
479 .into_iter()
480 .map(|(id, mut node)| match best_addrs.get(&id).copied() {
481 Some(addr) => {
482 node.cur_addr = Some(addr);
483 node
484 }
485 None => {
486 node.relay = relay_codes.get(&id).cloned();
487 node
488 }
489 })
490 .collect();
491
492 Ok(Status {
493 self_node,
494 peers,
495 active_exit_node: self.active_exit_node(),
496 magic_dns_suffix,
497 })
498 }
499
500 /// List the tailnet peers this node can Taildrop a file *to* (Go LocalAPI `FileTargets`).
501 ///
502 /// Mirrors the upstream send-path filter (`feature/taildrop` `Extension::FileTargets`): a peer
503 /// qualifies when it advertises a reachable peerAPI **and** is either owned by the same user as
504 /// this node **or** explicitly granted the file-sharing-target capability. The whole list is
505 /// gated on this node holding the file-sharing capability (control sets it when the admin enables
506 /// Taildrop) — absent that, an empty list (fail-closed, not an error, matching how the receive
507 /// store returns empty when disabled). Results are sorted by the peer's MagicDNS name.
508 ///
509 /// Targets are listed regardless of current online state (upstream's `FileTargets` does not gate
510 /// on online either; an offline target's send will simply time out). The self node is never
511 /// included. Returns empty before the first netmap.
512 ///
513 /// Divergence from Go: the upstream filter also excludes `tvOS` peers, which this fork cannot
514 /// reproduce (the domain node carries no OS string); the impact is negligible — the actual send
515 /// fail-closes if such a peer refused the transfer.
516 pub async fn file_targets(&self) -> Result<Vec<FileTarget>, Error> {
517 // Node-level gate: this node must hold the file-sharing capability (Taildrop enabled by the
518 // admin). Read it off the self node's cap map, like Go's `hasCapFileSharing()`.
519 let self_node = self.control.ask(control_runner::SelfNode).await?;
520 let Some(self_node) = self_node else {
521 return Ok(Vec::new()); // no netmap yet
522 };
523 if !self_node.can_share_files() {
524 return Ok(Vec::new()); // Taildrop not enabled for the tailnet — fail-closed
525 }
526 let self_user_id = self_node.user_id;
527
528 let peers = self
529 .peer_tracker
530 .upgrade()
531 .ok_or(Error {
532 kind: ErrorKind::ActorGone,
533 target_actor: None,
534 message_ty: None,
535 })?
536 .ask(peer_tracker::AllPeers)
537 .await?;
538
539 // Eligibility + ordering live in `build_file_targets` (pure, unit-tested in `status`).
540 Ok(status::build_file_targets(peers, self_user_id))
541 }
542
543 /// The stable id of the exit node traffic is currently egressing through, or `None` if none is
544 /// engaged. This is the route updater's resolved + fail-closed answer (see
545 /// [`Status::active_exit_node`](crate::status::Status::active_exit_node)): it differs from the
546 /// configured [`exit_node`](Self::exit_node) selector, which may name a peer that is absent or
547 /// no longer advertising a default route (in which case egress is dropped and this returns
548 /// `None`).
549 pub fn active_exit_node(&self) -> Option<ts_control::StableNodeId> {
550 self.active_exit_rx.borrow().clone()
551 }
552
553 /// Request an OIDC ID token from control scoped to `audience` (workload-identity federation).
554 ///
555 /// Returns the signed JWT, or the token RPC's own [`ts_control::IdTokenError`]. The kameo
556 /// delegated-reply send error is flattened: a handler error carries the real `IdTokenError`,
557 /// any other send failure (actor shutdown / mailbox closed) is surfaced as
558 /// [`ts_control::IdTokenError::NetworkError`].
559 pub async fn fetch_id_token(
560 &self,
561 audience: String,
562 ) -> Result<String, ts_control::IdTokenError> {
563 self.control
564 .ask(control_runner::FetchIdToken { audience })
565 .await
566 .map_err(flatten_send_err)
567 }
568
569 /// Log this node out of the tailnet: deregister it by expiring its current node key.
570 ///
571 /// Forwards to the control runner, which re-POSTs `/machine/register` with a past expiry over a
572 /// fresh Noise channel. This is a control-plane state change only — it does NOT shut the runtime
573 /// down (the caller follows with [`graceful_shutdown`](Self::graceful_shutdown)) and does not
574 /// touch the on-disk node key. The kameo delegated-reply send error is flattened the same way as
575 /// [`fetch_id_token`](Self::fetch_id_token): a handler error carries the real
576 /// [`ts_control::LogoutError`]; any other send failure (actor shutdown / mailbox closed) is
577 /// surfaced as [`ts_control::LogoutError::NetworkError`].
578 pub async fn logout(&self) -> Result<(), ts_control::LogoutError> {
579 self.control
580 .ask(control_runner::Logout)
581 .await
582 .map_err(flatten_logout_send_err)
583 }
584
585 /// Publish a `TXT` DNS record for this node via control's `/machine/set-dns` (Go
586 /// `LocalClient.SetDNS`).
587 ///
588 /// Forwards to the control runner, which POSTs the record over a fresh Noise channel. The kameo
589 /// delegated-reply send error is flattened the same way as [`fetch_id_token`](Self::fetch_id_token):
590 /// a handler error carries the real [`ts_control::SetDnsError`]; any other send failure (actor
591 /// shutdown / mailbox closed) is surfaced as [`ts_control::SetDnsError::NetworkError`].
592 pub async fn set_dns(
593 &self,
594 name: String,
595 value: String,
596 ) -> Result<(), ts_control::SetDnsError> {
597 self.control
598 .ask(control_runner::SetDns { name, value })
599 .await
600 .map_err(flatten_set_dns_send_err)
601 }
602
603 /// Issue a real Let's Encrypt certificate for this node's MagicDNS `name` (`acme` feature).
604 ///
605 /// Mirrors [`fetch_id_token`](Self::fetch_id_token): forwards to the control runner, which runs
606 /// the client-side ACME DNS-01 flow on a spawned task and publishes the challenge TXT via the
607 /// node's set-dns RPC. The kameo delegated-reply send error is flattened — a handler error
608 /// carries the real [`ts_control::CertError`]; any other send failure (actor shutdown / mailbox
609 /// closed) is surfaced as a [`ts_control::CertError::Io`]. SaaS-only: a self-hosted control
610 /// plane 501s on set-dns.
611 #[cfg(feature = "acme")]
612 pub async fn get_certificate(
613 &self,
614 name: String,
615 ) -> Result<ts_control::tls::CertifiedKey, ts_control::CertError> {
616 self.control
617 .ask(control_runner::GetCertificate { name })
618 .await
619 .map_err(flatten_cert_send_err)
620 }
621
622 /// Resolve which node owns a tailnet source address.
623 ///
624 /// Maps the destination IP of `addr` to its owning node. Mirrors tsnet's `LocalClient::WhoIs`.
625 /// Returns `None` if no peer holds that tailnet IP.
626 ///
627 /// The returned [`WhoIs`] additionally carries the **flow-scoped** peer-capability grants
628 /// ([`WhoIs::cap_map`], Go `apitype.WhoIsResponse.CapMap`): the caps control's packet-filter
629 /// application rules authorize for traffic from THIS node (the flow source) to `addr` (the
630 /// destination). Empty when no grant matches. (The node-level cap map rides
631 /// [`WhoIs::capabilities`].)
632 pub async fn whois(&self, addr: core::net::SocketAddr) -> Result<Option<WhoIs>, Error> {
633 let whois = self
634 .peer_tracker
635 .upgrade()
636 .ok_or(Error {
637 kind: ErrorKind::ActorGone,
638 target_actor: None,
639 message_ty: None,
640 })?
641 .ask(peer_tracker::Whois { addr })
642 .await?;
643
644 let Some(mut whois) = whois else {
645 return Ok(None);
646 };
647
648 // Fill the flow-scoped cap map: src = this node's own tailnet IP (of the dst's family),
649 // dst = the queried address. A grant applies when its source matches the flow source — `src`
650 // ∈ its src prefixes OR this node holds one of its source node-caps — AND `dst` ∈ its dst
651 // prefixes (Go `Filter.CapsWithValues`). Resolve our own IP + cap map from the self node; if
652 // it isn't known yet, leave the map empty (no grants resolvable without a source).
653 let dst = addr.ip();
654 if let Some(self_node) = self.control.ask(control_runner::SelfNode).await? {
655 let src: core::net::IpAddr = if dst.is_ipv6() {
656 self_node.tailnet_address.ipv6.addr().into()
657 } else {
658 self_node.tailnet_address.ipv4.addr().into()
659 };
660 let grants = self.cap_grants_rx.borrow();
661 whois.cap_map = ts_packetfilter_state::caps_for(&grants, src, dst, |cap| {
662 self_node.has_node_attr(cap)
663 });
664 }
665
666 Ok(Some(whois))
667 }
668
669 /// The current direct-path status to the peer holding tailnet IP `dst`: its confirmed direct UDP
670 /// endpoint and that path's last-measured RTT, or `None` when there is no direct path right now
671 /// (the peer is relayed via DERP, is unknown, or has no disco key).
672 ///
673 /// The latency is the RTT of the most recent disco ping/pong that confirmed the path — a live
674 /// snapshot up to one probe interval stale, NOT a fresh on-demand round-trip (that is a separate,
675 /// heavier capability). Mirrors the direct-path latency Go surfaces for `ipnstate.PeerStatus`.
676 pub async fn direct_path(
677 &self,
678 dst: core::net::IpAddr,
679 ) -> Result<Option<(core::net::SocketAddr, Duration)>, Error> {
680 let peer_tracker = self.peer_tracker.upgrade().ok_or(Error {
681 kind: ErrorKind::ActorGone,
682 target_actor: None,
683 message_ty: None,
684 })?;
685
686 // Resolve the tailnet IP to its node, then to its disco key. No node / no disco key ⇒ no
687 // direct path is possible (a peer with no disco key can only be reached via DERP).
688 let Some(node) = peer_tracker
689 .ask(peer_tracker::PeerByTailnetIp { ip: dst })
690 .await?
691 else {
692 return Ok(None);
693 };
694 let Some(disco) = node.disco_key else {
695 return Ok(None);
696 };
697
698 self.direct
699 .ask(direct::DirectPathLatency { disco })
700 .await
701 .map_err(Into::into)
702 }
703
704 /// Send a disco ping to the peer holding tailnet IP `dst` **now** and await the pong, returning
705 /// the fresh round-trip latency and the endpoint that answered, or `None` if no pong arrives
706 /// within `timeout` (or the peer is unknown / has no disco key / no candidate path). This is the
707 /// true on-demand `PingType::Disco` (Go `tailscale ping`), as opposed to
708 /// [`direct_path`](Self::direct_path) which reports the last periodic probe's RTT.
709 ///
710 /// The ping round-trip is awaited OFF the direct manager's mailbox (we take a `MagicSock` handle
711 /// and await on it directly), so a slow/timing-out ping never blocks the actor.
712 pub async fn ping_disco(
713 &self,
714 dst: core::net::IpAddr,
715 timeout: Duration,
716 ) -> Result<Option<(core::net::SocketAddr, Duration)>, Error> {
717 let peer_tracker = self.peer_tracker.upgrade().ok_or(Error {
718 kind: ErrorKind::ActorGone,
719 target_actor: None,
720 message_ty: None,
721 })?;
722
723 let Some(node) = peer_tracker
724 .ask(peer_tracker::PeerByTailnetIp { ip: dst })
725 .await?
726 else {
727 return Ok(None);
728 };
729 let Some(disco) = node.disco_key else {
730 return Ok(None);
731 };
732
733 // Cheap synchronous handle fetch, then await the ping OFF the actor mailbox.
734 let Some(sock) = self.direct.ask(direct::SockHandle).await? else {
735 return Ok(None);
736 };
737 // A `ping_now` error is an underlay UDP send failure (not an actor problem); surface it as a
738 // reply-level error. A timed-out / unanswered ping is `Ok(None)`, not an error.
739 sock.ping_now(&disco, timeout).await.map_err(|_| Error {
740 kind: ErrorKind::ReplyErr,
741 target_actor: None,
742 message_ty: None,
743 })
744 }
745
746 /// Change the selected exit node at runtime (the equivalent of Go `tsnet`'s
747 /// `LocalClient.EditPrefs(ExitNodeID/ExitNodeIP)`), without recreating the device.
748 ///
749 /// Updates the live exit-node selector, then asks the peer tracker to re-broadcast the current
750 /// peer set so the route updater and source filter re-resolve the new selector immediately.
751 /// `None` clears the exit node (internet-bound traffic is then dropped, fail-closed, unless this
752 /// node egresses directly). The selection is re-resolved against the live peer set, so passing a
753 /// selector for a peer not yet in the netmap simply takes effect once that peer appears.
754 pub async fn set_exit_node(
755 &self,
756 selector: Option<ts_control::ExitNodeSelector>,
757 ) -> Result<(), Error> {
758 // Update the live cell every reader borrows from. `send_replace` keeps the value current
759 // even with no active receivers (none can have dropped while the runtime is up, but it is
760 // the right non-failing primitive here).
761 self.exit_node_tx.send_replace(selector);
762
763 // Trigger an immediate re-resolution: the route updater (outbound routes + DoH delegation)
764 // and the source filter (inbound validation) both recompute on an `Arc<PeerState>`, so a
765 // re-broadcast applies the new exit without waiting for the next netmap update.
766 self.peer_tracker
767 .upgrade()
768 .ok_or(Error {
769 kind: ErrorKind::ActorGone,
770 target_actor: None,
771 message_ty: None,
772 })?
773 .ask(peer_tracker::RepublishState)
774 .await
775 .map_err(Into::into)
776 }
777
778 /// The currently-selected exit node, or `None` if none is selected.
779 pub fn exit_node(&self) -> Option<ts_control::ExitNodeSelector> {
780 self.env.exit_node()
781 }
782
783 /// Toggle whether this node accepts peer-advertised subnet routes at runtime (the equivalent of
784 /// Go `tsnet`'s `LocalClient.EditPrefs(RouteAll)` / `tailscale set --accept-routes`), without
785 /// recreating the device.
786 ///
787 /// `accept-routes` is a purely **local** preference — unlike advertised routes it is never
788 /// reported to control (no `Hostinfo` / MapRequest side), so this only re-runs the local
789 /// route/source-filter recompute, mirroring [`set_exit_node`](Self::set_exit_node) rather than
790 /// [`set_advertise_routes`](Self::set_advertise_routes). Updates the live cell, then asks the peer
791 /// tracker to re-broadcast the current peer set so the route updater (outbound routes) and the
792 /// source filter (inbound validation) re-filter against the new value immediately: turning it on
793 /// installs newly-accepted subnet routes (and widens the source filter to match); turning it off
794 /// removes them from BOTH in lock-step (never accepting a source for a route no longer installed).
795 /// Self routes and the exit-node default `/0` are unaffected (the latter is gated by the exit-node
796 /// selection, not this flag).
797 ///
798 /// In TUN transport mode the netstack data path honors the toggle immediately, but the host
799 /// routing table (programmed once at device build) is not re-steered until the device is rebuilt.
800 pub async fn set_accept_routes(&self, accept: bool) -> Result<(), Error> {
801 // Update the live cell every reader borrows from (same primitive/rationale as set_exit_node).
802 self.accept_routes_tx.send_replace(accept);
803
804 // Trigger an immediate re-filter: the route updater and source filter both recompute on an
805 // `Arc<PeerState>`, so a re-broadcast applies the new preference without waiting for the next
806 // netmap update. Both re-read the same live cell, so the outbound route set and the inbound
807 // source filter stay coupled (the anti-leak invariant).
808 self.peer_tracker
809 .upgrade()
810 .ok_or(Error {
811 kind: ErrorKind::ActorGone,
812 target_actor: None,
813 message_ty: None,
814 })?
815 .ask(peer_tracker::RepublishState)
816 .await
817 .map_err(Into::into)
818 }
819
820 /// Whether this node currently accepts peer-advertised subnet routes (`--accept-routes`).
821 pub fn accept_routes(&self) -> bool {
822 self.env.accept_routes()
823 }
824
825 /// Change the set of subnet routes this node advertises at runtime (Go `tailscale set
826 /// --advertise-routes`). Applies BOTH halves together so the wire and the data path agree:
827 ///
828 /// 1. **Wire** — re-advertise `Hostinfo.RoutableIPs` to control on the live map-poll connection
829 /// (so control grants the node the subnet-router role for exactly these prefixes).
830 /// 2. **Local** — swap the forwarder's accept/dial route table (so the node actually forwards the
831 /// prefixes it advertises). New flows see the new set; in-flight flows keep their routing.
832 ///
833 /// `routes` is filtered to the IPv4-only, deduplicated set this fork can honor (IPv6 prefixes are
834 /// dropped under the IPv6-off posture — we never advertise a route we won't forward), so the wire
835 /// and forwarder are fed the identical final set. This sets the explicit subnet prefixes only; it
836 /// does NOT touch the exit-node `0.0.0.0/0` advertisement (a separate concern).
837 pub async fn set_advertise_routes(&self, routes: Vec<ipnet::IpNet>) -> Result<(), Error> {
838 // Update the explicit-subnet part of the live preference, keep the exit-node flag, and
839 // re-send the composed set. Composes with `set_advertise_exit_node` (neither clobbers the
840 // other's contribution to `Hostinfo.RoutableIPs`).
841 let composed = {
842 let mut adv = self.advertise.lock().unwrap_or_else(|p| p.into_inner());
843 adv.routes = routes;
844 compose_advertised_routes(adv.routes.clone(), adv.exit_node)
845 };
846 self.apply_advertised_routes(composed).await
847 }
848
849 /// Advertise (or stop advertising) this node as an **exit node** — the `0.0.0.0/0` default route
850 /// (Go `tailscale set --advertise-exit-node`). Composes with
851 /// [`set_advertise_routes`](Self::set_advertise_routes): toggling the exit node re-sends the
852 /// explicit subnet routes plus (when `enable`) `0.0.0.0/0`, so the two preferences are
853 /// independent. Like `set_advertise_routes`, this both re-advertises `Hostinfo.RoutableIPs` to
854 /// control AND updates the forwarder's accept/dial set, applied together. Control still gates
855 /// whether the advertised exit node is actually *usable* by peers (this only advertises it).
856 pub async fn set_advertise_exit_node(&self, enable: bool) -> Result<(), Error> {
857 let composed = {
858 let mut adv = self.advertise.lock().unwrap_or_else(|p| p.into_inner());
859 adv.exit_node = enable;
860 compose_advertised_routes(adv.routes.clone(), adv.exit_node)
861 };
862 self.apply_advertised_routes(composed).await
863 }
864
865 /// Push a freshly-composed advertised-route set to BOTH halves: the forwarder's accept/dial
866 /// table (local) FIRST — so the node forwards a prefix before control grants it, never the
867 /// reverse — then re-advertise `Hostinfo.RoutableIPs` to control on the live map-poll connection
868 /// (wire). `composed` is already filtered + exit-node-folded by [`compose_advertised_routes`].
869 async fn apply_advertised_routes(&self, composed: Vec<ipnet::IpNet>) -> Result<(), Error> {
870 self.forwarder
871 .ask(forwarder_actor::UpdateRoutes {
872 routes: composed.clone(),
873 })
874 .await?;
875 self.control
876 .ask(control_runner::SetAdvertiseRoutes { routes: composed })
877 .await
878 .map_err(Into::into)
879 }
880
881 /// Change this node's hostname at runtime (Go `tailscale set --hostname`), re-reporting
882 /// `Hostinfo.Hostname` to control on the live map-poll connection. Hostname is display-only
883 /// (control reflects it in the netmap), so there is no dataplane half. The new value is also
884 /// what a subsequent re-registration reports, so it persists across a reconnect.
885 pub async fn set_hostname(&self, hostname: String) -> Result<(), Error> {
886 self.control
887 .ask(control_runner::SetHostname { hostname })
888 .await
889 .map_err(Into::into)
890 }
891
892 /// Subscribe to netmap peer-change events: the **narrow** peer-set view.
893 ///
894 /// Returns a [`watch::Receiver`] whose value is the current set of peer [`StatusNode`]s,
895 /// updated on every netmap state update from control. Await
896 /// [`watch::Receiver::changed`](tokio::sync::watch::Receiver::changed) to react to peers
897 /// joining, leaving, or changing. For the unified Go-`WatchIPNBus` feed that merges this with
898 /// device-state and the interactive-login URL, see [`watch_ipn_bus`](Self::watch_ipn_bus); this
899 /// method is the peer-only projection of the same underlying cell.
900 pub async fn watch_netmap(&self) -> Result<watch::Receiver<Vec<StatusNode>>, Error> {
901 self.peer_tracker
902 .upgrade()
903 .ok_or(Error {
904 kind: ErrorKind::ActorGone,
905 target_actor: None,
906 message_ty: None,
907 })?
908 .ask(peer_tracker::WatchNetmap)
909 .await
910 .map_err(Into::into)
911 }
912
913 /// The current device connection-[`DeviceState`].
914 pub fn device_state(&self) -> DeviceState {
915 self.state_rx.borrow().clone()
916 }
917
918 /// Watch the device connection-[`DeviceState`] (`Connecting` → `Running` / `NeedsLogin` /
919 /// `Expired` / `Failed`).
920 ///
921 /// Returns a [`watch::Receiver`]; await
922 /// [`changed`](tokio::sync::watch::Receiver::changed) to react push-style to control connection
923 /// transitions instead of polling [`status`](Self::status). The initial value is the current
924 /// state. Note: a transient per-reconnect dip back to `Connecting` is **not** currently
925 /// emitted (control transparently reconnects below this layer); the state reflects registration
926 /// outcome and node-key expiry.
927 pub fn watch_state(&self) -> watch::Receiver<DeviceState> {
928 self.state_rx.clone()
929 }
930
931 /// Wait until the device finishes registering, returning a typed outcome.
932 ///
933 /// Resolves `Ok(())` once the device reaches [`DeviceState::Running`]. Returns a typed
934 /// [`RegistrationError`] otherwise — the actionable distinction between "retry", "re-pair", and
935 /// "drive interactive login" that replaces polling [`ipv4_addr`](Self::ipv4_addr) in a loop:
936 /// - `AuthRejected` — bad/expired/unknown auth key. **Permanent** (re-pair).
937 /// - `NeedsLogin(url)` — interactive authorization required (no usable auth key). **Not
938 /// permanent**: the runtime keeps retrying and will reach `Running` once the user authorizes
939 /// the URL. An **auth-key** caller should treat this as a failure; an **interactive** caller
940 /// should ignore this return and instead drive the flow via [`watch_state`](Self::watch_state)
941 /// (this method returns the URL eagerly rather than blocking for the whole login).
942 /// - `NetworkUnreachable` — control unreachable. **Transient** (retry).
943 /// - `Timeout` — no settled state within `timeout`.
944 ///
945 /// `KeyExpired` is not produced by this initial wait (a node key expires only *after* it has
946 /// come up); observe post-registration expiry via [`watch_state`](Self::watch_state).
947 /// `timeout` of `None` waits indefinitely for a settled state.
948 pub async fn wait_until_running(
949 &self,
950 timeout: Option<Duration>,
951 ) -> Result<(), RegistrationError> {
952 device_state::wait_for_running(self.state_rx.clone(), timeout).await
953 }
954
955 /// Subscribe to the unified IPN notification bus (Go `ipn` `WatchIPNBus` /
956 /// `LocalBackend.WatchNotifications`).
957 ///
958 /// Returns an [`IpnBusWatcher`]; await [`next`](IpnBusWatcher::next) to receive [`Notify`]
959 /// events that coalesce device-[`DeviceState`] changes (including the interactive-login URL as
960 /// `browse_to_url`) and netmap peer-set changes into one feed. `mask`
961 /// ([`NotifyWatchOpt`]) selects which current-state fields are front-loaded as an initial
962 /// snapshot on subscribe (`INITIAL_STATE` / `INITIAL_NETMAP`), exactly like Go's
963 /// `NotifyInitialState` / `NotifyInitialNetMap`.
964 ///
965 /// This composes the same `watch` cells as [`watch_state`](Self::watch_state),
966 /// [`watch_netmap`](Self::watch_netmap), and `pop_browser_url` — one source of truth, so the
967 /// merged feed cannot diverge from those narrow views. Besides the registration-time login URL
968 /// (carried by `NeedsLogin`), `browse_to_url` also streams the mid-session
969 /// `MapResponse.PopBrowserURL` (re-auth / consent on an already-running node). Delivery is
970 /// best-effort/lossy (a bounded per-watcher buffer; a notification is dropped rather than
971 /// blocking the runtime if a slow consumer's buffer fills), matching Go's bus. The stream ends
972 /// (`next` returns `None`) on runtime shutdown or when the watcher is dropped.
973 pub async fn watch_ipn_bus(&self, mask: NotifyWatchOpt) -> Result<IpnBusWatcher, Error> {
974 // The peer-set cell lives on the peer-tracker actor; obtain a receiver the same way
975 // `watch_netmap` does. State + shutdown cells are held here.
976 let peer_rx = self
977 .peer_tracker
978 .upgrade()
979 .ok_or(Error {
980 kind: ErrorKind::ActorGone,
981 target_actor: None,
982 message_ty: None,
983 })?
984 .ask(peer_tracker::WatchNetmap)
985 .await?;
986 // The running-node consent-URL cell lives on the control runner; obtain its receiver the
987 // same way (the control actor ref is strong, so no upgrade needed).
988 let browser_rx = self.control.ask(control_runner::WatchBrowserUrl).await?;
989 Ok(ipn_bus::spawn_watcher(
990 mask,
991 self.state_rx.clone(),
992 peer_rx,
993 browser_rx,
994 self.shutdown.subscribe(),
995 ))
996 }
997
998 /// Attempt to shut down the runtime gracefully.
999 ///
1000 /// Returns false if the shutdown timed out. It is still shut down if it timed out, just
1001 /// more violently and with possible resource leaks.
1002 pub async fn graceful_shutdown(self, timeout: Option<Duration>) -> bool {
1003 self.shutdown.send_replace(true);
1004
1005 async fn _shutdown_all(runtime: Runtime) {
1006 // See the note in `Drop` for why we only need to stop these actors to bring down the
1007 // whole runtime.
1008
1009 let _ignore = runtime.control.stop_gracefully().await;
1010 let _ignore = runtime.dataplane.stop_gracefully().await;
1011 let _ignore = runtime.env.bus.stop_gracefully().await;
1012
1013 tokio::join![
1014 runtime.control.wait_for_shutdown(),
1015 runtime.dataplane.wait_for_shutdown(),
1016 runtime.env.bus.wait_for_shutdown(),
1017 ];
1018 }
1019
1020 let fut = _shutdown_all(self);
1021
1022 match timeout {
1023 Some(timeout) => tokio::time::timeout(timeout, fut).await.is_ok(),
1024 None => {
1025 fut.await;
1026 true
1027 }
1028 }
1029 }
1030}
1031
1032impl Drop for Runtime {
1033 fn drop(&mut self) {
1034 // We must have already run `graceful_shutdown`: on the happy path, this does nothing, but
1035 // if it timed out, we need to make sure the actors are dead so we don't leak them and their
1036 // dependents.
1037 if *self.shutdown.borrow() {
1038 self.control.kill();
1039 self.dataplane.kill();
1040 self.env.bus.kill();
1041 return;
1042 }
1043
1044 self.shutdown.send_replace(true);
1045
1046 // Actors shut down when the last ActorRef to them is dropped (as nothing can send them
1047 // messages anymore). If we don't hold an ActorRef in Runtime, in general the only thing
1048 // that has one is the MessageBus, which each actor subscribes to for a subset of messages.
1049 // Hence, if we shut down the bus, most actors die as well.
1050
1051 // First shut down the actors we have an ActorRef to:
1052 try_shutdown(&self.control);
1053 try_shutdown(&self.dataplane);
1054
1055 // Then shutdown the message bus, stopping the rest of the actors:
1056 try_shutdown(&self.env.bus);
1057 }
1058}
1059
1060fn try_shutdown(a: &ActorRef<impl kameo::Actor>) {
1061 if let Err(e) = a.mailbox_sender().try_send(Signal::Stop) {
1062 tracing::error!(error = %e, "graceful shutdown failed, killing actor");
1063 a.kill();
1064 }
1065}
1066
1067/// Build the netstack config shared by both userspace netstacks (application + forwarder) from the
1068/// per-deployment `tcp_buffer_size` knob.
1069///
1070/// `None` keeps the netstack default (256 KiB/direction); `Some(n)` overrides it (e.g. a smaller
1071/// window on a memory-constrained exit node forwarding many concurrent flows — see
1072/// [`netstack::netcore::Config::tcp_buffer_size`]). Factored out of [`Runtime::spawn`] so the
1073/// None-default / Some-override mapping is unit-testable without standing up the actor system.
1074fn netstack_config_from(tcp_buffer_size: Option<usize>) -> netstack::netcore::Config {
1075 let mut c = netstack::netcore::Config::default();
1076 if let Some(tcp_buffer_size) = tcp_buffer_size {
1077 c.tcp_buffer_size = tcp_buffer_size;
1078 }
1079 c
1080}
1081
1082/// Filter a requested advertise-route set to the IPv4-only, deduplicated set this fork can honor,
1083/// mirroring [`ts_control::Config::advertised_routes`] so a runtime `set_advertise_routes` feeds the
1084/// wire (control grant) and the forwarder (accept/dial table) the identical final set. IPv6 prefixes
1085/// are dropped under the IPv6-off posture — we never advertise a route we won't forward. Order is
1086/// preserved (first occurrence wins). Factored out so the filter is unit-testable without an actor.
1087fn filter_advertise_routes(routes: Vec<ipnet::IpNet>) -> Vec<ipnet::IpNet> {
1088 let mut filtered: Vec<ipnet::IpNet> = Vec::new();
1089 for net in routes {
1090 if matches!(net, ipnet::IpNet::V4(_)) {
1091 if !filtered.contains(&net) {
1092 filtered.push(net);
1093 }
1094 } else {
1095 tracing::warn!(prefix = %net, "dropping IPv6 advertise route (IPv6-off posture)");
1096 }
1097 }
1098 filtered
1099}
1100
1101/// Compose the final advertised-route set from the explicit subnet `routes` and the exit-node flag,
1102/// mirroring [`ts_control::Config::advertised_routes`]: the IPv4-only, deduplicated subnet prefixes,
1103/// plus `0.0.0.0/0` appended when `exit_node` is set. This is the single source of truth both
1104/// runtime advertise mutators (`set_advertise_routes`, `set_advertise_exit_node`) feed, so the two
1105/// compose instead of clobbering. Factored out so the composition is unit-testable without an actor.
1106fn compose_advertised_routes(routes: Vec<ipnet::IpNet>, exit_node: bool) -> Vec<ipnet::IpNet> {
1107 let mut filtered = filter_advertise_routes(routes);
1108 if exit_node {
1109 let default_v4 = ipnet::IpNet::V4(
1110 ipnet::Ipv4Net::new(core::net::Ipv4Addr::UNSPECIFIED, 0)
1111 .expect("0.0.0.0/0 is a valid prefix"),
1112 );
1113 if !filtered.contains(&default_v4) {
1114 filtered.push(default_v4);
1115 }
1116 }
1117 filtered
1118}
1119
1120/// The runtime's live advertised-route preference: the explicit subnet routes plus whether this node
1121/// advertises itself as an exit node. Held behind a `Mutex` on the [`Runtime`] so
1122/// [`Runtime::set_advertise_routes`] and [`Runtime::set_advertise_exit_node`] each mutate their own
1123/// part and re-send the composed set — they compose rather than clobber (Go `EditPrefs` keeps
1124/// `AdvertiseRoutes` and the exit-node advertisement as independent prefs that both feed
1125/// `Hostinfo.RoutableIPs`).
1126#[derive(Debug, Default, Clone)]
1127struct AdvertiseState {
1128 /// The explicit subnet prefixes (pre-filter; the last value passed to `set_advertise_routes`).
1129 routes: Vec<ipnet::IpNet>,
1130 /// Whether this node advertises the exit-node default route (`0.0.0.0/0`).
1131 exit_node: bool,
1132}
1133
1134/// Flatten a kameo delegated-reply [`SendError`] for the id-token RPC into the RPC's own
1135/// [`ts_control::IdTokenError`].
1136///
1137/// A [`SendError::HandlerError`](kameo::error::SendError::HandlerError) carries the real
1138/// `IdTokenError` produced by the handler and is surfaced verbatim. Any other send failure (actor
1139/// not running / stopped, mailbox full, send timeout) is a delivery problem rather than an RPC
1140/// result, so it collapses to a transient [`ts_control::IdTokenError::NetworkError`]. Factored out
1141/// of [`Runtime::fetch_id_token`] so this mapping is unit-testable without standing up an actor.
1142fn flatten_send_err<M>(
1143 e: kameo::error::SendError<M, ts_control::IdTokenError>,
1144) -> ts_control::IdTokenError {
1145 match e {
1146 kameo::error::SendError::HandlerError(err) => err,
1147 _ => ts_control::IdTokenError::NetworkError,
1148 }
1149}
1150
1151/// Flatten a kameo `SendError` from the `Logout` ask into a [`ts_control::LogoutError`].
1152///
1153/// A `HandlerError` carries the real `LogoutError` from the control RPC and is surfaced verbatim;
1154/// any other send failure (actor not running / stopped, mailbox full, send timeout) — a delivery
1155/// problem, not a logout result — collapses to the transient [`ts_control::LogoutError::NetworkError`]
1156/// (logout is idempotent, so a retry after a delivery failure is safe). Factored out of
1157/// [`Runtime::logout`] so the mapping is unit-testable without standing up an actor.
1158fn flatten_logout_send_err<M>(
1159 e: kameo::error::SendError<M, ts_control::LogoutError>,
1160) -> ts_control::LogoutError {
1161 match e {
1162 kameo::error::SendError::HandlerError(err) => err,
1163 _ => ts_control::LogoutError::NetworkError,
1164 }
1165}
1166
1167/// Flatten a kameo `SendError` from the `SetDns` ask into a [`ts_control::SetDnsError`].
1168///
1169/// A `HandlerError` carries the real `SetDnsError` from the set-dns RPC and is surfaced verbatim;
1170/// any other send failure (actor not running / stopped, mailbox full, send timeout) — a delivery
1171/// problem, not a publish result — collapses to the transient
1172/// [`ts_control::SetDnsError::NetworkError`]. Factored out of [`Runtime::set_dns`] so the mapping is
1173/// unit-testable without standing up an actor.
1174fn flatten_set_dns_send_err<M>(
1175 e: kameo::error::SendError<M, ts_control::SetDnsError>,
1176) -> ts_control::SetDnsError {
1177 match e {
1178 kameo::error::SendError::HandlerError(err) => err,
1179 _ => ts_control::SetDnsError::NetworkError,
1180 }
1181}
1182
/// Collapse a kameo `SendError` from the `GetCertificate` ask into a [`ts_control::CertError`].
///
/// A `HandlerError` wraps the real `CertError` produced by the ACME issuance and is returned
/// verbatim. `CertError` has no transient-network variant, so every other send failure (actor
/// not running / stopped, mailbox full, send timeout) — a delivery problem rather than an
/// issuance result — becomes a [`ts_control::CertError::Io`] carrying a fixed message. Kept
/// apart from [`Runtime::get_certificate`] so the mapping can be unit-tested without standing up
/// an actor.
#[cfg(feature = "acme")]
fn flatten_cert_send_err<M>(
    e: kameo::error::SendError<M, ts_control::CertError>,
) -> ts_control::CertError {
    let kameo::error::SendError::HandlerError(issuance_err) = e else {
        // Anything that is not a handler result never reached (or never returned from) issuance.
        return ts_control::CertError::Io(std::io::Error::other(
            "control runner unavailable for certificate issuance",
        ));
    };
    issuance_err
}
1201
/// Unit tests for the pure helpers of this module: the netstack-config mapping, the
/// advertise-route filter/composition, and the `SendError` flatteners. None of them needs a
/// running actor system.
#[cfg(test)]
mod tests {
    use super::*;

    /// `None` must leave the netstack's own default TCP window in place (the 256 KiB throughput
    /// default), and must not silently coerce to some other value.
    #[test]
    fn netstack_config_none_uses_netstack_default() {
        let default = netstack::netcore::Config::default();
        let built = netstack_config_from(None);
        assert_eq!(
            built.tcp_buffer_size, default.tcp_buffer_size,
            "None must inherit the netstack default TCP buffer size"
        );
    }

    /// `Some(n)` must override the TCP window (the memory-vs-throughput knob exit-node operators
    /// reach for), reaching the config that both netstacks are built from.
    #[test]
    fn netstack_config_some_overrides_buffer() {
        let built = netstack_config_from(Some(64 * 1024));
        assert_eq!(
            built.tcp_buffer_size,
            64 * 1024,
            "Some(n) must override the TCP buffer size that both netstacks use"
        );
    }

    /// `set_advertise_routes` must feed the wire and the forwarder the IDENTICAL filtered set:
    /// IPv4-only (IPv6 dropped under the IPv6-off posture), deduplicated, order preserved.
    #[test]
    fn filter_advertise_routes_keeps_v4_dedups_drops_v6() {
        let v4a: ipnet::IpNet = "10.0.0.0/24".parse().unwrap();
        let v4b: ipnet::IpNet = "192.168.1.0/24".parse().unwrap();
        let v6: ipnet::IpNet = "2001:db8::/32".parse().unwrap();

        // Mixed input with a duplicate v4 and a v6 prefix.
        let out = filter_advertise_routes(vec![v4a, v6, v4b, v4a]);

        assert_eq!(
            out,
            vec![v4a, v4b],
            "v6 dropped, duplicate v4 collapsed, first-occurrence order preserved"
        );
    }

    /// An all-IPv6 request filters to empty (we never advertise a route we won't forward) rather
    /// than erroring — clearing the advertised set is a legitimate outcome.
    #[test]
    fn filter_advertise_routes_all_v6_is_empty() {
        let v6: ipnet::IpNet = "2001:db8::/32".parse().unwrap();
        assert!(filter_advertise_routes(vec![v6]).is_empty());
    }

    /// `compose_advertised_routes` folds the exit-node `0.0.0.0/0` onto the filtered subnet routes
    /// when (and only when) the exit-node flag is set — so `set_advertise_routes` and
    /// `set_advertise_exit_node` compose. The two preferences are independent.
    #[test]
    fn compose_advertised_routes_folds_exit_node() {
        let subnet: ipnet::IpNet = "10.0.0.0/24".parse().unwrap();
        let default_v4: ipnet::IpNet = "0.0.0.0/0".parse().unwrap();

        // Exit node off: just the (filtered) subnet routes.
        assert_eq!(
            compose_advertised_routes(vec![subnet], false),
            vec![subnet],
            "exit-node off ⇒ no default route"
        );
        // Exit node on: subnet routes PLUS 0.0.0.0/0.
        assert_eq!(
            compose_advertised_routes(vec![subnet], true),
            vec![subnet, default_v4],
            "exit-node on ⇒ 0.0.0.0/0 appended"
        );
        // Exit node on with NO subnet routes: just the default route.
        assert_eq!(
            compose_advertised_routes(vec![], true),
            vec![default_v4],
            "exit-node alone advertises only 0.0.0.0/0"
        );
        // Idempotent: an explicit 0.0.0.0/0 already in the routes isn't duplicated by the fold.
        assert_eq!(
            compose_advertised_routes(vec![default_v4], true),
            vec![default_v4],
            "the exit-node fold dedups against an explicit default route"
        );
    }

    /// A `HandlerError` carries the real `IdTokenError` from the RPC handler and must pass through
    /// verbatim, not be flattened to a generic network error. Using an `Internal(_)` payload (not
    /// `NetworkError`) makes the passthrough observable: a buggy flatten that always returned
    /// `NetworkError` would fail this assertion.
    #[test]
    fn flatten_send_err_handler_error_passes_through() {
        // Build the `Internal(_)` payload via the public `From<Utf8Error>` conversion (no extra
        // deps). The invalid bytes go through a runtime Vec so the `invalid_from_utf8` lint
        // (which only fires on compile-time-known literals) doesn't flag this intentional bad
        // input.
        let bytes = vec![0xffu8, 0xfe];
        let utf8_err = core::str::from_utf8(&bytes).unwrap_err();
        let inner = ts_control::IdTokenError::from(utf8_err);
        assert!(matches!(inner, ts_control::IdTokenError::Internal(_)));
        let e: kameo::error::SendError<control_runner::FetchIdToken, ts_control::IdTokenError> =
            kameo::error::SendError::HandlerError(inner.clone());
        assert_eq!(flatten_send_err(e), inner);
    }

    /// A non-handler send failure (actor stopped) is a delivery problem, not an RPC result, so it
    /// must collapse to a transient `NetworkError`.
    #[test]
    fn flatten_send_err_actor_stopped_is_network_error() {
        let e: kameo::error::SendError<control_runner::FetchIdToken, ts_control::IdTokenError> =
            kameo::error::SendError::ActorStopped;
        assert_eq!(flatten_send_err(e), ts_control::IdTokenError::NetworkError);
    }

    /// `ActorNotRunning` (the message bounces back undelivered) is likewise a delivery failure and
    /// must map to a transient `NetworkError`.
    #[test]
    fn flatten_send_err_actor_not_running_is_network_error() {
        let e: kameo::error::SendError<control_runner::FetchIdToken, ts_control::IdTokenError> =
            kameo::error::SendError::ActorNotRunning(control_runner::FetchIdToken {
                audience: "sts.amazonaws.com".to_string(),
            });
        assert_eq!(flatten_send_err(e), ts_control::IdTokenError::NetworkError);
    }

    /// A `HandlerError` from the logout RPC carries the real `LogoutError` and must pass through
    /// verbatim. An `Internal(_)` payload (distinct from the `_ => NetworkError` fallback) makes the
    /// passthrough observable.
    #[test]
    fn flatten_logout_send_err_handler_error_passes_through() {
        let inner = ts_control::LogoutError::Internal(ts_control::LogoutInternalErrorKind::Http);
        assert!(matches!(inner, ts_control::LogoutError::Internal(_)));
        let e: kameo::error::SendError<control_runner::Logout, ts_control::LogoutError> =
            kameo::error::SendError::HandlerError(inner.clone());
        assert_eq!(flatten_logout_send_err(e), inner);
    }

    /// A non-handler send failure (actor stopped) is a delivery problem, not a logout result, and
    /// collapses to a transient `NetworkError` (logout is idempotent, so a retry is safe).
    #[test]
    fn flatten_logout_send_err_actor_stopped_is_network_error() {
        let e: kameo::error::SendError<control_runner::Logout, ts_control::LogoutError> =
            kameo::error::SendError::ActorStopped;
        assert_eq!(
            flatten_logout_send_err(e),
            ts_control::LogoutError::NetworkError
        );
    }

    /// A `HandlerError` from the set-dns RPC carries the real `SetDnsError` and must pass through
    /// verbatim. An `Internal(_)` payload (distinct from the `_ => NetworkError` fallback) makes the
    /// passthrough observable.
    #[test]
    fn flatten_set_dns_send_err_handler_error_passes_through() {
        let inner = ts_control::SetDnsError::Internal(ts_control::SetDnsInternalErrorKind::Http);
        assert!(matches!(inner, ts_control::SetDnsError::Internal(_)));
        let e: kameo::error::SendError<control_runner::SetDns, ts_control::SetDnsError> =
            kameo::error::SendError::HandlerError(inner.clone());
        assert_eq!(flatten_set_dns_send_err(e), inner);
    }

    /// A non-handler send failure (actor stopped) is a delivery problem, not a publish result, and
    /// collapses to a transient `NetworkError`.
    #[test]
    fn flatten_set_dns_send_err_actor_stopped_is_network_error() {
        let e: kameo::error::SendError<control_runner::SetDns, ts_control::SetDnsError> =
            kameo::error::SendError::ActorStopped;
        assert_eq!(
            flatten_set_dns_send_err(e),
            ts_control::SetDnsError::NetworkError
        );
    }

    /// A `HandlerError` from certificate issuance carries the real `CertError` and must pass
    /// through verbatim. The payload uses a message different from the fallback's, so a buggy
    /// flatten that always produced the fallback `Io` error would fail this assertion. The
    /// message type is irrelevant to the mapping, so `()` stands in for it.
    #[cfg(feature = "acme")]
    #[test]
    fn flatten_cert_send_err_handler_error_passes_through() {
        let inner = ts_control::CertError::Io(std::io::Error::other("acme issuance failed"));
        let e: kameo::error::SendError<(), ts_control::CertError> =
            kameo::error::SendError::HandlerError(inner);
        assert!(
            matches!(
                flatten_cert_send_err(e),
                ts_control::CertError::Io(err) if err.to_string() == "acme issuance failed"
            ),
            "a handler error must be surfaced unchanged"
        );
    }

    /// A non-handler send failure (actor stopped) is a delivery problem, not an issuance result.
    /// `CertError` has no transient-network variant, so it must collapse to the `Io` fallback
    /// with its fixed message.
    #[cfg(feature = "acme")]
    #[test]
    fn flatten_cert_send_err_actor_stopped_is_io_error() {
        let e: kameo::error::SendError<(), ts_control::CertError> =
            kameo::error::SendError::ActorStopped;
        assert!(
            matches!(
                flatten_cert_send_err(e),
                ts_control::CertError::Io(err)
                    if err.to_string() == "control runner unavailable for certificate issuance"
            ),
            "a delivery failure must collapse to the `Io` fallback"
        );
    }
}