Skip to main content

ts_runtime/
device_state.rs

1//! Device connection-state tracking: a push-style view of where a [`Runtime`](crate::Runtime) is in
2//! its control-plane lifecycle, plus a typed registration outcome.
3//!
4//! Mirrors the part of Go `tsnet`/`ipn`'s state machine an embedder actually reacts to: is the node
5//! still coming up, running, waiting for interactive login, expired, or did registration hard-fail?
6//! The [`ControlRunner`](crate::control_runner::ControlRunner) publishes transitions into a
7//! `watch` cell so an embedder can `await` them ([`Runtime::watch_state`](crate::Runtime::watch_state))
8//! instead of polling [`status`](crate::Runtime::status), and
9//! [`Runtime::wait_until_running`](crate::Runtime::wait_until_running) is a one-shot convenience
10//! built on the same cell.
11
12/// The control-plane lifecycle state of a device.
13///
14/// Published by the control runner as it brings the node up and maintains the netmap stream. A
15/// consumer watches this to drive UI ("connecting…", "needs login", "expired") and to distinguish a
16/// permanent failure from a transient one without inspecting logs.
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum DeviceState {
19    /// The runtime has spawned and is registering / establishing the control session. The initial
20    /// state of every device.
21    Connecting,
22    /// Registered and the netmap stream is live — the node is up.
23    Running,
24    /// Control requires interactive authentication (no usable auth key): the node is waiting for a
25    /// human to authorize it at the carried URL. Transient — registration retries until authorized.
26    NeedsLogin(url::Url),
27    /// The node key has expired (control reported the self-node's key expiry is in the past). The
28    /// node must re-authenticate to continue. Surfaced from the netmap self-node, not registration.
29    Expired,
30    /// Registration hard-failed with a permanent reason (e.g. a bad/expired/unknown auth key). The
31    /// control runner stops; this carries the typed reason. Not retried.
32    Failed(RegistrationError),
33}
34
35/// A typed registration outcome, distinguishing a **permanent** failure (don't retry — tell the
36/// user) from a **transient** one (worth retrying).
37///
38/// This is the error surfaced by [`Runtime::wait_until_running`](crate::Runtime::wait_until_running),
39/// replacing the previous "poll `ipv4_addr` until a deadline and report a generic timeout" workaround
40/// with an actionable reason.
41#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
42pub enum RegistrationError {
43    /// Control rejected registration with a permanent reason — typically a bad, expired, or unknown
44    /// auth key. The string is control's verbatim reason. **Permanent**: re-pairing (a new auth
45    /// key) is required; retrying with the same key will not succeed.
46    #[error("authentication rejected by control: {0}")]
47    AuthRejected(String),
48
49    /// The node key has expired. **Permanent** until re-authentication.
50    #[error("node key expired; re-authentication required")]
51    KeyExpired,
52
53    /// Interactive authorization is required: control offered an auth URL (no usable auth key).
54    /// **Actionable but not permanent** — direct the user to the URL; the runtime keeps retrying
55    /// registration and will reach `Running` once the user authorizes (so this is *not*
56    /// [`is_permanent`](Self::is_permanent)). A caller using an auth key should not hit this; a
57    /// caller doing interactive auth should drive it via
58    /// [`watch_state`](crate::Runtime::watch_state) rather than treating this as a hard failure.
59    #[error("interactive login required at {0}")]
60    NeedsLogin(url::Url),
61
62    /// The control plane was unreachable (network/transport error). **Transient**: retrying later
63    /// may succeed.
64    #[error("control plane unreachable")]
65    NetworkUnreachable,
66
67    /// No settled state was reached before the caller's timeout elapsed. **Indeterminate**:
68    /// registration may still be in flight (e.g. slow control plane); the caller may retry the wait.
69    #[error("timed out waiting for the device to finish registering")]
70    Timeout,
71}
72
73impl RegistrationError {
74    /// Whether this outcome is **permanent** — re-pairing / new credentials are required and
75    /// retrying as-is will not succeed (`AuthRejected`, `KeyExpired`). Everything else is not
76    /// permanent: `NetworkUnreachable`/`Timeout` are transient (retry may succeed), and `NeedsLogin`
77    /// is actionable-but-recoverable (the runtime keeps retrying and reaches `Running` once the user
78    /// authorizes the offered URL — so it is *not* permanent).
79    pub fn is_permanent(&self) -> bool {
80        matches!(
81            self,
82            RegistrationError::AuthRejected(_) | RegistrationError::KeyExpired
83        )
84    }
85}
86
87/// Map a control-layer [`ts_control::Error`] from the registration path into a typed
88/// [`RegistrationError`]. Used by the control runner when its `check_auth` loop hard-fails.
89impl From<&ts_control::Error> for RegistrationError {
90    fn from(e: &ts_control::Error) -> Self {
91        match e {
92            ts_control::Error::MachineNotAuthorized(u) => RegistrationError::NeedsLogin(u.clone()),
93            ts_control::Error::Registration(reason) => {
94                RegistrationError::AuthRejected(reason.clone())
95            }
96            ts_control::Error::NetworkError(_) => RegistrationError::NetworkUnreachable,
97            // A 429 rate-limit is **transient** — control is asking us to wait, not rejecting us —
98            // so it must NOT become a permanent `AuthRejected`. The control runner's `check_auth`
99            // loop already intercepts `RateLimited` and sleeps the server delay before this mapping
100            // is reached; classifying it as `NetworkUnreachable` here keeps any other caller of this
101            // conversion on the correct (non-permanent, retry-may-succeed) branch.
102            ts_control::Error::RateLimited(_) => RegistrationError::NetworkUnreachable,
103            // InvalidUrl / Internal: not a transient network condition and not an auth decision —
104            // treat as a (permanent-ish) auth rejection carrying the display reason so the caller
105            // sees something actionable rather than an opaque "timeout".
106            other => RegistrationError::AuthRejected(other.to_string()),
107        }
108    }
109}
110
111/// Wait on a [`DeviceState`] `watch` channel until it settles, mapping the settled state to the
112/// typed [`wait_until_running`](crate::Runtime::wait_until_running) result.
113///
114/// Factored out of [`Runtime::wait_until_running`](crate::Runtime) so the (non-trivial) loop — the
115/// see-then-await ordering, the per-state mapping, sender-drop handling, and the timeout — is
116/// unit-testable against a plain `watch::channel` without standing up a runtime.
117pub(crate) async fn wait_for_running(
118    mut rx: tokio::sync::watch::Receiver<DeviceState>,
119    timeout: Option<core::time::Duration>,
120) -> Result<(), RegistrationError> {
121    let wait = async {
122        loop {
123            // Evaluate the current value, then await a change. `borrow_and_update` marks the current
124            // value seen so a transition isn't missed between this check and `changed()`.
125            let settled = match &*rx.borrow_and_update() {
126                DeviceState::Running => Some(Ok(())),
127                DeviceState::Failed(e) => Some(Err(e.clone())),
128                DeviceState::Expired => Some(Err(RegistrationError::KeyExpired)),
129                DeviceState::NeedsLogin(u) => Some(Err(RegistrationError::NeedsLogin(u.clone()))),
130                DeviceState::Connecting => None,
131            };
132            if let Some(result) = settled {
133                return result;
134            }
135            // Not settled yet — wait for the next transition. If the sender is dropped (runtime
136            // tearing down), treat it as unreachable rather than hanging forever.
137            if rx.changed().await.is_err() {
138                return Err(RegistrationError::NetworkUnreachable);
139            }
140        }
141    };
142
143    match timeout {
144        Some(timeout) => tokio::time::timeout(timeout, wait)
145            .await
146            .unwrap_or(Err(RegistrationError::Timeout)),
147        None => wait.await,
148    }
149}
150
#[cfg(test)]
mod tests {
    use core::time::Duration;

    use tokio::sync::watch;

    use super::*;

    /// Generous upper bound for waits that are expected to settle well before it elapses.
    const SETTLE_BUDGET: Duration = Duration::from_secs(1);

    /// Parse a literal test URL.
    fn test_url(raw: &str) -> url::Url {
        raw.parse().unwrap()
    }

    #[test]
    fn permanence_classification() {
        // Permanent: re-pairing / new credentials are required.
        let permanent = [
            RegistrationError::AuthRejected("bad key".into()),
            RegistrationError::KeyExpired,
        ];
        for err in &permanent {
            assert!(err.is_permanent());
        }

        // Not permanent: NeedsLogin recovers once the user authorizes (the runtime keeps
        // retrying); network errors and timeouts are transient.
        let recoverable = [
            RegistrationError::NeedsLogin(test_url("https://login.example/x")),
            RegistrationError::NetworkUnreachable,
            RegistrationError::Timeout,
        ];
        for err in &recoverable {
            assert!(!err.is_permanent());
        }
    }

    #[test]
    fn maps_control_error_variants() {
        let login = test_url("https://login.example/a");

        let needs_login =
            RegistrationError::from(&ts_control::Error::MachineNotAuthorized(login.clone()));
        assert_eq!(needs_login, RegistrationError::NeedsLogin(login));

        let rejected =
            RegistrationError::from(&ts_control::Error::Registration("bad auth key".into()));
        assert_eq!(
            rejected,
            RegistrationError::AuthRejected("bad auth key".into())
        );

        let network_error = ts_control::Error::NetworkError(ts_control::Operation::Registration);
        assert_eq!(
            RegistrationError::from(&network_error),
            RegistrationError::NetworkUnreachable
        );

        // A 429 rate-limit is TRANSIENT and has to map to a non-permanent outcome, never to the
        // `AuthRejected` catch-all (which would wrongly stop the runtime). This pins the explicit
        // handling: if a refactor lets `RateLimited` fall through to the catch-all, the
        // assertions below fail.
        let rate_limited = ts_control::Error::RateLimited(Duration::from_secs(30));
        let rl = RegistrationError::from(&rate_limited);
        assert_eq!(rl, RegistrationError::NetworkUnreachable);
        assert!(
            !rl.is_permanent(),
            "a rate-limit must be a transient (non-permanent) failure"
        );
    }

    // --- wait_for_running loop ---

    /// A cell that already holds `Running` resolves to `Ok(())` right away: the first
    /// `borrow_and_update` sees it, no transition is needed.
    #[tokio::test]
    async fn wait_resolves_when_already_running() {
        let (_tx, rx) = watch::channel(DeviceState::Running);
        let outcome = wait_for_running(rx, Some(SETTLE_BUDGET)).await;
        assert_eq!(outcome, Ok(()));
    }

    /// A `Connecting → Running` transition published from another task is picked up (no missed
    /// wakeup) and resolves to `Ok(())`.
    #[tokio::test]
    async fn wait_resolves_on_transition_to_running() {
        let (tx, rx) = watch::channel(DeviceState::Connecting);
        let publish_delay = Duration::from_millis(20);
        tokio::spawn(async move {
            tokio::time::sleep(publish_delay).await;
            tx.send_replace(DeviceState::Running);
        });
        let outcome = wait_for_running(rx, Some(SETTLE_BUDGET)).await;
        assert_eq!(outcome, Ok(()));
    }

    /// Every settled state other than `Running` is reported as its typed error.
    #[tokio::test]
    async fn wait_maps_each_settled_failure() {
        let cases = [
            (
                DeviceState::Failed(RegistrationError::AuthRejected("bad".into())),
                RegistrationError::AuthRejected("bad".into()),
            ),
            (DeviceState::Expired, RegistrationError::KeyExpired),
            (
                DeviceState::NeedsLogin(test_url("https://login.example/x")),
                RegistrationError::NeedsLogin(test_url("https://login.example/x")),
            ),
        ];
        for (state, expected) in cases {
            let (_tx, rx) = watch::channel(state);
            let outcome = wait_for_running(rx, Some(SETTLE_BUDGET)).await;
            assert_eq!(outcome, Err(expected));
        }
    }

    /// A cell that stays at `Connecting` beyond the timeout is reported as `Timeout`.
    #[tokio::test]
    async fn wait_times_out_while_connecting() {
        let (_tx, rx) = watch::channel(DeviceState::Connecting);
        let outcome = wait_for_running(rx, Some(Duration::from_millis(30))).await;
        assert_eq!(outcome, Err(RegistrationError::Timeout));
    }

    /// Dropping the sender while the cell is still `Connecting` ends the wait with
    /// `NetworkUnreachable` instead of hanging forever.
    #[tokio::test]
    async fn wait_sender_dropped_is_network_unreachable() {
        let (tx, rx) = watch::channel(DeviceState::Connecting);
        drop(tx);
        let outcome = wait_for_running(rx, Some(SETTLE_BUDGET)).await;
        assert_eq!(outcome, Err(RegistrationError::NetworkUnreachable));
    }
}