Skip to main content

ts_runtime/
device_state.rs

1//! Device connection-state tracking: a push-style view of where a [`Runtime`](crate::Runtime) is in
2//! its control-plane lifecycle, plus a typed registration outcome.
3//!
4//! Mirrors the part of Go `tsnet`/`ipn`'s state machine an embedder actually reacts to: is the node
5//! still coming up, running, waiting for interactive login, expired, or did registration hard-fail?
6//! The [`ControlRunner`](crate::control_runner::ControlRunner) publishes transitions into a
7//! `watch` cell so an embedder can `await` them ([`Runtime::watch_state`](crate::Runtime::watch_state))
8//! instead of polling [`status`](crate::Runtime::status), and
9//! [`Runtime::wait_until_running`](crate::Runtime::wait_until_running) is a one-shot convenience
10//! built on the same cell.
11
12/// The control-plane lifecycle state of a device.
13///
14/// Published by the control runner as it brings the node up and maintains the netmap stream. A
15/// consumer watches this to drive UI ("connecting…", "needs login", "expired") and to distinguish a
16/// permanent failure from a transient one without inspecting logs.
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum DeviceState {
19    /// The runtime has spawned and is registering / establishing the control session. The initial
20    /// state of every device.
21    Connecting,
22    /// Registered and the netmap stream is live — the node is up.
23    Running,
24    /// Control requires interactive authentication (no usable auth key): the node is waiting for a
25    /// human to authorize it at the carried URL. Transient — registration retries until authorized.
26    NeedsLogin(url::Url),
27    /// The node key has expired (control reported the self-node's key expiry is in the past). The
28    /// node must re-authenticate to continue. Surfaced from the netmap self-node, not registration.
29    Expired,
30    /// Registration hard-failed with a permanent reason (e.g. a bad/expired/unknown auth key). The
31    /// control runner stops; this carries the typed reason. Not retried.
32    Failed(RegistrationError),
33}
34
35/// A typed registration outcome, distinguishing a **permanent** failure (don't retry — tell the
36/// user) from a **transient** one (worth retrying).
37///
38/// This is the error surfaced by [`Runtime::wait_until_running`](crate::Runtime::wait_until_running),
39/// replacing the previous "poll `ipv4_addr` until a deadline and report a generic timeout" workaround
40/// with an actionable reason.
41#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
42pub enum RegistrationError {
43    /// Control rejected registration with a permanent reason — typically a bad, expired, or unknown
44    /// auth key. The string is control's verbatim reason. **Permanent**: re-pairing (a new auth
45    /// key) is required; retrying with the same key will not succeed.
46    #[error("authentication rejected by control: {0}")]
47    AuthRejected(String),
48
49    /// The node key has expired. **Permanent** until re-authentication.
50    #[error("node key expired; re-authentication required")]
51    KeyExpired,
52
53    /// Interactive authorization is required: control offered an auth URL (no usable auth key).
54    /// **Actionable but not permanent** — direct the user to the URL; the runtime keeps retrying
55    /// registration and will reach `Running` once the user authorizes (so this is *not*
56    /// [`is_permanent`](Self::is_permanent)). A caller using an auth key should not hit this; a
57    /// caller doing interactive auth should drive it via
58    /// [`watch_state`](crate::Runtime::watch_state) rather than treating this as a hard failure.
59    #[error("interactive login required at {0}")]
60    NeedsLogin(url::Url),
61
62    /// The control plane was unreachable (network/transport error). **Transient**: retrying later
63    /// may succeed.
64    #[error("control plane unreachable")]
65    NetworkUnreachable,
66
67    /// No settled state was reached before the caller's timeout elapsed. **Indeterminate**:
68    /// registration may still be in flight (e.g. slow control plane); the caller may retry the wait.
69    #[error("timed out waiting for the device to finish registering")]
70    Timeout,
71}
72
73impl RegistrationError {
74    /// Whether this outcome is **permanent** — re-pairing / new credentials are required and
75    /// retrying as-is will not succeed (`AuthRejected`, `KeyExpired`). Everything else is not
76    /// permanent: `NetworkUnreachable`/`Timeout` are transient (retry may succeed), and `NeedsLogin`
77    /// is actionable-but-recoverable (the runtime keeps retrying and reaches `Running` once the user
78    /// authorizes the offered URL — so it is *not* permanent).
79    pub fn is_permanent(&self) -> bool {
80        matches!(
81            self,
82            RegistrationError::AuthRejected(_) | RegistrationError::KeyExpired
83        )
84    }
85}
86
87/// Map a control-layer [`ts_control::Error`] from the registration path into a typed
88/// [`RegistrationError`]. Used by the control runner when its `check_auth` loop hard-fails.
89impl From<&ts_control::Error> for RegistrationError {
90    fn from(e: &ts_control::Error) -> Self {
91        match e {
92            ts_control::Error::MachineNotAuthorized(u) => RegistrationError::NeedsLogin(u.clone()),
93            ts_control::Error::Registration(reason) => {
94                RegistrationError::AuthRejected(reason.clone())
95            }
96            ts_control::Error::NetworkError(_) => RegistrationError::NetworkUnreachable,
97            // InvalidUrl / Internal: not a transient network condition and not an auth decision —
98            // treat as a (permanent-ish) auth rejection carrying the display reason so the caller
99            // sees something actionable rather than an opaque "timeout".
100            other => RegistrationError::AuthRejected(other.to_string()),
101        }
102    }
103}
104
105/// Wait on a [`DeviceState`] `watch` channel until it settles, mapping the settled state to the
106/// typed [`wait_until_running`](crate::Runtime::wait_until_running) result.
107///
108/// Factored out of [`Runtime::wait_until_running`](crate::Runtime) so the (non-trivial) loop — the
109/// see-then-await ordering, the per-state mapping, sender-drop handling, and the timeout — is
110/// unit-testable against a plain `watch::channel` without standing up a runtime.
111pub(crate) async fn wait_for_running(
112    mut rx: tokio::sync::watch::Receiver<DeviceState>,
113    timeout: Option<core::time::Duration>,
114) -> Result<(), RegistrationError> {
115    let wait = async {
116        loop {
117            // Evaluate the current value, then await a change. `borrow_and_update` marks the current
118            // value seen so a transition isn't missed between this check and `changed()`.
119            let settled = match &*rx.borrow_and_update() {
120                DeviceState::Running => Some(Ok(())),
121                DeviceState::Failed(e) => Some(Err(e.clone())),
122                DeviceState::Expired => Some(Err(RegistrationError::KeyExpired)),
123                DeviceState::NeedsLogin(u) => Some(Err(RegistrationError::NeedsLogin(u.clone()))),
124                DeviceState::Connecting => None,
125            };
126            if let Some(result) = settled {
127                return result;
128            }
129            // Not settled yet — wait for the next transition. If the sender is dropped (runtime
130            // tearing down), treat it as unreachable rather than hanging forever.
131            if rx.changed().await.is_err() {
132                return Err(RegistrationError::NetworkUnreachable);
133            }
134        }
135    };
136
137    match timeout {
138        Some(timeout) => tokio::time::timeout(timeout, wait)
139            .await
140            .unwrap_or(Err(RegistrationError::Timeout)),
141        None => wait.await,
142    }
143}
144
#[cfg(test)]
mod tests {
    use core::time::Duration;

    use tokio::sync::watch;

    use super::*;

    /// Upper bound handed to waits that are expected to settle long before it elapses.
    const SETTLE_BUDGET: Duration = Duration::from_secs(1);

    /// Parse a URL literal used as test data.
    fn login_url(raw: &str) -> url::Url {
        raw.parse().unwrap()
    }

    #[test]
    fn permanence_classification() {
        // Permanent: re-pairing / new credentials required.
        let permanent = [
            RegistrationError::AuthRejected("bad key".into()),
            RegistrationError::KeyExpired,
        ];
        for err in &permanent {
            assert!(err.is_permanent());
        }

        // Not permanent: NeedsLogin recovers once the user authorizes (the runtime keeps
        // retrying); network/timeout are transient.
        let recoverable = [
            RegistrationError::NeedsLogin(login_url("https://login.example/x")),
            RegistrationError::NetworkUnreachable,
            RegistrationError::Timeout,
        ];
        for err in &recoverable {
            assert!(!err.is_permanent());
        }
    }

    #[test]
    fn maps_control_error_variants() {
        let url = login_url("https://login.example/a");

        let not_authorized = ts_control::Error::MachineNotAuthorized(url.clone());
        assert_eq!(
            RegistrationError::from(&not_authorized),
            RegistrationError::NeedsLogin(url)
        );

        let rejected = ts_control::Error::Registration("bad auth key".into());
        assert_eq!(
            RegistrationError::from(&rejected),
            RegistrationError::AuthRejected("bad auth key".into())
        );

        let unreachable = ts_control::Error::NetworkError(ts_control::Operation::Registration);
        assert_eq!(
            RegistrationError::from(&unreachable),
            RegistrationError::NetworkUnreachable
        );
    }

    // --- wait_for_running loop ---

    /// A cell that already holds `Running` resolves `Ok(())` straight away: the first
    /// `borrow_and_update` sees it, no transition needed.
    #[tokio::test]
    async fn wait_resolves_when_already_running() {
        let (_tx, rx) = watch::channel(DeviceState::Running);
        let outcome = wait_for_running(rx, Some(SETTLE_BUDGET)).await;
        assert_eq!(outcome, Ok(()));
    }

    /// A `Connecting → Running` transition published from another task is picked up (no missed
    /// wakeup) and resolves `Ok(())`.
    #[tokio::test]
    async fn wait_resolves_on_transition_to_running() {
        let (tx, rx) = watch::channel(DeviceState::Connecting);
        tokio::spawn(async move {
            tokio::time::sleep(Duration::from_millis(20)).await;
            tx.send_replace(DeviceState::Running);
        });
        let outcome = wait_for_running(rx, Some(SETTLE_BUDGET)).await;
        assert_eq!(outcome, Ok(()));
    }

    /// Every settled state other than `Running` maps to its typed error.
    #[tokio::test]
    async fn wait_maps_each_settled_failure() {
        let rejected = RegistrationError::AuthRejected("bad".into());
        let login = login_url("https://login.example/x");
        let cases = [
            (DeviceState::Failed(rejected.clone()), rejected),
            (DeviceState::Expired, RegistrationError::KeyExpired),
            (
                DeviceState::NeedsLogin(login.clone()),
                RegistrationError::NeedsLogin(login),
            ),
        ];

        for (initial, expected) in cases {
            let (_tx, rx) = watch::channel(initial);
            let outcome = wait_for_running(rx, Some(SETTLE_BUDGET)).await;
            assert_eq!(outcome, Err(expected));
        }
    }

    /// A cell that stays at `Connecting` beyond the timeout yields `Timeout`.
    #[tokio::test]
    async fn wait_times_out_while_connecting() {
        let (_tx, rx) = watch::channel(DeviceState::Connecting);
        let outcome = wait_for_running(rx, Some(Duration::from_millis(30))).await;
        assert_eq!(outcome, Err(RegistrationError::Timeout));
    }

    /// Dropping the sender while the cell is still `Connecting` ends the wait with
    /// `NetworkUnreachable` instead of hanging forever.
    #[tokio::test]
    async fn wait_sender_dropped_is_network_unreachable() {
        let (tx, rx) = watch::channel(DeviceState::Connecting);
        drop(tx);
        let outcome = wait_for_running(rx, Some(SETTLE_BUDGET)).await;
        assert_eq!(outcome, Err(RegistrationError::NetworkUnreachable));
    }
}