ts_runtime/device_state.rs
1//! Device connection-state tracking: a push-style view of where a [`Runtime`](crate::Runtime) is in
2//! its control-plane lifecycle, plus a typed registration outcome.
3//!
4//! Mirrors the part of Go `tsnet`/`ipn`'s state machine an embedder actually reacts to: is the node
5//! still coming up, running, waiting for interactive login, expired, or did registration hard-fail?
6//! The [`ControlRunner`](crate::control_runner::ControlRunner) publishes transitions into a
7//! `watch` cell so an embedder can `await` them ([`Runtime::watch_state`](crate::Runtime::watch_state))
8//! instead of polling [`status`](crate::Runtime::status), and
9//! [`Runtime::wait_until_running`](crate::Runtime::wait_until_running) is a one-shot convenience
10//! built on the same cell.
11
12/// The control-plane lifecycle state of a device.
13///
14/// Published by the control runner as it brings the node up and maintains the netmap stream. A
15/// consumer watches this to drive UI ("connecting…", "needs login", "expired") and to distinguish a
16/// permanent failure from a transient one without inspecting logs.
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum DeviceState {
19 /// The runtime has spawned and is registering / establishing the control session. The initial
20 /// state of every device.
21 Connecting,
22 /// Registered and the netmap stream is live — the node is up.
23 Running,
24 /// Control requires interactive authentication (no usable auth key): the node is waiting for a
25 /// human to authorize it at the carried URL. Transient — registration retries until authorized.
26 NeedsLogin(url::Url),
27 /// The node key has expired (control reported the self-node's key expiry is in the past). The
28 /// node must re-authenticate to continue. Surfaced from the netmap self-node, not registration.
29 Expired,
30 /// Registration hard-failed with a permanent reason (e.g. a bad/expired/unknown auth key). The
31 /// control runner stops; this carries the typed reason. Not retried.
32 Failed(RegistrationError),
33}
34
35/// A typed registration outcome, distinguishing a **permanent** failure (don't retry — tell the
36/// user) from a **transient** one (worth retrying).
37///
38/// This is the error surfaced by [`Runtime::wait_until_running`](crate::Runtime::wait_until_running),
39/// replacing the previous "poll `ipv4_addr` until a deadline and report a generic timeout" workaround
40/// with an actionable reason.
41#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
42pub enum RegistrationError {
43 /// Control rejected registration with a permanent reason — typically a bad, expired, or unknown
44 /// auth key. The string is control's verbatim reason. **Permanent**: re-pairing (a new auth
45 /// key) is required; retrying with the same key will not succeed.
46 #[error("authentication rejected by control: {0}")]
47 AuthRejected(String),
48
49 /// The node key has expired. **Permanent** until re-authentication.
50 #[error("node key expired; re-authentication required")]
51 KeyExpired,
52
53 /// Interactive authorization is required: control offered an auth URL (no usable auth key).
54 /// **Actionable but not permanent** — direct the user to the URL; the runtime keeps retrying
55 /// registration and will reach `Running` once the user authorizes (so this is *not*
56 /// [`is_permanent`](Self::is_permanent)). A caller using an auth key should not hit this; a
57 /// caller doing interactive auth should drive it via
58 /// [`watch_state`](crate::Runtime::watch_state) rather than treating this as a hard failure.
59 #[error("interactive login required at {0}")]
60 NeedsLogin(url::Url),
61
62 /// The control plane was unreachable (network/transport error). **Transient**: retrying later
63 /// may succeed.
64 #[error("control plane unreachable")]
65 NetworkUnreachable,
66
67 /// No settled state was reached before the caller's timeout elapsed. **Indeterminate**:
68 /// registration may still be in flight (e.g. slow control plane); the caller may retry the wait.
69 #[error("timed out waiting for the device to finish registering")]
70 Timeout,
71}
72
73impl RegistrationError {
74 /// Whether this outcome is **permanent** — re-pairing / new credentials are required and
75 /// retrying as-is will not succeed (`AuthRejected`, `KeyExpired`). Everything else is not
76 /// permanent: `NetworkUnreachable`/`Timeout` are transient (retry may succeed), and `NeedsLogin`
77 /// is actionable-but-recoverable (the runtime keeps retrying and reaches `Running` once the user
78 /// authorizes the offered URL — so it is *not* permanent).
79 pub fn is_permanent(&self) -> bool {
80 matches!(
81 self,
82 RegistrationError::AuthRejected(_) | RegistrationError::KeyExpired
83 )
84 }
85}
86
87/// Map a control-layer [`ts_control::Error`] from the registration path into a typed
88/// [`RegistrationError`]. Used by the control runner when its `check_auth` loop hard-fails.
89impl From<&ts_control::Error> for RegistrationError {
90 fn from(e: &ts_control::Error) -> Self {
91 match e {
92 ts_control::Error::MachineNotAuthorized(u) => RegistrationError::NeedsLogin(u.clone()),
93 ts_control::Error::Registration(reason) => {
94 RegistrationError::AuthRejected(reason.clone())
95 }
96 ts_control::Error::NetworkError(_) => RegistrationError::NetworkUnreachable,
97 // InvalidUrl / Internal: not a transient network condition and not an auth decision —
98 // treat as a (permanent-ish) auth rejection carrying the display reason so the caller
99 // sees something actionable rather than an opaque "timeout".
100 other => RegistrationError::AuthRejected(other.to_string()),
101 }
102 }
103}
104
105/// Wait on a [`DeviceState`] `watch` channel until it settles, mapping the settled state to the
106/// typed [`wait_until_running`](crate::Runtime::wait_until_running) result.
107///
108/// Factored out of [`Runtime::wait_until_running`](crate::Runtime) so the (non-trivial) loop — the
109/// see-then-await ordering, the per-state mapping, sender-drop handling, and the timeout — is
110/// unit-testable against a plain `watch::channel` without standing up a runtime.
111pub(crate) async fn wait_for_running(
112 mut rx: tokio::sync::watch::Receiver<DeviceState>,
113 timeout: Option<core::time::Duration>,
114) -> Result<(), RegistrationError> {
115 let wait = async {
116 loop {
117 // Evaluate the current value, then await a change. `borrow_and_update` marks the current
118 // value seen so a transition isn't missed between this check and `changed()`.
119 let settled = match &*rx.borrow_and_update() {
120 DeviceState::Running => Some(Ok(())),
121 DeviceState::Failed(e) => Some(Err(e.clone())),
122 DeviceState::Expired => Some(Err(RegistrationError::KeyExpired)),
123 DeviceState::NeedsLogin(u) => Some(Err(RegistrationError::NeedsLogin(u.clone()))),
124 DeviceState::Connecting => None,
125 };
126 if let Some(result) = settled {
127 return result;
128 }
129 // Not settled yet — wait for the next transition. If the sender is dropped (runtime
130 // tearing down), treat it as unreachable rather than hanging forever.
131 if rx.changed().await.is_err() {
132 return Err(RegistrationError::NetworkUnreachable);
133 }
134 }
135 };
136
137 match timeout {
138 Some(timeout) => tokio::time::timeout(timeout, wait)
139 .await
140 .unwrap_or(Err(RegistrationError::Timeout)),
141 None => wait.await,
142 }
143}
144
#[cfg(test)]
mod tests {
    use core::time::Duration;

    use tokio::sync::watch;

    use super::*;

    /// Upper bound for waits that are expected to settle long before it elapses.
    const AMPLE: Duration = Duration::from_secs(1);

    /// Parses a literal into a URL for use as test data.
    fn test_url(raw: &str) -> url::Url {
        raw.parse().unwrap()
    }

    /// Runs `wait_for_running` against a channel whose sender stays alive and whose value never
    /// changes from `initial`.
    async fn wait_on_fixed(initial: DeviceState) -> Result<(), RegistrationError> {
        let (_keep_alive, rx) = watch::channel(initial);
        wait_for_running(rx, Some(AMPLE)).await
    }

    #[test]
    fn permanence_classification() {
        // Permanent: re-pairing / new credentials required.
        let permanent = [
            RegistrationError::AuthRejected("bad key".into()),
            RegistrationError::KeyExpired,
        ];
        for outcome in permanent {
            assert!(outcome.is_permanent());
        }

        // Not permanent: NeedsLogin recovers once the user authorizes (the runtime keeps
        // retrying); network/timeout are transient.
        let recoverable = [
            RegistrationError::NeedsLogin(test_url("https://login.example/x")),
            RegistrationError::NetworkUnreachable,
            RegistrationError::Timeout,
        ];
        for outcome in recoverable {
            assert!(!outcome.is_permanent());
        }
    }

    #[test]
    fn maps_control_error_variants() {
        let link = test_url("https://login.example/a");

        let needs_login =
            RegistrationError::from(&ts_control::Error::MachineNotAuthorized(link.clone()));
        assert_eq!(needs_login, RegistrationError::NeedsLogin(link));

        let rejected =
            RegistrationError::from(&ts_control::Error::Registration("bad auth key".into()));
        assert_eq!(rejected, RegistrationError::AuthRejected("bad auth key".into()));

        let unreachable = RegistrationError::from(&ts_control::Error::NetworkError(
            ts_control::Operation::Registration,
        ));
        assert_eq!(unreachable, RegistrationError::NetworkUnreachable);
    }

    // --- wait_for_running loop ---

    /// A cell that already holds `Running` resolves to `Ok(())` straight away: the first
    /// `borrow_and_update` sees it, so no transition has to arrive.
    #[tokio::test]
    async fn wait_resolves_when_already_running() {
        let outcome = wait_on_fixed(DeviceState::Running).await;
        assert_eq!(outcome, Ok(()));
    }

    /// A `Connecting → Running` transition published by another task is picked up (no missed
    /// wakeup) and resolves to `Ok(())`.
    #[tokio::test]
    async fn wait_resolves_on_transition_to_running() {
        let (tx, rx) = watch::channel(DeviceState::Connecting);
        tokio::spawn(async move {
            tokio::time::sleep(Duration::from_millis(20)).await;
            tx.send_replace(DeviceState::Running);
        });

        let outcome = wait_for_running(rx, Some(AMPLE)).await;
        assert_eq!(outcome, Ok(()));
    }

    /// Every settled state other than `Running` maps to its own typed error.
    #[tokio::test]
    async fn wait_maps_each_settled_failure() {
        let cases = [
            (
                DeviceState::Failed(RegistrationError::AuthRejected("bad".into())),
                RegistrationError::AuthRejected("bad".into()),
            ),
            (DeviceState::Expired, RegistrationError::KeyExpired),
            (
                DeviceState::NeedsLogin(test_url("https://login.example/x")),
                RegistrationError::NeedsLogin(test_url("https://login.example/x")),
            ),
        ];

        for (initial, expected) in cases {
            assert_eq!(wait_on_fixed(initial).await, Err(expected));
        }
    }

    /// A cell that stays at `Connecting` beyond the timeout produces `Timeout`.
    #[tokio::test]
    async fn wait_times_out_while_connecting() {
        let (_keep_alive, rx) = watch::channel(DeviceState::Connecting);
        let outcome = wait_for_running(rx, Some(Duration::from_millis(30))).await;
        assert_eq!(outcome, Err(RegistrationError::Timeout));
    }

    /// Dropping the sender while the cell is still `Connecting` ends the wait with
    /// `NetworkUnreachable` instead of hanging forever.
    #[tokio::test]
    async fn wait_sender_dropped_is_network_unreachable() {
        let (tx, rx) = watch::channel(DeviceState::Connecting);
        drop(tx);

        let outcome = wait_for_running(rx, Some(AMPLE)).await;
        assert_eq!(outcome, Err(RegistrationError::NetworkUnreachable));
    }
}