Skip to main content

studio_worker/
auto_register.rs

//! Auto-register state machine — the only registration path.
//!
//! On first launch the worker POSTs `/workers/register-request`
//! to the studio with a self-generated install id + a registration
//! secret (only its SHA-256 hash leaves the box), then polls
//! `/workers/register-requests/<id>` every 30s for the operator's
//! decision.  On Approved we persist `worker_id` + `auth_token` to
//! `config.toml` and fall through to the normal heartbeat / claim
//! loops.  On Rejected we surface the reason; the user clears state
//! with `studio-worker register --reset` to retry.
//!
//! `tick()` does at most one HTTP round-trip per call so the outer
//! orchestrator can sleep between polls.  All persistence goes
//! through `config::save` so a crash mid-flight leaves consistent
//! on-disk state.
16
17use std::path::Path;
18use std::sync::Arc;
19
20use anyhow::Result;
21use chrono::{DateTime, Utc};
22use parking_lot::Mutex;
23use sha2::{Digest, Sha256};
24
25use crate::{
26    config::{self, SharedConfig},
27    engine,
28    http::ApiClient,
29    runtime::build_capabilities,
30    types::{AutoRegisterRequest, RegisterStatus},
31    AGENT_VERSION,
32};
33
34/// What `tick()` returns + what the UI Status tab reads.  Distinct
35/// from the persisted config fields, which carry the raw building
36/// blocks (`install_id`, `registration_request_id`, …).
37#[derive(Debug, Clone, PartialEq, Eq)]
38pub enum RegistrationState {
39    /// First-launch default; no request in flight, no worker_id.
40    Pristine,
41    /// Studio has a Pending row for us; we're polling for the
42    /// operator's decision.
43    Pending {
44        request_id: String,
45        /// First time we saw this request in the Pending state.
46        since: DateTime<Utc>,
47    },
48    /// `worker_id` + `auth_token` are in config; ready for the
49    /// normal heartbeat / claim loops.
50    Approved,
51    /// Operator rejected the request.  Worker stops trying;
52    /// `studio-worker register --reset` clears the state.
53    Rejected { reason: String },
54}
55
56/// Shared in-memory mirror of `RegistrationState` for the UI to read
57/// (the persisted source of truth is `Config`).
58pub type SharedRegistration = Arc<Mutex<RegistrationState>>;
59
60pub fn shared_initial() -> SharedRegistration {
61    Arc::new(Mutex::new(RegistrationState::Pristine))
62}
63
64/// One iteration of the state machine.
65///
66/// Reads the current `Config` snapshot, decides what to do, performs
67/// at most one HTTP call, persists changes via `config::save`,
68/// mirrors the new state into `observers`, and returns it.
69///
70/// Idempotent: re-running with the same on-disk state and a
71/// pending-returning studio is a no-op on disk.
72pub async fn tick(
73    cfg: &SharedConfig,
74    config_path: &Path,
75    observers: &SharedRegistration,
76) -> RegistrationState {
77    // Fast path: already registered.
78    {
79        let snap = cfg.lock();
80        if snap.worker_id.is_some() && snap.auth_token.is_some() {
81            *observers.lock() = RegistrationState::Approved;
82            return RegistrationState::Approved;
83        }
84    }
85
86    // Ensure install_id + secret are present before doing any HTTP.
87    ensure_install_state(cfg, config_path);
88
89    // Branch on whether we already have a request id.
90    let (api_base_url, request_id, secret, install_id) = {
91        let snap = cfg.lock();
92        (
93            snap.api_base_url.clone(),
94            snap.registration_request_id.clone(),
95            snap.registration_secret.clone(),
96            snap.install_id.clone(),
97        )
98    };
99
100    match (request_id, secret) {
101        (Some(rid), Some(sec)) => {
102            poll_existing(cfg, config_path, observers, api_base_url, rid, sec).await
103        }
104        _ => {
105            create_request(
106                cfg,
107                config_path,
108                observers,
109                api_base_url,
110                install_id.expect("ensure_install_state seeds install_id"),
111            )
112            .await
113        }
114    }
115}
116
117fn ensure_install_state(cfg: &SharedConfig, config_path: &Path) {
118    let mut snap = cfg.lock();
119    let mut dirty = false;
120    if snap.install_id.is_none() {
121        snap.install_id = Some(new_uuid());
122        dirty = true;
123    }
124    // Pre-allocate the secret only if we also have no request id.
125    // Otherwise the existing pair is still valid.
126    if snap.registration_request_id.is_none() && snap.registration_secret.is_none() {
127        snap.registration_secret = Some(new_secret_hex());
128        dirty = true;
129    }
130    if dirty {
131        let snapshot = snap.clone();
132        drop(snap);
133        if let Err(e) = config::save(&snapshot, config_path) {
134            tracing::warn!(
135                target: "studio_worker::auto_register",
136                "failed to persist install state: {e}"
137            );
138        }
139    }
140}
141
142async fn create_request(
143    cfg: &SharedConfig,
144    config_path: &Path,
145    observers: &SharedRegistration,
146    api_base_url: String,
147    install_id: String,
148) -> RegistrationState {
149    let secret = match cfg.lock().registration_secret.clone() {
150        Some(s) => s,
151        None => {
152            // Should never happen post-ensure_install_state, but be safe.
153            let s = new_secret_hex();
154            cfg.lock().registration_secret = Some(s.clone());
155            s
156        }
157    };
158    let secret_hash = sha256_hex(&secret);
159
160    // Build the capabilities snapshot the operator will see.
161    let payload = match build_payload(cfg, install_id.clone(), secret_hash) {
162        Ok(p) => p,
163        Err(e) => {
164            tracing::warn!(
165                target: "studio_worker::auto_register",
166                "engine build failed during register-request: {e}"
167            );
168            return RegistrationState::Pristine;
169        }
170    };
171
172    let api_base_url_for_task = api_base_url.clone();
173    let payload_for_task = payload.clone();
174    let result = tokio::task::spawn_blocking(move || -> Result<_> {
175        let api = ApiClient::new(api_base_url_for_task)?;
176        api.register_request(&payload_for_task)
177    })
178    .await;
179
180    let response = match result {
181        Ok(Ok(r)) => r,
182        Ok(Err(e)) => {
183            tracing::warn!(
184                target: "studio_worker::auto_register",
185                "register-request HTTP failed; will retry next tick: {e}"
186            );
187            return RegistrationState::Pristine;
188        }
189        Err(e) => {
190            tracing::warn!(
191                target: "studio_worker::auto_register",
192                "register-request task panic; will retry next tick: {e}"
193            );
194            return RegistrationState::Pristine;
195        }
196    };
197
198    // Persist requestId.
199    let now = Utc::now();
200    {
201        let mut snap = cfg.lock();
202        snap.registration_request_id = Some(response.request_id.clone());
203        let snapshot = snap.clone();
204        drop(snap);
205        if let Err(e) = config::save(&snapshot, config_path) {
206            tracing::warn!(
207                target: "studio_worker::auto_register",
208                "failed to persist request_id: {e}"
209            );
210        }
211    }
212    let state = RegistrationState::Pending {
213        request_id: response.request_id,
214        since: now,
215    };
216    *observers.lock() = state.clone();
217    state
218}
219
220async fn poll_existing(
221    cfg: &SharedConfig,
222    config_path: &Path,
223    observers: &SharedRegistration,
224    api_base_url: String,
225    request_id: String,
226    secret: String,
227) -> RegistrationState {
228    let api_base_url_for_task = api_base_url.clone();
229    let request_id_for_task = request_id.clone();
230    let secret_for_task = secret.clone();
231    let result = tokio::task::spawn_blocking(move || -> Result<_> {
232        let api = ApiClient::new(api_base_url_for_task)?;
233        api.poll_register_status(&request_id_for_task, &secret_for_task)
234    })
235    .await;
236
237    let outcome = match result {
238        Ok(Ok(o)) => o,
239        Ok(Err(e)) => {
240            tracing::warn!(
241                target: "studio_worker::auto_register",
242                "poll failed; will retry next tick: {e}"
243            );
244            let state = RegistrationState::Pending {
245                request_id,
246                since: Utc::now(),
247            };
248            *observers.lock() = state.clone();
249            return state;
250        }
251        Err(e) => {
252            tracing::warn!(
253                target: "studio_worker::auto_register",
254                "poll task panic; will retry next tick: {e}"
255            );
256            let state = RegistrationState::Pending {
257                request_id,
258                since: Utc::now(),
259            };
260            *observers.lock() = state.clone();
261            return state;
262        }
263    };
264
265    match outcome {
266        None => {
267            // 404: studio doesn't know this request id anymore.  Drop
268            // the stale id + secret so the next tick creates fresh.
269            {
270                let mut snap = cfg.lock();
271                snap.registration_request_id = None;
272                snap.registration_secret = None;
273                let snapshot = snap.clone();
274                drop(snap);
275                if let Err(e) = config::save(&snapshot, config_path) {
276                    tracing::warn!(
277                        target: "studio_worker::auto_register",
278                        config_path = %config_path.display(),
279                        "failed to persist cleared request state after stale 404; the stale request id stays on disk until the next successful save: {e}"
280                    );
281                }
282            }
283            *observers.lock() = RegistrationState::Pristine;
284            RegistrationState::Pristine
285        }
286        Some(RegisterStatus::Pending) => {
287            let state = RegistrationState::Pending {
288                request_id,
289                since: Utc::now(),
290            };
291            *observers.lock() = state.clone();
292            state
293        }
294        Some(RegisterStatus::Approved {
295            worker_id,
296            auth_token,
297        }) => {
298            {
299                let mut snap = cfg.lock();
300                snap.worker_id = Some(worker_id);
301                snap.auth_token = Some(auth_token);
302                snap.registration_request_id = None;
303                snap.registration_secret = None;
304                let snapshot = snap.clone();
305                drop(snap);
306                if let Err(e) = config::save(&snapshot, config_path) {
307                    tracing::error!(
308                        target: "studio_worker::auto_register",
309                        config_path = %config_path.display(),
310                        "failed to persist approved credentials; this session is registered in memory but the worker will re-register from scratch on the next restart: {e}"
311                    );
312                }
313            }
314            *observers.lock() = RegistrationState::Approved;
315            RegistrationState::Approved
316        }
317        Some(RegisterStatus::Rejected { reason }) => {
318            {
319                let mut snap = cfg.lock();
320                snap.registration_request_id = None;
321                snap.registration_secret = None;
322                let snapshot = snap.clone();
323                drop(snap);
324                if let Err(e) = config::save(&snapshot, config_path) {
325                    tracing::warn!(
326                        target: "studio_worker::auto_register",
327                        config_path = %config_path.display(),
328                        "failed to persist cleared request state after rejection; the stale request id stays on disk until the next successful save: {e}"
329                    );
330                }
331            }
332            let state = RegistrationState::Rejected { reason };
333            *observers.lock() = state.clone();
334            state
335        }
336    }
337}
338
339fn build_payload(
340    cfg: &SharedConfig,
341    install_id: String,
342    registration_secret_hash: String,
343) -> Result<AutoRegisterRequest> {
344    let snap = cfg.lock().clone();
345    let engine_handle = engine::build(&snap)?;
346    let capabilities = build_capabilities(&snap, &*engine_handle);
347    Ok(AutoRegisterRequest {
348        install_id,
349        registration_secret_hash,
350        capabilities,
351        user_agent: format!("studio-worker/{AGENT_VERSION}"),
352    })
353}
354
355fn new_uuid() -> String {
356    // UUIDv4-ish without pulling in the `uuid` crate: 16 random bytes
357    // formatted as 8-4-4-4-12.
358    let bytes: [u8; 16] = rand_bytes::<16>();
359    let hex: String = bytes.iter().map(|b| format!("{b:02x}")).collect();
360    format!(
361        "{}-{}-{}-{}-{}",
362        &hex[0..8],
363        &hex[8..12],
364        &hex[12..16],
365        &hex[16..20],
366        &hex[20..32]
367    )
368}
369
370fn new_secret_hex() -> String {
371    // 32 bytes of randomness = 64 hex chars (256 bits of entropy).
372    let bytes: [u8; 32] = rand_bytes::<32>();
373    bytes.iter().map(|b| format!("{b:02x}")).collect()
374}
375
376fn sha256_hex(input: &str) -> String {
377    let mut hasher = Sha256::new();
378    hasher.update(input.as_bytes());
379    let digest = hasher.finalize();
380    digest.iter().map(|b| format!("{b:02x}")).collect()
381}
382
383/// Fill `N` bytes from the OS cryptographically-secure RNG via the
384/// `getrandom` crate (`getrandom(2)` / `/dev/urandom` on Linux,
385/// `getentropy` on macOS, `BCryptGenRandom` on Windows).  Used for
386/// the install id and the registration secret, both of which must be
387/// unpredictable: an attacker who could guess the secret could claim
388/// another box's pending registration.
389///
390/// Panics if the OS entropy source is unavailable.  That only happens
391/// on a fundamentally broken platform, and failing loudly (the panic
392/// is captured by Sentry) is the right call — minting a guessable
393/// secret from a timestamp would be worse than a clean crash.
394///
395/// This used to hand-roll a `/dev/urandom` read on unix and silently
396/// fall back to a SHA-256-mixed timestamp on Windows (and on any I/O
397/// error), which left the Windows secret predictable.  `getrandom` is
398/// a tiny syscall wrapper already in the dep tree, so the whole
399/// per-OS dance collapses into one secure, testable path.
400fn rand_bytes<const N: usize>() -> [u8; N] {
401    let mut buf = [0u8; N];
402    getrandom::fill(&mut buf).expect("OS entropy source (getrandom) unavailable");
403    buf
404}
405
#[cfg(test)]
mod tests {
    use super::*;

    /// The install id must look like a UUID: five lowercase-hex groups
    /// of 8-4-4-4-12 characters joined by dashes.
    #[test]
    fn new_uuid_has_expected_shape() {
        let id = new_uuid();
        let group_lens: Vec<usize> = id.split('-').map(str::len).collect();
        assert_eq!(group_lens, [8, 4, 4, 4, 12]);
        assert!(id.chars().all(|c| c == '-' || c.is_ascii_hexdigit()));
    }

    /// Two consecutive ids must differ.
    #[test]
    fn new_uuid_is_unique() {
        let first = new_uuid();
        let second = new_uuid();
        assert_ne!(first, second);
    }

    /// 32 random bytes render as exactly 64 hex characters.
    #[test]
    fn new_secret_hex_is_64_chars() {
        let secret = new_secret_hex();
        assert_eq!(secret.len(), 64);
        assert!(secret.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// Same input gives the same digest, different input a different
    /// one, and the hex digest is always 64 characters long.
    #[test]
    fn sha256_hex_is_deterministic() {
        assert_eq!(sha256_hex("abc"), sha256_hex("abc"));
        assert_ne!(sha256_hex("abc"), sha256_hex("abd"));
        assert_eq!(sha256_hex("").len(), 64);
    }

    // ---------------------------------------------------------------
    // Entropy primitive.  `rand_bytes` is the single source for the
    // install id + registration secret on every platform, so these
    // also cover the formerly-untested Windows path (which used to
    // route through a predictable timestamp fallback).
    // ---------------------------------------------------------------

    /// 2 000 draws of 32 bytes must never repeat.
    #[test]
    fn rand_bytes_are_distinct_across_many_calls() {
        use std::collections::HashSet;

        let mut seen = HashSet::new();
        for _ in 0..2_000 {
            let sample = rand_bytes::<32>();
            assert!(
                seen.insert(sample),
                "rand_bytes produced a duplicate 32-byte value"
            );
        }
    }

    /// Every one of the 256 bit positions must be seen both set and
    /// cleared across the samples.
    #[test]
    fn rand_bytes_cover_every_bit_position() {
        // OR + AND across many samples: a stuck or constant source
        // would leave a bit position never set (an OR-zero) or never
        // cleared (an AND-one).  An OS CSPRNG flips every one of the
        // 256 bits within a handful of samples.
        let mut ever_set = [0u8; 32];
        let mut ever_clear = [0xffu8; 32];
        for _ in 0..256 {
            let sample = rand_bytes::<32>();
            let accumulators = ever_set.iter_mut().zip(ever_clear.iter_mut());
            for ((set, clear), byte) in accumulators.zip(sample.iter()) {
                *set |= *byte;
                *clear &= *byte;
            }
        }
        assert_eq!(ever_set, [0xffu8; 32], "a bit position was never set");
        assert_eq!(ever_clear, [0u8; 32], "a bit position was never cleared");
    }
}