Skip to main content

wire/
daemon_supervisor.rs

1//! `wire daemon --all-sessions` — multi-session supervisor.
2//!
3//! ## Why
4//!
5//! honey-pine's 2026-06-01 dogfood (#162) surfaced a launchd-vs-session
6//! isolation gap: the `sh.slancha.wire.daemon` launchd unit invokes
7//! `wire daemon --interval 5` with **no cwd context**. With WIRE_HOME
8//! unset, the daemon resolves to the *default* session WIRE_HOME and
9//! silently skips every other initialized session. Operators with
10//! multiple per-project sessions (slancha-mesh, wire, etc.) saw their
11//! shell `wire status` report `running:false` even with the launchd
12//! daemon perfectly alive — same daemon, different state tree.
13//!
14//! Her working remedy was `launchctl bootout` + `nohup wire daemon`
15//! from the project cwd. That works for one session but doesn't scale
16//! to N. The architectural fix is a supervisor that owns the
17//! multi-session orchestration: one supervisor process per launchd
18//! unit, N child `wire daemon --session <name>` processes — each with
19//! its own pinned `WIRE_HOME` and its own pidfile under that session's
20//! state dir. `wire status` from any cwd then sees its session's child
21//! pid and reports truthfully.
22//!
23//! ## Model
24//!
25//! - **Fork-exec, not threads.** Each session's daemon needs its own
26//!   `WIRE_HOME`. We set it via the child process env so the daemon
27//!   code path stays unchanged. Threads would mean global mutable
28//!   `WIRE_HOME` and cross-session races.
29//! - **Idempotent spawn.** Before spawning a child for session S,
30//!   check `daemon_singleton_holder()` on that session's home. If a
31//!   live daemon already exists (operator ran `wire daemon` directly
32//!   in S's cwd, or supervisor restarted and the old child is still
33//!   alive), leave it alone.
34//! - **Reap via polling, not SIGCHLD.** macOS launchd-supervised
35//!   processes already get SIGCHLD overhead; `try_wait` polling on a
36//!   short interval is simpler and bug-free across platforms.
37//! - **Backoff on rapid failure.** A child that exits within 10s of
38//!   spawn doubles its respawn delay (1s → 60s cap). Prevents a broken
39//!   session (corrupt key, missing relay) from fork-bombing.
40//! - **Don't exit on zero sessions.** Sleep and re-poll the registry —
41//!   new sessions get picked up without supervisor restart.
42//! - **Adopt orphaned children on supervisor restart.** When launchd
43//!   relaunches the supervisor, the previous supervisor's children
44//!   keep running (correct: they're still syncing). New supervisor
45//!   sees their pidfiles, skips re-spawning, and lets them keep going
46//!   until their next natural exit (then it spawns a fresh child).
47//!
48//! ## Invariants
49//!
50//! - One supervisor per launchd unit per machine. Singleton guard on
51//!   `sessions_root()/supervisor.pid` (separate from per-session
52//!   daemon pidfiles).
53//! - Child env contains exactly one wire-relevant variable:
54//!   `WIRE_HOME=<session-home>`. Any other inherited WIRE_* vars are
55//!   stripped so the operator's shell config doesn't leak in.
56//! - Per-session daemon code is *unchanged* — supervisor is a pure
57//!   orchestrator.
58
59use std::collections::HashMap;
60use std::path::{Path, PathBuf};
61use std::process::{Child, Command};
62use std::time::{Duration, Instant, SystemTime};
63
64use anyhow::{Context, Result};
65use serde_json::json;
66
/// How often the supervisor re-reads the session registry, in seconds.
/// Tradeoff: a new session created by `wire session new` waits up to
/// this many seconds before its daemon comes up. 10s strikes a balance —
/// fast enough that operators don't notice, slow enough that registry
/// fork-execs don't dominate.
///
/// This is also the sleep at the bottom of the supervisor loop, so
/// child reaping and backoff checks run at the same cadence.
const REGISTRY_POLL_SECS: u64 = 10;
73
/// Seed respawn delay after a child exits. Doubles on each rapid
/// failure (exit within `RAPID_FAIL_WINDOW`) up to `MAX_BACKOFF`, and
/// is restored after any non-rapid exit. Note the doubling is applied
/// to this seed on the very first rapid failure, so the first recorded
/// delay for a crashing session is twice this value.
const INITIAL_BACKOFF: Duration = Duration::from_secs(1);
/// Upper bound on the per-session respawn delay; doubling is clamped
/// to this value.
const MAX_BACKOFF: Duration = Duration::from_secs(60);
/// A child whose lifetime (spawn → observed exit) is shorter than this
/// is classified as a rapid failure and grows its session's backoff.
const RAPID_FAIL_WINDOW: Duration = Duration::from_secs(10);
80
/// Default idle cutoff for registry-unbound sessions. `list_sessions()`
/// enumerates *every* session home ever minted on the machine — and
/// because each Claude tab / `wire session new` mints a fresh persona
/// home, a long-lived box accumulates hundreds (honey-pine's had 147).
/// Spawning one daemon per home turns `--all-sessions` into a fork
/// storm. A session is kept regardless of age if it has a registry cwd
/// binding (operator deliberately bound it); an *unbound* session is
/// only kept if it has been active within this window. Override via
/// `WIRE_ALL_SESSIONS_MAX_IDLE_DAYS` (0 disables the filter → legacy
/// spawn-for-all behavior).
const DEFAULT_MAX_IDLE_DAYS: u64 = 7;

/// Parse the idle cutoff from its raw (environment) representation.
///
/// * `None` (variable unset) → the default of `DEFAULT_MAX_IDLE_DAYS`.
/// * `"0"` → `None`, i.e. no filter: spawn for every session.
/// * any other non-negative integer → that many days.
/// * unparseable text → the default.
///
/// Surrounding whitespace is ignored. The day → second conversion
/// saturates, so an absurdly large value yields the maximum cutoff
/// instead of overflowing (a panic in debug builds, a silently wrapped
/// and therefore *tiny* cutoff in release builds).
///
/// Pure, so it's unit-testable without mutating process env.
fn parse_max_idle(raw: Option<&str>) -> Option<Duration> {
    const SECS_PER_DAY: u64 = 86_400;
    let days = raw
        .and_then(|v| v.trim().parse::<u64>().ok())
        .unwrap_or(DEFAULT_MAX_IDLE_DAYS);
    (days != 0).then(|| Duration::from_secs(days.saturating_mul(SECS_PER_DAY)))
}
106
107/// Read the idle cutoff from the environment. `None` means "no idle
108/// filter" (spawn a daemon for every session — pre-fix behavior),
109/// selected by setting `WIRE_ALL_SESSIONS_MAX_IDLE_DAYS=0`.
110fn max_idle_from_env() -> Option<Duration> {
111    parse_max_idle(
112        std::env::var("WIRE_ALL_SESSIONS_MAX_IDLE_DAYS")
113            .ok()
114            .as_deref(),
115    )
116}
117
/// Most recent modification time across a session home's activity
/// files — the supervisor's "last actually *synced*" signal.
///
/// The files are looked up under `<home>/state/wire/` (the same subtree
/// the per-session daemon and `existing_daemon_for_session` use), not
/// the home root. `last_sync.json` is rewritten on every successful
/// daemon relay cycle; the cursors move on inbox/reactor activity.
/// Files that are missing or whose mtime cannot be read are ignored;
/// `None` is returned when none of them yields a timestamp (a husk
/// that has never synced).
///
/// `daemon.pid` is intentionally not consulted: it is written on
/// *spawn*, so counting it would make eligibility self-perpetuating —
/// the supervisor spawns a daemon, the pidfile refreshes, and the
/// session would never age out even if it never syncs anything.
fn fs_last_active(home: &Path) -> Option<SystemTime> {
    const ACTIVITY_FILES: [&str; 3] = ["last_sync.json", "notify.cursor", "reactor.cursor"];

    let state_dir = home.join("state").join("wire");
    let mut newest: Option<SystemTime> = None;
    for file in ACTIVITY_FILES.iter() {
        let mtime = match std::fs::metadata(state_dir.join(file)).and_then(|meta| meta.modified()) {
            Ok(mtime) => mtime,
            Err(_) => continue,
        };
        match newest {
            Some(current) if current >= mtime => {}
            _ => newest = Some(mtime),
        }
    }
    newest
}
138
139/// Filter `list_sessions()` down to the sessions the supervisor should
140/// own a daemon for. A session is eligible iff it has a registry cwd
141/// binding OR it was active within `max_idle`. `max_idle == None`
142/// disables the filter (every session eligible). Pure: the activity
143/// probe is injected so this is unit-testable without touching disk.
144fn supervisor_eligible<F>(
145    sessions: Vec<crate::session::SessionInfo>,
146    max_idle: Option<Duration>,
147    now: SystemTime,
148    last_active: F,
149) -> Vec<crate::session::SessionInfo>
150where
151    F: Fn(&Path) -> Option<SystemTime>,
152{
153    let Some(max_idle) = max_idle else {
154        return sessions;
155    };
156    sessions
157        .into_iter()
158        .filter(|s| {
159            if s.cwd.is_some() {
160                return true;
161            }
162            match last_active(&s.home_dir) {
163                // `duration_since` errors when the file mtime is in the
164                // future (clock skew) — treat that as "active now".
165                Some(t) => now.duration_since(t).map(|d| d <= max_idle).unwrap_or(true),
166                None => false,
167            }
168        })
169        .collect()
170}
171
172// ---- husk reaper (the 175-dir by-key accumulation fix) ----
173
/// Default age below which a husk is left alone, in hours. Generous on
/// purpose: a brand-new agent session may mint its by-key home minutes
/// before it first inits/sends. Two days is far past any plausible
/// "about to become real" window while still draining the backlog
/// (honey-pine regrew 9 husks in one minute; 175 over two weeks).
const DEFAULT_HUSK_REAP_MAX_AGE_HOURS: u64 = 48;

/// How often the supervisor sweeps for husks. The reap is cheap (one
/// readdir + a few stats per entry) but there's no reason to run it on
/// every 10s registry poll — husks age in days, not seconds.
const HUSK_REAP_INTERVAL: Duration = Duration::from_secs(3600);

/// Parse the husk reap cutoff from its raw (environment) representation.
///
/// * `None` (variable unset) → the default of
///   `DEFAULT_HUSK_REAP_MAX_AGE_HOURS`.
/// * `"0"` → `None`, i.e. the reaper is disabled.
/// * any other non-negative integer → that many hours.
/// * unparseable text → the default.
///
/// Surrounding whitespace is ignored. The hour → second conversion
/// saturates: an overflowing product would otherwise panic in debug
/// builds or wrap to a *small* age in release builds, and a small age
/// makes the reaper delete young homes.
///
/// Pure, mirrors `parse_max_idle`.
fn parse_husk_reap_max_age(raw: Option<&str>) -> Option<Duration> {
    const SECS_PER_HOUR: u64 = 3600;
    let hours = raw
        .and_then(|v| v.trim().parse::<u64>().ok())
        .unwrap_or(DEFAULT_HUSK_REAP_MAX_AGE_HOURS);
    (hours != 0).then(|| Duration::from_secs(hours.saturating_mul(SECS_PER_HOUR)))
}
198
199/// Read the husk reap cutoff from the environment.
200/// `WIRE_HUSK_REAP_MAX_AGE_HOURS=0` disables the reaper entirely.
201fn husk_reap_max_age_from_env() -> Option<Duration> {
202    parse_husk_reap_max_age(
203        std::env::var("WIRE_HUSK_REAP_MAX_AGE_HOURS")
204            .ok()
205            .as_deref(),
206    )
207}
208
209/// Delete husk session homes under `by_key_root` and return what was
210/// removed.
211///
212/// Every wire invocation inside an agent terminal mints a
213/// `sessions/by-key/<hash>/` home via session adoption (RFC-008), even
214/// for read-only commands, and nothing ever deleted them — a dev box
215/// accumulated 175 empty dirs in two weeks. The idle filter
216/// (`supervisor_eligible`) stops the daemon fork-storm but leaves the
217/// dirs. This is the complement: the filter hides, the reaper removes.
218///
219/// A dir is reaped only if ALL of these hold:
220/// - its name has the by-key shape (exactly 16 lowercase hex chars,
221///   `session_home_for_key`'s output) — named sessions are
222///   operator-created and never touched;
223/// - it holds NO identity (`config/wire/private.key` absent);
224/// - it has never synced (`fs_last_active` → None);
225/// - it is not registry-bound (`bound_names`);
226/// - no live daemon owns it (`daemon_live`, injected for testability);
227/// - it is older than `max_age` (top-dir mtime; future mtimes count as
228///   young — clock-skew never deletes).
229///
230/// Failures are per-entry best-effort (warn + continue): one undeletable
231/// dir must not stop the sweep.
232fn reap_husks<F>(
233    by_key_root: &Path,
234    max_age: Duration,
235    now: SystemTime,
236    bound_names: &std::collections::HashSet<String>,
237    daemon_live: F,
238) -> Vec<PathBuf>
239where
240    F: Fn(&Path) -> bool,
241{
242    let mut reaped = Vec::new();
243    let Ok(entries) = std::fs::read_dir(by_key_root) else {
244        return reaped; // no by-key dir yet — nothing to do
245    };
246    for entry in entries.flatten() {
247        let path = entry.path();
248        if !path.is_dir() {
249            continue;
250        }
251        let Some(name) = path.file_name().and_then(|s| s.to_str()) else {
252            continue;
253        };
254        let is_by_key_shape =
255            name.len() == 16 && name.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'));
256        if !is_by_key_shape {
257            continue;
258        }
259        if bound_names.contains(name) {
260            continue;
261        }
262        if path
263            .join("config")
264            .join("wire")
265            .join("private.key")
266            .exists()
267        {
268            continue;
269        }
270        if fs_last_active(&path).is_some() {
271            continue;
272        }
273        if daemon_live(&path) {
274            continue;
275        }
276        let old_enough = std::fs::metadata(&path)
277            .and_then(|m| m.modified())
278            .ok()
279            .and_then(|m| now.duration_since(m).ok())
280            .is_some_and(|age| age >= max_age);
281        if !old_enough {
282            continue;
283        }
284        match std::fs::remove_dir_all(&path) {
285            Ok(()) => reaped.push(path),
286            Err(e) => eprintln!("supervisor: husk reap failed for {}: {e:#}", path.display()),
287        }
288    }
289    reaped
290}
291
/// State the supervisor tracks per session it has spawned a child for.
/// Instances live in `run_supervisor`'s `children` map, keyed by
/// session name.
struct ChildState {
    /// Handle to the spawned `wire daemon` process. Polled with
    /// `try_wait` on every loop iteration and killed + waited when its
    /// session is no longer wanted.
    child: Child,
    /// When the child was spawned. Its elapsed time at the moment the
    /// exit is observed decides whether the exit counts as a rapid
    /// failure (shorter than `RAPID_FAIL_WINDOW`).
    spawned_at: Instant,
}
297
298/// Entrypoint for `wire daemon --all-sessions`. Loops forever; only
299/// returns Err on a setup error (e.g. cannot resolve sessions_root).
300pub fn run_supervisor(interval_secs: u64, as_json: bool) -> Result<()> {
301    // Supervisor singleton — one per machine. Separate pidfile from the
302    // per-session daemon pidfile so the two layers can't collide.
303    let pid_path = supervisor_pid_path()?;
304    if let Some(existing) = read_alive_supervisor_pid(&pid_path)? {
305        let msg = json!({
306            "status": "skipped",
307            "reason": "supervisor already running",
308            "holder_pid": existing,
309        });
310        if as_json {
311            println!("{msg}");
312        } else {
313            eprintln!(
314                "wire daemon --all-sessions: another supervisor is already running (pid {existing}); not starting a second one."
315            );
316        }
317        return Ok(());
318    }
319    write_supervisor_pid(&pid_path)?;
320    let _cleanup = SupervisorPidGuard {
321        path: pid_path.clone(),
322    };
323
324    if !as_json {
325        eprintln!(
326            "wire daemon --all-sessions: supervisor up. interval={interval_secs}s, registry-poll={REGISTRY_POLL_SECS}s. SIGINT to stop."
327        );
328    } else {
329        println!(
330            "{}",
331            json!({
332                "status": "supervisor_started",
333                "interval_secs": interval_secs,
334                "registry_poll_secs": REGISTRY_POLL_SECS,
335            })
336        );
337    }
338
339    // Idle cutoff for registry-unbound sessions — read once at startup
340    // (env doesn't change under a running supervisor).
341    let max_idle = max_idle_from_env();
342    eprintln!(
343        "supervisor: idle cutoff for unbound sessions = {}",
344        match max_idle {
345            Some(d) => format!("{} days", d.as_secs() / 86_400),
346            None => "disabled (spawn-for-all)".to_string(),
347        }
348    );
349
350    // Husk reap cutoff — also read once at startup.
351    let husk_max_age = husk_reap_max_age_from_env();
352    eprintln!(
353        "supervisor: husk reap cutoff = {}",
354        match husk_max_age {
355            Some(d) => format!("{} hours", d.as_secs() / 3600),
356            None => "disabled".to_string(),
357        }
358    );
359    let mut last_husk_reap: Option<Instant> = None;
360
361    let mut children: HashMap<String, ChildState> = HashMap::new();
362    // Per-session backoff that survives a child's reap → respawn → reap
363    // cycle. Distinguishes "session crashes hard repeatedly" from
364    // "child exited cleanly and we're spawning a fresh one".
365    let mut session_last_exit: HashMap<String, Instant> = HashMap::new();
366    let mut session_backoff: HashMap<String, Duration> = HashMap::new();
367
368    loop {
369        // 1. Reap any exited children. Detect rapid failures + update
370        //    per-session backoff so the next spawn waits.
371        let mut exited: Vec<String> = Vec::new();
372        for (name, state) in children.iter_mut() {
373            if let Ok(Some(status)) = state.child.try_wait() {
374                let lived = state.spawned_at.elapsed();
375                let rapid = lived < RAPID_FAIL_WINDOW;
376                eprintln!(
377                    "supervisor: child '{name}' exited (status={status:?}, lived={}s, rapid={rapid})",
378                    lived.as_secs()
379                );
380                let next_backoff = if rapid {
381                    let prev = session_backoff
382                        .get(name)
383                        .copied()
384                        .unwrap_or(INITIAL_BACKOFF);
385                    (prev * 2).min(MAX_BACKOFF)
386                } else {
387                    INITIAL_BACKOFF
388                };
389                session_backoff.insert(name.clone(), next_backoff);
390                session_last_exit.insert(name.clone(), Instant::now());
391                exited.push(name.clone());
392            }
393        }
394        for n in exited {
395            children.remove(&n);
396        }
397
398        // 2. Read registry, identify wanted sessions. Filter out
399        //    registry-unbound sessions that have been idle past the
400        //    cutoff so the supervisor doesn't fan out a daemon per
401        //    every ephemeral persona home (the 147-home fork storm).
402        let all_sessions = crate::session::list_sessions().unwrap_or_default();
403        let total_sessions = all_sessions.len();
404        let wanted: Vec<crate::session::SessionInfo> =
405            supervisor_eligible(all_sessions, max_idle, SystemTime::now(), fs_last_active);
406        if wanted.len() != total_sessions {
407            eprintln!(
408                "supervisor: {} of {} sessions eligible (skipped {} registry-unbound + idle > cutoff)",
409                wanted.len(),
410                total_sessions,
411                total_sessions - wanted.len()
412            );
413        }
414
415        // 2b. Hourly husk sweep: delete by-key homes that were minted
416        //     by session adoption but never grew an identity or synced.
417        //     Runs on the first loop iteration, then once per
418        //     HUSK_REAP_INTERVAL.
419        if let Some(max_age) = husk_max_age
420            && last_husk_reap.is_none_or(|t| t.elapsed() >= HUSK_REAP_INTERVAL)
421        {
422            last_husk_reap = Some(Instant::now());
423            let bound: std::collections::HashSet<String> = crate::session::read_registry()
424                .unwrap_or_default()
425                .by_cwd
426                .values()
427                .cloned()
428                .collect();
429            if let Ok(root) = crate::session::sessions_root() {
430                let reaped = reap_husks(
431                    &root.join("by-key"),
432                    max_age,
433                    SystemTime::now(),
434                    &bound,
435                    // On a liveness-probe error assume live — never
436                    // delete a home we couldn't safely inspect.
437                    |home| existing_daemon_for_session(home).unwrap_or(true),
438                );
439                if !reaped.is_empty() {
440                    eprintln!(
441                        "supervisor: reaped {} husk session home(s): {}",
442                        reaped.len(),
443                        reaped
444                            .iter()
445                            .filter_map(|p| p.file_name().and_then(|s| s.to_str()))
446                            .collect::<Vec<_>>()
447                            .join(", ")
448                    );
449                }
450            }
451        }
452
453        // 3. Kill children whose session has been removed from the
454        //    registry since last poll. (Operator ran `wire session
455        //    forget` or similar.)
456        let wanted_names: std::collections::HashSet<String> =
457            wanted.iter().map(|s| s.name.clone()).collect();
458        let to_kill: Vec<String> = children
459            .keys()
460            .filter(|n| !wanted_names.contains(n.as_str()))
461            .cloned()
462            .collect();
463        for name in to_kill {
464            if let Some(mut state) = children.remove(&name) {
465                eprintln!("supervisor: session '{name}' gone from registry; terminating its child");
466                let _ = state.child.kill();
467                let _ = state.child.wait();
468            }
469        }
470
471        // 4. Spawn missing children, respecting backoff + existing
472        //    pidfiles (operator-spawned daemons coexist).
473        for info in wanted {
474            if info.did.is_none() {
475                continue;
476            }
477            if children.contains_key(&info.name) {
478                continue;
479            }
480            // Backoff gate: if this session is in a rapid-fail loop,
481            // wait the remaining backoff before respawning.
482            if let Some(last_exit) = session_last_exit.get(&info.name) {
483                let wait = session_backoff
484                    .get(&info.name)
485                    .copied()
486                    .unwrap_or(INITIAL_BACKOFF);
487                if last_exit.elapsed() < wait {
488                    continue;
489                }
490            }
491            // Singleton check: an operator-spawned `wire daemon` may
492            // already own this session. Leave it alone — re-checking
493            // next poll is cheap.
494            if existing_daemon_for_session(&info.home_dir)? {
495                continue;
496            }
497            match spawn_child_for_session(&info.name, &info.home_dir, interval_secs) {
498                Ok(child) => {
499                    eprintln!(
500                        "supervisor: spawned child for session '{}' (pid {})",
501                        info.name,
502                        child.id()
503                    );
504                    children.insert(
505                        info.name.clone(),
506                        ChildState {
507                            child,
508                            spawned_at: Instant::now(),
509                        },
510                    );
511                }
512                Err(e) => {
513                    eprintln!(
514                        "supervisor: spawn failed for session '{}': {e:#}",
515                        info.name
516                    );
517                    // Treat spawn failure as a rapid failure so the
518                    // backoff curve kicks in.
519                    let prev = session_backoff
520                        .get(&info.name)
521                        .copied()
522                        .unwrap_or(INITIAL_BACKOFF);
523                    session_backoff.insert(info.name.clone(), (prev * 2).min(MAX_BACKOFF));
524                    session_last_exit.insert(info.name.clone(), Instant::now());
525                }
526            }
527        }
528
529        std::thread::sleep(Duration::from_secs(REGISTRY_POLL_SECS));
530    }
531}
532
533/// Spawn `wire daemon --interval <i>` as a child with `WIRE_HOME`
534/// pinned via env. Strips inherited WIRE_* env so the operator's
535/// shell config (test overrides like `WIRE_DAEMON_NO_SINGLETON=1`)
536/// can't leak in.
537///
538/// v0.14.2 #170 hotfix: the original implementation also passed
539/// `--session <character-name>` as a belt-and-suspenders check.
540/// That broke 127 of 133 sessions on a real multi-session box —
541/// `cmd_daemon`'s `--session` handler calls
542/// `session::session_dir(name)` which resolves
543/// `sessions_root/<name>`, correct for v0.6 top-level layout but
544/// WRONG for v0.13's `by-key/<hash>` layout where the character
545/// name is *derived* from the card DID, not the directory name.
546/// Children bailed → supervisor fork-bombed (10s poll × 60s
547/// backoff × 127 failing sessions). WIRE_HOME env alone is the
548/// correct contract: every daemon code path flows through
549/// `state_dir()` / `config_dir()` which honor it. No second
550/// source of truth.
551fn spawn_child_for_session(
552    name: &str,
553    home_dir: &std::path::Path,
554    interval_secs: u64,
555) -> Result<Child> {
556    let exe = std::env::current_exe().context("resolving current exe for child fork")?;
557    let mut cmd = Command::new(&exe);
558    cmd.args(["daemon", "--interval", &interval_secs.to_string()]);
559    // Strip WIRE_* env so operator shell-vars don't leak into the
560    // child. Then pin WIRE_HOME exactly.
561    let leaks: Vec<String> = std::env::vars()
562        .filter(|(k, _)| k.starts_with("WIRE_"))
563        .map(|(k, _)| k)
564        .collect();
565    for k in leaks {
566        cmd.env_remove(&k);
567    }
568    cmd.env("WIRE_HOME", home_dir);
569    // Children inherit stdout/stderr → land in the launchd log file
570    // (StandardOutPath in the plist). Operators see "supervisor:
571    // spawned ..." lines interleaved with each session's daemon log.
572    cmd.spawn().with_context(|| {
573        format!(
574            "fork-exec `wire daemon` for session '{name}' (binary {} WIRE_HOME={})",
575            exe.display(),
576            home_dir.display()
577        )
578    })
579}
580
581/// True iff this session's `daemon.pid` names a live process. Used by
582/// the supervisor to coexist with operator-spawned `wire daemon`
583/// invocations: if the operator already started one in a tmux pane,
584/// we skip the spawn and let theirs own the cursor.
585fn existing_daemon_for_session(home_dir: &std::path::Path) -> Result<bool> {
586    let pid_path = home_dir.join("state").join("wire").join("daemon.pid");
587    if !pid_path.exists() {
588        return Ok(false);
589    }
590    let body = match std::fs::read_to_string(&pid_path) {
591        Ok(b) => b,
592        Err(_) => return Ok(false),
593    };
594    // Pidfile is either JSON `{"pid": <n>, ...}` (v0.5.11+) or a bare
595    // integer (legacy). Try JSON+pid-field first; if that yields
596    // None (parse failed OR JSON had no pid field, e.g. a bare
597    // integer body parses as JSON number with no `.pid`), fall
598    // through to the bare-int path.
599    let pid = serde_json::from_str::<serde_json::Value>(&body)
600        .ok()
601        .and_then(|v| v.get("pid").and_then(serde_json::Value::as_u64))
602        .or_else(|| body.trim().parse::<u64>().ok());
603    Ok(pid
604        .map(|p| crate::ensure_up::pid_is_alive(p as u32))
605        .unwrap_or(false))
606}
607
/// Read-only snapshot of the supervisor's current topology — supervisor
/// liveness + per-session daemon liveness + orphan pids the supervisor
/// is not currently managing. Built by `read_supervisor_state` and used
/// by `wire supervisor` (the CLI counterpart to single-session
/// `wire status`) so operators can ask "what is the multi-session
/// supervisor doing?" in one command instead of cross-referencing
/// `pgrep` against per-session pidfiles by hand.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SupervisorState {
    /// Pid the `supervisor.pid` file names; None if file missing.
    pub supervisor_pid: Option<u32>,
    /// True iff that pid is currently a live process (always false
    /// when `supervisor_pid` is None).
    pub supervisor_alive: bool,
    /// Per-session liveness across every initialized session, in
    /// `list_sessions()` order. Empty when `list_sessions()` fails.
    pub sessions: Vec<SupervisedSession>,
    /// `wire daemon` pids found via cmdline-scan that are NOT the pid
    /// of any session's *live* pidfile AND are not the supervisor
    /// itself, sorted ascending. Could be legacy operator-spawned
    /// daemons, leftover children from a crashed prior supervisor, or
    /// daemons serving the default WIRE_HOME (no `--all-sessions`).
    /// Operators see them here so they can decide whether to kill.
    pub unmanaged_pids: Vec<u32>,
    /// v0.14.2: session names whose live daemon's recorded
    /// `pidfile.version` is older than this CLI's own
    /// `CARGO_PKG_VERSION`. Sessions with a dead daemon or no recorded
    /// version are never listed. The supervisor's existing-pidfile
    /// check skips alive daemons regardless of their binary version,
    /// so stale-binary daemons persist until they exit. Surfaced for
    /// operator visibility — they can `pkill -TERM <pid>` or use a
    /// future `wire upgrade --refresh-stale-children` to force the
    /// supervisor to respawn them on the current binary.
    pub stale_binary_sessions: Vec<String>,
}
641
/// One session as seen by the supervisor: an entry of
/// `SupervisorState::sessions`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SupervisedSession {
    /// Session name (`info.name` from `session::list_sessions`).
    pub name: String,
    /// `home_dir` filesystem path, converted lossily to a string.
    pub home_dir: String,
    /// Pid the session's `daemon.pid` records; None if file missing.
    pub daemon_pid: Option<u32>,
    /// True iff that pid is currently a live process (always false
    /// when `daemon_pid` is None).
    pub daemon_alive: bool,
    /// Seconds since the session's daemon last completed a sync
    /// cycle (read from `last_sync.json`); None if never recorded.
    pub last_sync_age_seconds: Option<u64>,
    /// Version string the running daemon recorded when it wrote its
    /// pidfile (`PidRecord::Json.version`). None when the pidfile is
    /// missing or corrupt; the field is then omitted from the
    /// serialized output. Surfaced so operators can spot version drift
    /// across the supervisor fleet — the supervisor's pre-spawn
    /// existing-pidfile check skips alive daemons regardless of
    /// their binary version, so a daemon spawned on v0.13.x and
    /// still running after the supervisor was bounced to v0.14.x
    /// keeps the old binary in memory until it exits.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub daemon_version: Option<String>,
}
667
668/// Build a `SupervisorState` snapshot. Pure read; no fork / no
669/// pidfile mutation. Best-effort on every component (filesystem
670/// errors yield None / empty rather than failing the whole call).
671pub fn read_supervisor_state() -> Result<SupervisorState> {
672    let pid_path = supervisor_pid_path()?;
673    let supervisor_pid = read_supervisor_pid(&pid_path);
674    let supervisor_alive = supervisor_pid
675        .map(crate::ensure_up::pid_is_alive)
676        .unwrap_or(false);
677
678    // Per-session liveness — walk list_sessions, read each home's
679    // pidfile + last_sync.
680    let sessions: Vec<SupervisedSession> = crate::session::list_sessions()
681        .unwrap_or_default()
682        .into_iter()
683        .map(|info| {
684            let daemon_pid = crate::session::session_daemon_pid(&info.home_dir);
685            let daemon_alive = daemon_pid
686                .map(crate::ensure_up::pid_is_alive)
687                .unwrap_or(false);
688            // last_sync.json lives under <home>/state/wire/last_sync.json.
689            let last_sync_age_seconds = read_session_last_sync_age(&info.home_dir);
690            // v0.14.2: read the daemon-recorded version from the JSON
691            // pidfile. Legacy bare-integer pidfiles return None
692            // (can't surface a version we don't have).
693            let daemon_version = read_session_pidfile_version(&info.home_dir);
694            SupervisedSession {
695                name: info.name,
696                home_dir: info.home_dir.to_string_lossy().into_owned(),
697                daemon_pid,
698                daemon_alive,
699                last_sync_age_seconds,
700                daemon_version,
701            }
702        })
703        .collect();
704
705    // Unmanaged pids: every `wire daemon` cmdline scan hit that isn't
706    // (a) the supervisor itself, (b) any session's pidfile pid.
707    let all_daemon_pids: std::collections::HashSet<u32> =
708        crate::platform::find_processes_by_cmdline("wire daemon")
709            .into_iter()
710            .collect();
711    let known_session_pids: std::collections::HashSet<u32> = sessions
712        .iter()
713        .filter_map(|s| if s.daemon_alive { s.daemon_pid } else { None })
714        .collect();
715    let mut unmanaged_pids: Vec<u32> = all_daemon_pids
716        .into_iter()
717        .filter(|p| Some(*p) != supervisor_pid && !known_session_pids.contains(p))
718        .collect();
719    unmanaged_pids.sort_unstable();
720
721    // v0.14.2: derive the stale-binary set. Compare each live
722    // daemon's recorded version against the running CLI's version.
723    // "Stale" iff alive + has a recorded version + that version is
724    // strictly less than ours by dotted-integer compare (so 0.10.0 >
725    // 0.9.0). Unparseable strings are conservatively "not stale" — a
726    // pre-release suffix like 0.14.2-rc.1 stays unflagged rather than
727    // false-positive against 0.14.2.
728    let our_version = env!("CARGO_PKG_VERSION");
729    let stale_binary_sessions: Vec<String> = sessions
730        .iter()
731        .filter(|s| {
732            s.daemon_alive
733                && s.daemon_version
734                    .as_deref()
735                    .map(|v| version_lt(v, our_version))
736                    .unwrap_or(false)
737        })
738        .map(|s| s.name.clone())
739        .collect();
740
741    Ok(SupervisorState {
742        supervisor_pid,
743        supervisor_alive,
744        sessions,
745        unmanaged_pids,
746        stale_binary_sessions,
747    })
748}
749
/// Returns `true` when version `a` is strictly older than version `b`.
///
/// Both inputs are split on `.` and every segment must parse as a
/// `u32`. The shorter list is padded with trailing zeros, so `0.14`
/// is treated as `0.14.0` (and is therefore older than `0.14.1`).
/// Segments are then compared left to right; the first difference
/// decides.
///
/// If either input contains a segment that does not parse as `u32` —
/// e.g. the `2-rc` in `0.14.2-rc.1` — the result is `false`.
/// Under-flagging a pre-release build is preferred over falsely
/// marking it older than the stable release with the same
/// major.minor.patch.
fn version_lt(a: &str, b: &str) -> bool {
    /// Split a dotted version into numeric segments; `None` as soon
    /// as one segment is not a valid `u32`.
    fn segments(raw: &str) -> Option<Vec<u32>> {
        let mut out = Vec::new();
        for piece in raw.split('.') {
            out.push(piece.parse::<u32>().ok()?);
        }
        Some(out)
    }

    let (lhs, rhs) = match (segments(a), segments(b)) {
        (Some(lhs), Some(rhs)) => (lhs, rhs),
        _ => return false,
    };

    // Pad both sides with trailing zeros to a common width, then let
    // the lexicographic iterator comparison pick the first difference.
    let width = lhs.len().max(rhs.len());
    let lhs_padded = lhs.iter().copied().chain(std::iter::repeat(0)).take(width);
    let rhs_padded = rhs.iter().copied().chain(std::iter::repeat(0)).take(width);
    lhs_padded.lt(rhs_padded)
}
773
774/// Read the daemon-recorded version string from a session's
775/// `<home>/state/wire/daemon.pid` JSON pidfile. Returns None for
776/// legacy bare-integer pidfiles (no version field) and for absent /
777/// unreadable files.
778fn read_session_pidfile_version(home_dir: &std::path::Path) -> Option<String> {
779    let pidfile = home_dir.join("state").join("wire").join("daemon.pid");
780    let body = std::fs::read_to_string(&pidfile).ok()?;
781    let v: serde_json::Value = serde_json::from_str(&body).ok()?;
782    v.get("version")
783        .and_then(serde_json::Value::as_str)
784        .map(str::to_string)
785}
786
/// Parse the pid stored in `supervisor.pid`, without checking whether
/// that process is still alive (the snapshot builder does that itself).
///
/// Returns `None` — never an error — when the file is missing,
/// unreadable, or does not contain a `u32` once surrounding
/// whitespace is trimmed.
fn read_supervisor_pid(path: &std::path::Path) -> Option<u32> {
    if path.exists() {
        std::fs::read_to_string(path)
            .ok()
            .and_then(|raw| raw.trim().parse::<u32>().ok())
    } else {
        None
    }
}
797
798/// Read `<home>/state/wire/last_sync.json`'s timestamp and return
799/// "seconds since now". None on absent / unreadable / unparseable.
800fn read_session_last_sync_age(home_dir: &std::path::Path) -> Option<u64> {
801    let path = home_dir.join("state").join("wire").join("last_sync.json");
802    let body = std::fs::read_to_string(&path).ok()?;
803    let v: serde_json::Value = serde_json::from_str(&body).ok()?;
804    let ts = v.get("ts").and_then(serde_json::Value::as_str)?;
805    let parsed =
806        time::OffsetDateTime::parse(ts, &time::format_description::well_known::Rfc3339).ok()?;
807    let age = (time::OffsetDateTime::now_utc() - parsed).whole_seconds();
808    if age < 0 {
809        // Clock skew: timestamp is in the future. Treat as fresh.
810        Some(0)
811    } else {
812        Some(age as u64)
813    }
814}
815
816fn supervisor_pid_path() -> Result<PathBuf> {
817    let root = crate::session::sessions_root()
818        .context("resolving sessions_root for supervisor pidfile")?;
819    std::fs::create_dir_all(&root).with_context(|| format!("creating {root:?}"))?;
820    Ok(root.join("supervisor.pid"))
821}
822
823fn read_alive_supervisor_pid(path: &std::path::Path) -> Result<Option<u32>> {
824    if !path.exists() {
825        return Ok(None);
826    }
827    let body = std::fs::read_to_string(path).ok();
828    let pid = body.as_deref().and_then(|s| s.trim().parse::<u32>().ok());
829    match pid {
830        Some(p) if crate::ensure_up::pid_is_alive(p) => Ok(Some(p)),
831        _ => Ok(None),
832    }
833}
834
835fn write_supervisor_pid(path: &std::path::Path) -> Result<()> {
836    let pid = std::process::id();
837    std::fs::write(path, pid.to_string())
838        .with_context(|| format!("writing supervisor pidfile {path:?}"))?;
839    Ok(())
840}
841
/// Drop guard for the supervisor pidfile.
///
/// When the guard is dropped, the file at `path` is removed — but
/// only if it still contains this process's pid, so a pidfile that
/// now names another process is left untouched.
struct SupervisorPidGuard {
    /// Path of the supervisor pidfile this guard cleans up.
    path: PathBuf,
}
845
846impl Drop for SupervisorPidGuard {
847    fn drop(&mut self) {
848        // Only remove if it still names us — same pattern as
849        // DaemonPidGuard in ensure_up.rs.
850        if let Ok(body) = std::fs::read_to_string(&self.path)
851            && let Ok(pid) = body.trim().parse::<u32>()
852            && pid == std::process::id()
853        {
854            let _ = std::fs::remove_file(&self.path);
855        }
856    }
857}
858
// Unit tests for this module: dotted-version comparison, supervisor
// pidfile reading and the pidfile drop guard, per-session daemon
// detection, the supervisor eligibility filter, and the husk reaper.
// Filesystem-touching tests work inside a fresh `tempdir()`.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    #[test]
    fn version_lt_dotted_integer_compare() {
        // Lexical string-compare footgun cases — these must come out right.
        assert!(version_lt("0.9.0", "0.10.0"));
        assert!(version_lt("0.13.5", "0.14.1"));
        assert!(version_lt("0.14.0", "0.14.1"));
        // Equal / greater → not stale.
        assert!(!version_lt("0.14.1", "0.14.1"));
        assert!(!version_lt("0.14.2", "0.14.1"));
        // Shorter version pads with zero.
        assert!(version_lt("0.14", "0.14.1"));
        assert!(!version_lt("0.14.1", "0.14"));
        // Unparseable (pre-release suffix, garbage) is conservatively NOT-stale
        // — under-flagging beats false-positive on `0.14.2-rc.1` vs `0.14.2`.
        assert!(!version_lt("0.14.2-rc.1", "0.14.2"));
        assert!(!version_lt("garbage", "0.14.1"));
        assert!(!version_lt("0.14.1", "garbage"));
    }

    // A missing pidfile reads as `Ok(None)`, not as an error.
    #[test]
    fn read_alive_supervisor_pid_returns_none_when_missing() {
        let tmp = tempdir().unwrap();
        let p = tmp.path().join("supervisor.pid");
        assert_eq!(read_alive_supervisor_pid(&p).unwrap(), None);
    }

    // A pidfile naming a process that is not running reads as `None`.
    #[test]
    fn read_alive_supervisor_pid_returns_none_for_dead_pid() {
        let tmp = tempdir().unwrap();
        let p = tmp.path().join("supervisor.pid");
        // pid 999999 is almost certainly not running.
        std::fs::write(&p, "999999").unwrap();
        assert_eq!(read_alive_supervisor_pid(&p).unwrap(), None);
    }

    // The test process is alive by definition, so its own pid must be
    // returned as-is.
    #[test]
    fn read_alive_supervisor_pid_returns_pid_for_self() {
        let tmp = tempdir().unwrap();
        let p = tmp.path().join("supervisor.pid");
        let our_pid = std::process::id();
        std::fs::write(&p, our_pid.to_string()).unwrap();
        assert_eq!(read_alive_supervisor_pid(&p).unwrap(), Some(our_pid));
    }

    #[test]
    fn pid_guard_only_removes_when_pid_still_matches() {
        let tmp = tempdir().unwrap();
        let p = tmp.path().join("supervisor.pid");
        // Write a foreign pid into the file, then drop a guard for
        // our pid. The guard should leave the foreign pidfile alone.
        std::fs::write(&p, "12345").unwrap();
        {
            let _g = SupervisorPidGuard { path: p.clone() };
        }
        assert!(p.exists(), "guard removed a pidfile that didn't name us");
    }

    // Counterpart to the test above: when the pidfile names this
    // process, dropping the guard must delete it.
    #[test]
    fn pid_guard_removes_when_pid_matches() {
        let tmp = tempdir().unwrap();
        let p = tmp.path().join("supervisor.pid");
        let our_pid = std::process::id();
        std::fs::write(&p, our_pid.to_string()).unwrap();
        {
            let _g = SupervisorPidGuard { path: p.clone() };
        }
        assert!(!p.exists(), "guard left our own pidfile behind");
    }

    #[test]
    fn existing_daemon_for_session_returns_false_when_pidfile_missing() {
        let tmp = tempdir().unwrap();
        // home_dir has no state/wire/daemon.pid
        assert!(!existing_daemon_for_session(tmp.path()).unwrap());
    }

    // A bare-integer pidfile naming a (presumably) dead pid must not
    // count as an existing daemon.
    #[test]
    fn existing_daemon_for_session_returns_false_for_dead_pid() {
        let tmp = tempdir().unwrap();
        let state = tmp.path().join("state").join("wire");
        std::fs::create_dir_all(&state).unwrap();
        std::fs::write(state.join("daemon.pid"), "999999").unwrap();
        assert!(!existing_daemon_for_session(tmp.path()).unwrap());
    }

    // A pidfile naming the (live) test process counts as an existing
    // daemon.
    #[test]
    fn existing_daemon_for_session_returns_true_for_self_pid() {
        let tmp = tempdir().unwrap();
        let state = tmp.path().join("state").join("wire");
        std::fs::create_dir_all(&state).unwrap();
        std::fs::write(state.join("daemon.pid"), std::process::id().to_string()).unwrap();
        assert!(existing_daemon_for_session(tmp.path()).unwrap());
    }

    // ---- supervisor eligibility filter (the 147-home fork-storm fix) ----

    /// Build a minimal `SessionInfo` fixture: the given `name` and
    /// optional `cwd` binding, a synthetic home of `/sessions/<name>`,
    /// and every other field empty / `false`.
    fn mk_session(name: &str, cwd: Option<&str>) -> crate::session::SessionInfo {
        crate::session::SessionInfo {
            name: name.to_string(),
            cwd: cwd.map(String::from),
            home_dir: PathBuf::from(format!("/sessions/{name}")),
            did: None,
            handle: None,
            daemon_running: false,
            character: None,
        }
    }

    // No override value → the default idle window of
    // `DEFAULT_MAX_IDLE_DAYS` days.
    #[test]
    fn parse_max_idle_default_when_unset() {
        assert_eq!(
            parse_max_idle(None),
            Some(Duration::from_secs(DEFAULT_MAX_IDLE_DAYS * 86_400))
        );
    }

    // "0" turns the idle filter off (`None` = no cutoff).
    #[test]
    fn parse_max_idle_zero_disables_filter() {
        assert_eq!(parse_max_idle(Some("0")), None);
    }

    // The value is a day count; surrounding whitespace is tolerated.
    #[test]
    fn parse_max_idle_explicit_days() {
        assert_eq!(
            parse_max_idle(Some("3")),
            Some(Duration::from_secs(3 * 86_400))
        );
        assert_eq!(
            parse_max_idle(Some("  14 ")),
            Some(Duration::from_secs(14 * 86_400))
        );
    }

    // An unparseable value falls back to the default rather than
    // disabling the filter.
    #[test]
    fn parse_max_idle_garbage_falls_back_to_default() {
        assert_eq!(
            parse_max_idle(Some("not-a-number")),
            Some(Duration::from_secs(DEFAULT_MAX_IDLE_DAYS * 86_400))
        );
    }

    #[test]
    fn eligible_keeps_cwd_bound_even_when_ancient() {
        // A registry-bound session is kept no matter how idle — the
        // operator deliberately attached it to a project dir. (This is
        // the real-world case: the cwd-bound `wire`/`slancha-*` sessions
        // were the *oldest* on the box, yet must survive.)
        let now = SystemTime::now();
        let ancient = now - Duration::from_secs(365 * 86_400);
        let sessions = vec![mk_session("wire", Some("/Users/p/Source/wire"))];
        let out = supervisor_eligible(sessions, Some(Duration::from_secs(7 * 86_400)), now, |_| {
            Some(ancient)
        });
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].name, "wire");
    }

    #[test]
    fn eligible_keeps_unbound_recent_drops_unbound_idle() {
        // The live-but-unbound persona sessions (each Claude tab) are
        // recent → kept. The abandoned ones are idle → dropped.
        let now = SystemTime::now();
        let recent = now - Duration::from_secs(2 * 86_400);
        let stale = now - Duration::from_secs(30 * 86_400);
        let sessions = vec![
            mk_session("rosy-rook", None),    // live tab
            mk_session("agate-nimbus", None), // abandoned
        ];
        let out = supervisor_eligible(
            sessions,
            Some(Duration::from_secs(7 * 86_400)),
            now,
            |home| {
                if home.ends_with("rosy-rook") {
                    Some(recent)
                } else {
                    Some(stale)
                }
            },
        );
        let names: Vec<_> = out.iter().map(|s| s.name.as_str()).collect();
        assert_eq!(names, vec!["rosy-rook"]);
    }

    #[test]
    fn eligible_drops_unbound_with_no_activity_signal() {
        // A never-synced husk (no activity files at all) and no cwd →
        // dropped: nothing says it's a session anyone is using.
        let now = SystemTime::now();
        let sessions = vec![mk_session("husk", None)];
        let out = supervisor_eligible(sessions, Some(Duration::from_secs(7 * 86_400)), now, |_| {
            None
        });
        assert!(out.is_empty());
    }

    #[test]
    fn eligible_none_cutoff_keeps_everything() {
        // Override = 0 (max_idle None) restores legacy spawn-for-all.
        let now = SystemTime::now();
        let ancient = now - Duration::from_secs(999 * 86_400);
        let sessions = vec![mk_session("husk", None), mk_session("agate-nimbus", None)];
        let out = supervisor_eligible(sessions, None, now, |_| Some(ancient));
        assert_eq!(out.len(), 2);
    }

    // ---- husk reaper ----

    use std::collections::HashSet;

    /// Make a by-key-shaped husk home (`state/wire` only, no identity)
    /// under `root` and return its path. The dir's real mtime is "now",
    /// so tests control age by passing a future `now` to `reap_husks`.
    fn mk_husk(root: &Path, name: &str) -> PathBuf {
        let home = root.join(name);
        std::fs::create_dir_all(home.join("state").join("wire")).unwrap();
        home
    }

    /// `now` far enough in the future that any just-created dir is past
    /// the default 48h cutoff.
    fn far_future() -> SystemTime {
        SystemTime::now() + Duration::from_secs(100 * 3600)
    }

    /// The 48-hour age cutoff handed to `reap_husks` by the tests below.
    const CUTOFF_48H: Duration = Duration::from_secs(48 * 3600);

    // Baseline: an old husk with no identity, no sync record, no
    // registry binding and no live daemon is deleted and reported.
    #[test]
    fn reap_removes_old_identityless_unsynced_husk() {
        let tmp = tempdir().unwrap();
        let home = mk_husk(tmp.path(), "abcdef0123456789");
        let reaped = reap_husks(
            tmp.path(),
            CUTOFF_48H,
            far_future(),
            &HashSet::new(),
            |_| false,
        );
        assert_eq!(reaped, vec![home.clone()]);
        assert!(!home.exists(), "husk dir should be gone");
    }

    // A home holding `config/wire/private.key` is spared even when
    // old enough to reap.
    #[test]
    fn reap_keeps_identity_homes_regardless_of_age() {
        let tmp = tempdir().unwrap();
        let home = mk_husk(tmp.path(), "abcdef0123456789");
        let cfg = home.join("config").join("wire");
        std::fs::create_dir_all(&cfg).unwrap();
        std::fs::write(cfg.join("private.key"), "k").unwrap();
        let reaped = reap_husks(
            tmp.path(),
            CUTOFF_48H,
            far_future(),
            &HashSet::new(),
            |_| false,
        );
        assert!(reaped.is_empty());
        assert!(home.exists(), "identity-bearing home must never be reaped");
    }

    // The mere presence of `state/wire/last_sync.json` (here an empty
    // JSON object) spares the home.
    #[test]
    fn reap_keeps_homes_that_ever_synced() {
        let tmp = tempdir().unwrap();
        let home = mk_husk(tmp.path(), "abcdef0123456789");
        std::fs::write(home.join("state").join("wire").join("last_sync.json"), "{}").unwrap();
        let reaped = reap_husks(
            tmp.path(),
            CUTOFF_48H,
            far_future(),
            &HashSet::new(),
            |_| false,
        );
        assert!(reaped.is_empty());
        assert!(home.exists(), "synced home must never be reaped");
    }

    #[test]
    fn reap_keeps_young_husks() {
        let tmp = tempdir().unwrap();
        let home = mk_husk(tmp.path(), "abcdef0123456789");
        // `now` = actual now → dir age ≈ 0 < 48h.
        let reaped = reap_husks(
            tmp.path(),
            CUTOFF_48H,
            SystemTime::now(),
            &HashSet::new(),
            |_| false,
        );
        assert!(reaped.is_empty());
        assert!(home.exists(), "young husk must get its grace window");
    }

    // A home whose name appears in the bound-names set is spared.
    #[test]
    fn reap_keeps_registry_bound_names() {
        let tmp = tempdir().unwrap();
        let home = mk_husk(tmp.path(), "abcdef0123456789");
        let bound: HashSet<String> = ["abcdef0123456789".to_string()].into();
        let reaped = reap_husks(tmp.path(), CUTOFF_48H, far_future(), &bound, |_| false);
        assert!(reaped.is_empty());
        assert!(home.exists(), "operator-bound home must never be reaped");
    }

    // The final closure argument reports daemon liveness per home;
    // answering `true` spares the home.
    #[test]
    fn reap_keeps_homes_with_live_daemon() {
        let tmp = tempdir().unwrap();
        let home = mk_husk(tmp.path(), "abcdef0123456789");
        let reaped = reap_husks(
            tmp.path(),
            CUTOFF_48H,
            far_future(),
            &HashSet::new(),
            |_| true,
        );
        assert!(reaped.is_empty());
        assert!(home.exists(), "daemon-owned home must never be reaped");
    }

    #[test]
    fn reap_ignores_non_by_key_shaped_names() {
        let tmp = tempdir().unwrap();
        // Named session, uppercase hex, and wrong-length hex — all
        // outside the by-key shape, all untouchable.
        let named = mk_husk(tmp.path(), "my-session");
        let upper = mk_husk(tmp.path(), "ABCDEF0123456789");
        let short = mk_husk(tmp.path(), "abcdef012345678");
        let reaped = reap_husks(
            tmp.path(),
            CUTOFF_48H,
            far_future(),
            &HashSet::new(),
            |_| false,
        );
        assert!(reaped.is_empty());
        assert!(named.exists() && upper.exists() && short.exists());
    }

    // A root directory that does not exist yields an empty result
    // instead of a panic.
    #[test]
    fn reap_missing_root_is_a_noop() {
        let tmp = tempdir().unwrap();
        let reaped = reap_husks(
            &tmp.path().join("no-such-by-key"),
            CUTOFF_48H,
            far_future(),
            &HashSet::new(),
            |_| false,
        );
        assert!(reaped.is_empty());
    }

    #[test]
    fn husk_reap_max_age_parsing() {
        // Unset → 48h default.
        assert_eq!(
            parse_husk_reap_max_age(None),
            Some(Duration::from_secs(48 * 3600))
        );
        // 0 → disabled.
        assert_eq!(parse_husk_reap_max_age(Some("0")), None);
        // Explicit hours.
        assert_eq!(
            parse_husk_reap_max_age(Some("12")),
            Some(Duration::from_secs(12 * 3600))
        );
        // Garbage → default, not disabled.
        assert_eq!(
            parse_husk_reap_max_age(Some("soon")),
            Some(Duration::from_secs(48 * 3600))
        );
    }
}