Skip to main content

wire/
ensure_up.rs

1//! Background-process bootstrapper for the MCP path.
2//!
3//! Post-pair, an agent shouldn't have to ask the user "start the daemon?" —
4//! the MCP accept/dial tools invoke [`ensure_daemon_running`] so push/pull is
5//! already armed by the time the agent surfaces "paired ✓" back to chat. OS
6//! toasts for inbound messages are folded into the daemon's own sync loop
7//! (see `cli::comms::notify_sweep_new_events`), so arming the daemon arms
8//! toasts too — no separate notify process.
9//!
10//! ## Idempotency
11//!
12//! Each subcommand writes its pid record to `$WIRE_HOME/state/wire/<name>.pid`
13//! on spawn. The next call reads the record and skips spawning if the pid is
14//! still alive. Stale pid files (process died) are silently overwritten.
15//!
16//! ## Pid-file shape (P0.4, 0.5.11)
17//!
18//! The pid file used to be a raw integer (`12345\n`). Today's debug surfaced
19//! a process running an OLD binary text in memory under a current symlink,
20//! and `wire status` had no way to detect that. The pid file is now a
21//! versioned JSON record:
22//!
23//! ```json
24//! {
25//!   "schema": "wire-daemon-pid-v1",
26//!   "pid": 12345,
27//!   "bin_path": "/usr/local/bin/wire",
28//!   "version": "0.5.11",
29//!   "started_at": "2026-05-16T01:23:45Z",
30//!   "did": "did:wire:paul-mac",
31//!   "relay_url": "https://wireup.net"
32//! }
33//! ```
34//!
35//! The JSON `DaemonPid` form is the only supported on-disk format;
36//! `read_pid_record` reports anything else as `Corrupt`.
37//!
38//! ## Wait-until-alive
39//!
40//! On spawn, we wait briefly for the child to be alive before persisting the
41//! pid file. A concurrent CLI seeing the file pointing at a not-yet-bound
42//! PID is the "daemon reports running but can't accept connections" race
43//! spark flagged in our P0.4 design call.
44//!
45//! ## Detachment (Unix)
46//!
47//! Spawned with stdio nulled. Since `wire mcp` runs without a controlling
48//! TTY (it's a stdio MCP server, not a login shell), the spawned children
49//! inherit no TTY → no SIGHUP arrives when the parent exits, so they
50//! survive a Claude Code restart cycle. PIDs are reaped by init.
51//!
52//! Worst case: a child dies; the next accept/dial call respawns it.
53//! No data is lost (outbox/inbox is on disk, content-addressed dedupe).
54
55use std::path::PathBuf;
56use std::process::{Command, Stdio};
57use std::time::{Duration, Instant};
58
59use anyhow::Result;
60use serde::{Deserialize, Serialize};
61use serde_json::Value;
62
/// Schema string written into every JSON pid file. Bumped if the pid-file
/// shape ever changes incompatibly. Readers warn on unknown schema.
///
/// NOTE(review): `read_pid_record` in this module deserializes the record
/// without inspecting its `schema` value — presumably the unknown-schema
/// warning lives in a caller; confirm.
pub const DAEMON_PID_SCHEMA: &str = "wire-daemon-pid-v1";
66
/// Versioned daemon pid record — the JSON form written by 0.5.11+.
///
/// This is the exact on-disk shape of `<name>.pid`: it is serialized with
/// serde when a pid file is written and deserialized when one is read.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct DaemonPid {
    /// Schema discriminator. Always `wire-daemon-pid-v1` for now.
    pub schema: String,
    /// OS process id of the daemon this record describes.
    pub pid: u32,
    /// Absolute path of the binary that was exec'd. Catches today's exact
    /// bug: a stale 0.2.4 daemon process kept running under a symlink that
    /// was repointed at 0.5.10 — `wire --version` says 0.5.10 but the
    /// running daemon's text in memory is still 0.2.4.
    pub bin_path: String,
    /// CARGO_PKG_VERSION captured at spawn. Compared against the CLI's
    /// own version on every invocation; mismatch = loud warn.
    pub version: String,
    /// RFC3339 timestamp of spawn.
    pub started_at: String,
    /// Self DID — catches multi-identity contamination (one user, two wire
    /// identities on same host, daemon launched as wrong one). Cheap
    /// field, expensive bug. `None` when no identity could be read.
    pub did: Option<String>,
    /// Relay this daemon was bound to at spawn. Catches daemon-bound-to-
    /// old-relay-after-migration drift. `None` when no relay state could
    /// be read.
    pub relay_url: Option<String>,
}
91
/// Result of reading a pid file. JSON (full metadata) is the only
/// supported on-disk form; anything else is `Corrupt`.
#[derive(Debug, Clone)]
pub enum PidRecord {
    /// The file parsed as a [`DaemonPid`] record.
    Json(DaemonPid),
    /// No pid file content was available (e.g. the file does not exist or
    /// is empty).
    Missing,
    /// The pid file could not be interpreted as a [`DaemonPid`]; the
    /// payload is a human-readable reason.
    Corrupt(String),
}
100
101impl PidRecord {
102    pub fn pid(&self) -> Option<u32> {
103        match self {
104            PidRecord::Json(d) => Some(d.pid),
105            _ => None,
106        }
107    }
108}
109
110/// Ensure a `wire daemon --interval 5` process is alive. Returns `Ok(true)`
111/// if a fresh process was spawned, `Ok(false)` if one was already running.
112pub fn ensure_daemon_running() -> Result<bool> {
113    ensure_background("daemon", &["daemon", "--interval", "5"])
114}
115
116fn pid_file(name: &str) -> Result<PathBuf> {
117    Ok(crate::config::state_dir()?.join(format!("{name}.pid")))
118}
119
/// Snapshot of daemon liveness state read through ONE consistent
/// view. Consumed by `wire status`, `wire doctor`'s `daemon` check,
/// and `daemon_pid_consistency` so all three surfaces agree by
/// construction — issue #2 root cause was three call sites that
/// each computed liveness independently and disagreed for 25 min.
///
/// Built by `daemon_liveness()`; the fields are a point-in-time view and
/// are not refreshed after construction.
#[derive(Debug, Clone)]
pub struct DaemonLiveness {
    /// PID claimed by `daemon.pid` (None if missing/corrupt).
    pub pidfile_pid: Option<u32>,
    /// True iff `pidfile_pid` is currently a live process. Always false
    /// when `pidfile_pid` is `None`.
    pub pidfile_alive: bool,
    /// Every PID matching `pgrep -f "wire daemon"`. Empty if pgrep is
    /// unavailable (non-Unix systems, missing util) — the consumer
    /// must not treat empty as "no daemons" without considering this.
    pub pgrep_pids: Vec<u32>,
    /// PIDs in `pgrep_pids` that do NOT match `pidfile_pid`. These are
    /// orphan daemons racing the cursor with the pidfile-recorded one.
    /// `daemon_liveness()` additionally drops registered session daemons,
    /// the live supervisor, and daemons serving a different WIRE_HOME.
    pub orphan_pids: Vec<u32>,
    /// Full parsed pidfile record (Json / Missing / Corrupt).
    pub record: PidRecord,
}
141
/// True iff `pid` is currently a live OS process. Delegates to the
/// platform-aware check (`/proc` on Linux, `kill -0` on other Unix,
/// `tasklist` on Windows) so callers never disagree across OSes. The old
/// local `kill -0` path false-negatived on Windows (no `kill`), making
/// `wire status`/`doctor` report the daemon DOWN while it was alive.
///
/// This is the public entry point; it performs no work of its own beyond
/// forwarding to `crate::platform::process_alive`.
pub fn pid_is_alive(pid: u32) -> bool {
    crate::platform::process_alive(pid)
}
150
151/// Read the daemon pid file + pgrep in one shot, producing a snapshot
152/// every caller can interpret identically. The point of this helper
153/// is that three independent callers used to compute liveness three
154/// different ways (#2): pidfile-pid-alive (cmd_status), pgrep-only
155/// (early check_daemon_health), neither (check_daemon_pid_consistency).
156/// Now all three flow through the same `DaemonLiveness`.
157pub fn daemon_liveness() -> DaemonLiveness {
158    let record = read_pid_record("daemon");
159    let pidfile_pid = record.pid();
160    let pidfile_alive = pidfile_pid.map(pid_is_alive).unwrap_or(false);
161    // Platform-aware cmdline scan (Unix `pgrep`, Windows PowerShell CIM).
162    // Field stays named `pgrep_pids` for callers; on Windows the old direct
163    // `pgrep` shell-out returned empty (no such tool), masking live daemons.
164    let pgrep_pids: Vec<u32> = crate::platform::find_processes_by_cmdline("wire daemon");
165    // A2 (v0.13.2): on a multi-session box EVERY session runs its own daemon,
166    // so the old "any `wire daemon` whose pid != my pidfile = orphan" rule
167    // flagged sibling sessions' LEGITIMATE daemons as orphans — `wire doctor`
168    // FAILed on the very multi-agent-per-box setup wire exists for. A true
169    // orphan is a wire daemon owned by NO session: exclude every session's
170    // pidfile pid, not just this session's.
171    let known_session_pids: std::collections::HashSet<u32> = crate::session::list_sessions()
172        .map(|sessions| {
173            sessions
174                .iter()
175                .filter_map(|s| crate::session::session_daemon_pid(&s.home_dir))
176                .collect()
177        })
178        .unwrap_or_default();
179    // v0.14.2 (#170 follow-up): also exclude the `wire daemon --all-sessions`
180    // supervisor. It's pgrep-matched by the "wire daemon" cmdline scan but
181    // ISN'T orphaned — it has its own pidfile at `sessions_root/supervisor.pid`
182    // and legitimately owns the orchestration role. Pre-fix the supervisor
183    // showed up under `!! orphan daemon process(es)` on every `wire status`
184    // even though it was the load-bearing process keeping every session
185    // daemon alive — confusing operators into thinking it was stale.
186    let supervisor_pid: Option<u32> = crate::session::sessions_root()
187        .ok()
188        .map(|root| root.join("supervisor.pid"))
189        .filter(|p| p.exists())
190        .and_then(|p| std::fs::read_to_string(p).ok())
191        .and_then(|s| s.trim().parse::<u32>().ok())
192        .filter(|p| pid_is_alive(*p));
193    // v0.15.1: scope the orphan check to daemons that serve OUR WIRE_HOME.
194    // `pgrep "wire daemon"` is machine-global, but a daemon only "races
195    // our relay cursor" if it points at the SAME state tree. Pre-fix, a
196    // fresh install / any non-default WIRE_HOME ran the global scan but
197    // built its exclusion set (known_session_pids, supervisor) from the
198    // CURRENT home's sessions_root — so the operator's real default-home
199    // daemons all showed up as "orphan daemon process(es)... Multiple
200    // daemons race the relay cursor" on the very first `wire status`,
201    // even though they touch a completely different home.
202    let our_home = std::env::var("WIRE_HOME").ok();
203    let orphan_pids: Vec<u32> = pgrep_pids
204        .iter()
205        .copied()
206        .filter(|p| {
207            is_orphan_for_home(
208                *p,
209                pidfile_pid,
210                &known_session_pids,
211                supervisor_pid,
212                our_home.as_deref(),
213                crate::session::read_wire_home_from_pid(*p).as_deref(),
214            )
215        })
216        .collect();
217    DaemonLiveness {
218        pidfile_pid,
219        pidfile_alive,
220        pgrep_pids,
221        orphan_pids,
222        record,
223    }
224}
225
/// Pure orphan predicate; the per-pid home lookup is passed in so the rule
/// can be unit-tested without touching the OS.
///
/// `pid` counts as an orphan — a `wire daemon` racing OUR relay cursor with
/// no legitimate owner — only when every one of these holds:
/// - it differs from our own pidfile pid,
/// - it is absent from `known_session_pids` (registered session daemons),
/// - it differs from the `--all-sessions` supervisor pid,
/// - it serves the same WIRE_HOME as us (`pid_home == our_home`; two
///   `None`s compare equal, meaning both serve the default home).
///
/// The home comparison (v0.15.1) can only remove candidates, never add
/// them: it keeps a daemon that belongs to a *different* home — visible
/// through the machine-global scan from inside a fresh/temp home — from
/// being reported as racing our cursor. When a pid's home is unreadable
/// (`pid_home == None`, e.g. on Windows) it only matches if our own home is
/// `None` as well.
fn is_orphan_for_home(
    pid: u32,
    pidfile_pid: Option<u32>,
    known_session_pids: &std::collections::HashSet<u32>,
    supervisor_pid: Option<u32>,
    our_home: Option<&str>,
    pid_home: Option<&str>,
) -> bool {
    // Any legitimate owner disqualifies the pid outright.
    let has_owner = pidfile_pid == Some(pid)
        || known_session_pids.contains(&pid)
        || supervisor_pid == Some(pid);
    if has_owner {
        return false;
    }
    // Unowned: an orphan only if it serves the same home we do.
    our_home == pid_home
}
256
257/// Read a pid file. Only the JSON `DaemonPid` form is supported; any
258/// other content is reported as `Corrupt`. Never panics.
259pub fn read_pid_record(name: &str) -> PidRecord {
260    let path = match pid_file(name) {
261        Ok(p) => p,
262        Err(_) => return PidRecord::Missing,
263    };
264    let body = match std::fs::read_to_string(&path) {
265        Ok(b) => b,
266        Err(_) => return PidRecord::Missing,
267    };
268    let trimmed = body.trim();
269    if trimmed.is_empty() {
270        return PidRecord::Missing;
271    }
272    match serde_json::from_str::<DaemonPid>(trimmed) {
273        Ok(d) => PidRecord::Json(d),
274        Err(e) => PidRecord::Corrupt(format!("JSON parse: {e}")),
275    }
276}
277
278/// Write a JSON pid record. P0.4: replaces the raw-int write.
279fn write_pid_record(name: &str, record: &DaemonPid) -> Result<()> {
280    let path = pid_file(name)?;
281    let body = serde_json::to_vec_pretty(record)?;
282    std::fs::write(&path, body)?;
283    Ok(())
284}
285
286/// Daemon-startup: claim the `daemon.pid` file for THIS process.
287///
288/// A daemon started directly (`wire daemon`, not via `ensure_background`)
289/// must write its own versioned-JSON pidfile so `wire status` / doctor /
290/// the singleton guard can see it. Idempotent: if the pidfile already
291/// records our PID we leave it untouched. (Historically this lived in
292/// `pending_pair::cleanup_on_startup` alongside the now-removed SAS
293/// pending-pair recovery; the pidfile write was never SAS-specific.)
294pub fn write_self_daemon_pid() -> Result<()> {
295    let path = pid_file("daemon")?;
296    let my_pid = std::process::id();
297    if path.exists()
298        && let Ok(s) = std::fs::read_to_string(&path)
299        && let Ok(rec) = serde_json::from_str::<DaemonPid>(s.trim())
300        && rec.pid == my_pid
301    {
302        // We already own this pidfile — nothing to do.
303        return Ok(());
304    }
305    if let Some(parent) = path.parent() {
306        std::fs::create_dir_all(parent).ok();
307    }
308    write_pid_record("daemon", &build_pid_record(my_pid))
309}
310
/// Schema string written into every JSON last-sync file. Bumped if the
/// shape ever changes incompatibly. Readers tolerate any schema string +
/// fall back to "unknown last_sync" when they don't recognize it.
///
/// NOTE(review): `read_last_sync_record` in this module accepts any record
/// that deserializes and does not compare its `schema` against this
/// constant — presumably recognition is left to callers; confirm.
pub const LAST_SYNC_FILE_SCHEMA: &str = "wire-daemon-last-sync-v1";
315
/// Versioned record written by `wire daemon` after each successful sync
/// cycle. Readers (`wire status`, `mcp__wire__wire_status`,
/// `mcp__wire__wire_send` annotations) inspect it to surface
/// "is the sync loop alive RIGHT NOW?" — distinct from "is there a
/// process with `wire daemon` in its cmdline?" (the existing pidfile-
/// alive check), which can be true while the loop has been wedged for
/// minutes. v0.14.2 (#162): closes the silent-send class where the MCP
/// surface reports `status:"queued"` while no one is actually pushing.
///
/// Stored as `last_sync.json` in the state directory.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LastSyncRecord {
    /// Schema discriminator. `wire-daemon-last-sync-v1`.
    pub schema: String,
    /// RFC3339 UTC timestamp of the most recently completed cycle. The
    /// writer falls back to an empty string if formatting fails, so
    /// readers must treat an unparseable value as "unknown".
    pub ts: String,
    /// Number of outbox events pushed in this cycle.
    pub push_n: usize,
    /// Number of inbox events pulled (verified + written) in this cycle.
    pub pull_n: usize,
    /// Number of inbox events rejected by signature/cursor checks.
    pub rejected_n: usize,
}
337
338fn last_sync_file() -> Result<PathBuf> {
339    Ok(crate::config::state_dir()?.join("last_sync.json"))
340}
341
342/// Write the last-sync record. Called by `cmd_daemon` after each cycle
343/// (including --once). Best-effort: any error logs to stderr but does NOT
344/// abort the daemon loop — a wedged pidfile path shouldn't take the sync
345/// loop down with it.
346pub fn write_last_sync_record(push_n: usize, pull_n: usize, rejected_n: usize) {
347    let record = LastSyncRecord {
348        schema: LAST_SYNC_FILE_SCHEMA.to_string(),
349        ts: time::OffsetDateTime::now_utc()
350            .format(&time::format_description::well_known::Rfc3339)
351            .unwrap_or_default(),
352        push_n,
353        pull_n,
354        rejected_n,
355    };
356    let _ = (|| -> Result<()> {
357        let path = last_sync_file()?;
358        if let Some(parent) = path.parent() {
359            std::fs::create_dir_all(parent)?;
360        }
361        let body = serde_json::to_vec_pretty(&record)?;
362        std::fs::write(&path, body)?;
363        Ok(())
364    })()
365    .map_err(|e| eprintln!("daemon: last-sync persist error (non-fatal): {e:#}"));
366}
367
368/// Read the last-sync record. Returns `None` if missing/corrupt — every
369/// caller should treat that as "unknown sync state, daemon may never
370/// have run" and surface it accordingly.
371pub fn read_last_sync_record() -> Option<LastSyncRecord> {
372    let path = last_sync_file().ok()?;
373    let body = std::fs::read_to_string(&path).ok()?;
374    serde_json::from_str(&body).ok()
375}
376
377/// Convenience: the wall-clock age (in whole seconds) of the most recent
378/// sync, or `None` if no record exists / the timestamp can't be parsed.
379/// Negative ages (clock skew between daemon + reader) are clamped to 0.
380pub fn last_sync_age_seconds() -> Option<u64> {
381    let rec = read_last_sync_record()?;
382    let parsed =
383        time::OffsetDateTime::parse(&rec.ts, &time::format_description::well_known::Rfc3339)
384            .ok()?;
385    let delta = time::OffsetDateTime::now_utc() - parsed;
386    let secs = delta.whole_seconds();
387    Some(secs.max(0) as u64)
388}
389
390/// Inspect the daemon singleton state. Returns `Some(pid)` iff the
391/// pidfile names a live `wire daemon` process — i.e., a singleton is
392/// currently held by another in-flight daemon. Returns `None` if the
393/// pidfile is missing, corrupt, or names a dead process.
394///
395/// v0.14.2 (#162): foreground `wire daemon` (the operator-typed kind,
396/// not the `ensure_background` spawn path) didn't write its own
397/// pidfile, so subsequent `ensure_daemon_running()` calls couldn't
398/// see it and would spawn duplicates. The duplicate-pull race is
399/// safe — per-path outbox locks prevent corruption — but it wastes
400/// relay polls and confuses operator diagnosis ("why are there 3
401/// daemons?"). The singleton helpers below let `cmd_daemon` claim
402/// the slot at startup + write its own pidfile, closing the gap.
403pub fn daemon_singleton_holder() -> Option<u32> {
404    // Exclude our OWN pid: `ensure_background` records the spawned daemon's pid
405    // in the "daemon" pidfile right after spawn (the P0.4 alive-confirmation
406    // write), and the daemon's own startup singleton check then reads that same
407    // pidfile. Without this self-exclusion the daemon sees its own pid as a live
408    // "other" holder, logs "another daemon is already running", and exits — so a
409    // freshly-`wire up`'d session ends up with NO running daemon and the first
410    // connection silently never completes (the receiver never pulls). A
411    // manually-started daemon dodged this only because nothing pre-wrote its
412    // pid. Self is never "another" daemon.
413    let me = std::process::id();
414    match read_pid_record("daemon").pid() {
415        Some(pid) if pid != me && pid_is_alive(pid) => Some(pid),
416        _ => None,
417    }
418}
419
420/// Claim the daemon-pid singleton by writing this process's pid +
421/// metadata to the pidfile. Callers should first check
422/// `daemon_singleton_holder()` — if Some, bail rather than overwrite.
423///
424/// Returns a `DaemonPidGuard` that removes the pidfile when dropped,
425/// so a graceful exit (SIGINT → normal Drop chain) cleans up.
426pub fn claim_daemon_singleton() -> Result<DaemonPidGuard> {
427    crate::config::ensure_dirs()?;
428    let pid = std::process::id();
429    let record = build_pid_record(pid);
430    write_pid_record("daemon", &record)?;
431    let path = pid_file("daemon")?;
432    Ok(DaemonPidGuard {
433        path,
434        owned_pid: pid,
435    })
436}
437
/// Drop guard for a claimed daemon-pid singleton. On drop, removes
/// the pidfile only if it still names the pid we wrote — protects
/// against the case where another daemon raced in after we exited
/// the singleton check but before we wrote, and we don't want to
/// wipe their record on our exit.
pub struct DaemonPidGuard {
    /// Location of the pid file this guard is responsible for.
    path: PathBuf,
    /// The pid that was written when the singleton was claimed; the file
    /// is only removed while it still names this pid.
    owned_pid: u32,
}
447
448impl Drop for DaemonPidGuard {
449    fn drop(&mut self) {
450        // Only remove if the file still names US. If another wire
451        // daemon raced in and overwrote, leave their record alone.
452        if let Ok(body) = std::fs::read_to_string(&self.path) {
453            let still_ours = serde_json::from_str::<DaemonPid>(body.trim())
454                .map(|d| d.pid == self.owned_pid)
455                .unwrap_or_else(|_| {
456                    body.trim()
457                        .parse::<u32>()
458                        .map(|p| p == self.owned_pid)
459                        .unwrap_or(false)
460                });
461            if still_ours {
462                let _ = std::fs::remove_file(&self.path);
463            }
464        }
465    }
466}
467
468/// Build a `DaemonPid` for a freshly-spawned child. Reads bin_path,
469/// current binary version, identity DID, and bound relay URL.
470fn build_pid_record(pid: u32) -> DaemonPid {
471    let bin_path = std::env::current_exe()
472        .map(|p| p.to_string_lossy().to_string())
473        .unwrap_or_default();
474    let version = env!("CARGO_PKG_VERSION").to_string();
475    let started_at = time::OffsetDateTime::now_utc()
476        .format(&time::format_description::well_known::Rfc3339)
477        .unwrap_or_default();
478    let (did, relay_url) = identity_for_pid_record();
479    DaemonPid {
480        schema: DAEMON_PID_SCHEMA.to_string(),
481        pid,
482        bin_path,
483        version,
484        started_at,
485        did,
486        relay_url,
487    }
488}
489
490/// Best-effort: pull DID + relay_url from the configured identity. None
491/// fields are written as `null` so the file stays well-formed even before
492/// the operator runs `wire init`.
493fn identity_for_pid_record() -> (Option<String>, Option<String>) {
494    let did = crate::config::read_agent_card()
495        .ok()
496        .and_then(|card| card.get("did").and_then(Value::as_str).map(str::to_string));
497    let relay_url = crate::config::read_relay_state().ok().and_then(|state| {
498        state
499            .get("self")
500            .and_then(|s| s.get("relay_url"))
501            .and_then(Value::as_str)
502            .map(str::to_string)
503    });
504    (did, relay_url)
505}
506
507/// Wait briefly for `process_alive(pid)` to be true. Returns true if the
508/// child went live within the budget. Default budget is 500ms — enough for
509/// std::process::Command::spawn to fork + exec on any reasonable platform.
510fn wait_until_alive(pid: u32, budget: Duration) -> bool {
511    let deadline = Instant::now() + budget;
512    while Instant::now() < deadline {
513        if process_alive(pid) {
514            return true;
515        }
516        std::thread::sleep(Duration::from_millis(10));
517    }
518    process_alive(pid)
519}
520
521fn ensure_background(name: &str, args: &[&str]) -> Result<bool> {
522    // Test escape hatch — tests/mcp_pair.rs spawns wire mcp with this env
523    // var set so wire_accept/wire_dial don't fork persistent daemon/notify
524    // processes that survive the test's temp WIRE_HOME.
525    if std::env::var("WIRE_MCP_SKIP_AUTO_UP").is_ok() {
526        return Ok(false);
527    }
528
529    // Skip spawn if existing pid is still alive.
530    if let Some(pid) = read_pid_record(name).pid()
531        && process_alive(pid)
532    {
533        return Ok(false);
534    }
535
536    crate::config::ensure_dirs()?;
537    let exe = std::env::current_exe()?;
538    let mut cmd = Command::new(&exe);
539    cmd.args(args).stdin(Stdio::null()).stdout(Stdio::null());
540    // Capture the spawned daemon's stderr to a logfile instead of /dev/null so
541    // a daemon that dies on startup leaves a trace (otherwise its death is
542    // invisible — exactly the silent-fail class this guards). Best-effort: fall
543    // back to null if the log can't be opened.
544    let stderr_log = crate::config::state_dir()
545        .ok()
546        .map(|d| d.join(format!("{name}-spawn.log")));
547    match stderr_log
548        .as_ref()
549        .and_then(|p| std::fs::File::create(p).ok())
550    {
551        Some(f) => {
552            cmd.stderr(Stdio::from(f));
553        }
554        None => {
555            cmd.stderr(Stdio::null());
556        }
557    }
558
559    let child = cmd.spawn()?;
560
561    // P0.4: wait until the child is actually alive before persisting the
562    // pid file. Otherwise a concurrent CLI sees the file pointing at a
563    // PID that isn't yet bound to anything — "daemon reports running but
564    // can't accept connections" race.
565    let pid = child.id();
566    if !wait_until_alive(pid, Duration::from_millis(500)) {
567        anyhow::bail!(
568            "spawned `wire {}` (pid {pid}) did not appear alive within 500ms",
569            args.join(" ")
570        );
571    }
572
573    let record = build_pid_record(pid);
574    write_pid_record(name, &record)?;
575    Ok(true)
576}
577
578/// Check the running daemon's version against the CLI's CARGO_PKG_VERSION.
579/// Returns Some(stale_version) if they disagree, None if they match (or no
580/// daemon).
581///
582/// Called by `wire status` + `wire doctor`. The intent is loud, non-fatal
583/// warning — don't BLOCK CLI invocations on version mismatch (operator may
584/// be running a one-shot debug while daemon is old), but DO make it
585/// impossible to miss.
586pub fn daemon_version_mismatch() -> Option<String> {
587    let record = read_pid_record("daemon");
588    let pid = record.pid()?;
589    if !process_alive(pid) {
590        return None;
591    }
592    match record {
593        PidRecord::Json(d) => {
594            if d.version != env!("CARGO_PKG_VERSION") {
595                Some(d.version)
596            } else {
597                None
598            }
599        }
600        _ => None,
601    }
602}
603
/// Module-private liveness probe: true iff `pid` is a live OS process
/// according to `crate::platform::process_alive`. Performs the same check
/// as the public `pid_is_alive`.
fn process_alive(pid: u32) -> bool {
    crate::platform::process_alive(pid)
}
607
#[cfg(test)]
mod tests {
    use super::*;

    /// The test process itself is, by definition, alive.
    #[test]
    fn process_alive_self() {
        assert!(process_alive(std::process::id()));
    }

    #[test]
    fn orphan_excludes_daemon_serving_a_different_home() {
        // The v0.15.1 regression: a fresh install (our_home = temp) runs
        // a machine-global pgrep that sees the operator's real default-home
        // daemon (pid_home = None). It must NOT be flagged as an orphan
        // racing our cursor.
        let empty = std::collections::HashSet::new();
        assert!(!is_orphan_for_home(
            42,
            None,
            &empty,
            None,
            Some("/tmp/fresh/home"), // we run under a temp WIRE_HOME
            None,                    // the real daemon serves the default home
        ));
        // A foreign Some-home daemon is likewise not ours.
        assert!(!is_orphan_for_home(
            42,
            None,
            &empty,
            None,
            Some("/tmp/fresh/home"),
            Some("/Users/op/other/home"),
        ));
    }

    #[test]
    fn orphan_flags_unowned_daemon_on_same_home() {
        // A genuine orphan: same home as us, not our pidfile, not a known
        // session, not the supervisor → still flagged (feature preserved).
        let empty = std::collections::HashSet::new();
        // Both default home (None == None).
        assert!(is_orphan_for_home(42, Some(7), &empty, Some(9), None, None));
        // Both the same explicit home.
        assert!(is_orphan_for_home(
            42,
            None,
            &empty,
            None,
            Some("/h"),
            Some("/h")
        ));
    }

    #[test]
    fn orphan_excludes_self_session_and_supervisor_even_on_same_home() {
        let mut known = std::collections::HashSet::new();
        known.insert(100u32);
        // our own pidfile pid
        assert!(!is_orphan_for_home(7, Some(7), &known, Some(9), None, None));
        // a registered session daemon
        assert!(!is_orphan_for_home(
            100,
            Some(7),
            &known,
            Some(9),
            None,
            None
        ));
        // the supervisor
        assert!(!is_orphan_for_home(9, Some(7), &known, Some(9), None, None));
    }

    /// A pid far above anything the test expects to be in use must be
    /// reported as not alive. (Previously named
    /// `process_alive_zero_is_false_or_self`, which described neither the
    /// input nor the assertion.)
    #[test]
    fn process_alive_is_false_for_unused_pid() {
        assert!(!process_alive(99_999_999));
    }

    /// Only a parsed record exposes a pid; the other variants carry none.
    #[test]
    fn pid_accessor_is_none_for_missing_and_corrupt() {
        assert_eq!(PidRecord::Missing.pid(), None);
        assert_eq!(PidRecord::Corrupt("garbage".to_string()).pid(), None);
    }

    #[test]
    fn pid_record_round_trips_via_json_form() {
        // P0.4 contract: a record written by 0.5.11 must be readable by
        // 0.5.11. If serde gets out of sync with the file format, every
        // single CLI invocation breaks silently.
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let record = DaemonPid {
                schema: DAEMON_PID_SCHEMA.to_string(),
                pid: 12345,
                bin_path: "/usr/local/bin/wire".to_string(),
                version: "0.5.11".to_string(),
                started_at: "2026-05-16T01:23:45Z".to_string(),
                did: Some("did:wire:paul-mac".to_string()),
                relay_url: Some("https://wireup.net".to_string()),
            };
            write_pid_record("daemon", &record).unwrap();
            let read = read_pid_record("daemon");
            match read {
                PidRecord::Json(d) => assert_eq!(d, record),
                other => panic!("expected JSON record, got {other:?}"),
            }
        });
    }

    #[test]
    fn pid_record_corrupt_reports_corrupt_not_panic() {
        // Today's debug had a stale pidfile pointing at a dead PID. The
        // reader was tolerant. A future bug might write garbage; the reader
        // must not panic — it must report Corrupt so wire doctor can
        // surface it visibly.
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let path = super::pid_file("daemon").unwrap();
            std::fs::write(&path, "not-a-pid-or-json {{{").unwrap();
            let read = read_pid_record("daemon");
            assert!(matches!(read, PidRecord::Corrupt(_)), "got {read:?}");
        });
    }

    /// With no pid file in a fresh temp home there is no daemon to compare
    /// against, so no mismatch is reported.
    #[test]
    fn daemon_version_mismatch_returns_none_when_no_pidfile() {
        crate::config::test_support::with_temp_home(|| {
            assert_eq!(daemon_version_mismatch(), None);
        });
    }
}
731}