Skip to main content

wire/
ensure_up.rs

1//! Background-process bootstrapper for the MCP path.
2//!
3//! Post-pair, an agent shouldn't have to ask the user "start the daemon?" —
4//! `wire_pair_confirm` invokes [`ensure_daemon_running`] + [`ensure_notify_running`]
5//! so push/pull and OS toasts are already armed by the time the agent surfaces
6//! "paired ✓" back to chat.
7//!
8//! ## Idempotency
9//!
10//! Each subcommand writes its pid record to `$WIRE_HOME/state/wire/<name>.pid`
11//! on spawn. The next call reads the record and skips spawning if the pid is
12//! still alive. Stale pid files (process died) are silently overwritten.
13//!
14//! ## Pid-file shape (P0.4, 0.5.11)
15//!
16//! The pid file used to be a raw integer (`12345\n`). Today's debug surfaced
17//! a process running an OLD binary text in memory under a current symlink,
18//! and `wire status` had no way to detect that. The pid file is now a
19//! versioned JSON record:
20//!
21//! ```json
22//! {
23//!   "schema": "wire-daemon-pid-v1",
24//!   "pid": 12345,
25//!   "bin_path": "/usr/local/bin/wire",
26//!   "version": "0.5.11",
27//!   "started_at": "2026-05-16T01:23:45Z",
28//!   "did": "did:wire:paul-mac",
29//!   "relay_url": "https://wireup.net"
30//! }
31//! ```
32//!
//! Readers are TOLERANT of the legacy int form for one transition cycle —
//! [`read_pid_record`] parses any body that does not start with `{` as a raw
//! integer and reports it as [`PidRecord::LegacyInt`] (no version metadata)
//! so callers can degrade gracefully.
36//!
37//! ## Wait-until-alive
38//!
39//! On spawn, we wait briefly for the child to be alive before persisting the
40//! pid file. A concurrent CLI seeing the file pointing at a not-yet-bound
41//! PID is the "daemon reports running but can't accept connections" race
42//! spark flagged in our P0.4 design call.
43//!
44//! ## Detachment (Unix)
45//!
46//! Spawned with stdio nulled. Since `wire mcp` runs without a controlling
47//! TTY (it's a stdio MCP server, not a login shell), the spawned children
48//! inherit no TTY → no SIGHUP arrives when the parent exits, so they
49//! survive a Claude Code restart cycle. PIDs are reaped by init.
50//!
51//! Worst case: a child dies; the next `wire_pair_confirm` call respawns it.
52//! No data is lost (outbox/inbox is on disk, content-addressed dedupe).
53
54use std::path::PathBuf;
55use std::process::{Command, Stdio};
56use std::time::{Duration, Instant};
57
58use anyhow::Result;
59use serde::{Deserialize, Serialize};
60use serde_json::Value;
61
/// Schema string written into every JSON pid file. Bumped if the pid-file
/// shape ever changes incompatibly.
///
/// `build_pid_record` stamps this value into the `schema` field of each
/// record it creates.
///
/// NOTE(review): the intent is that readers warn on an unknown schema, but
/// `read_pid_record` in this file does not inspect the field — any such
/// warning would have to live in a caller. Confirm.
pub const DAEMON_PID_SCHEMA: &str = "wire-daemon-pid-v1";
65
/// Versioned daemon pid record — the JSON form written by 0.5.11+.
///
/// Serialized with serde; `read_pid_record` decodes it back whenever the
/// pid file's body starts with `{`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct DaemonPid {
    /// Schema discriminator. Always `wire-daemon-pid-v1` for now.
    pub schema: String,
    /// OS process id of the spawned background process.
    pub pid: u32,
    /// Absolute path of the binary that was exec'd. Catches today's exact
    /// bug: a stale 0.2.4 daemon process kept running under a symlink that
    /// was repointed at 0.5.10 — `wire --version` says 0.5.10 but the
    /// running daemon's text in memory is still 0.2.4.
    pub bin_path: String,
    /// CARGO_PKG_VERSION captured at spawn. `daemon_version_mismatch`
    /// compares it against the CLI's own version; mismatch = loud warn.
    pub version: String,
    /// RFC3339 timestamp of spawn.
    pub started_at: String,
    /// Self DID — catches multi-identity contamination (one user, two wire
    /// identities on same host, daemon launched as wrong one). Cheap
    /// field, expensive bug. `None` when no DID could be read at spawn.
    pub did: Option<String>,
    /// Relay this daemon was bound to at spawn. Catches daemon-bound-to-
    /// old-relay-after-migration drift. `None` when no relay URL could be
    /// read at spawn.
    pub relay_url: Option<String>,
}
90
/// Result of reading a pid file. Distinguishes legacy-int (no metadata)
/// from JSON (full metadata) so callers can degrade gracefully.
#[derive(Debug, Clone)]
pub enum PidRecord {
    /// Full versioned record decoded from the JSON form.
    Json(DaemonPid),
    /// Bare process id from the legacy raw-integer form; carries no
    /// version or identity metadata.
    LegacyInt(u32),
    /// No pid record was found (for example the file is absent or blank).
    Missing,
    /// The pid file could not be interpreted; the string is a
    /// human-readable description of what went wrong.
    Corrupt(String),
}
100
101impl PidRecord {
102    pub fn pid(&self) -> Option<u32> {
103        match self {
104            PidRecord::Json(d) => Some(d.pid),
105            PidRecord::LegacyInt(p) => Some(*p),
106            _ => None,
107        }
108    }
109}
110
111/// Ensure a `wire daemon --interval 5` process is alive. Returns `Ok(true)`
112/// if a fresh process was spawned, `Ok(false)` if one was already running.
113pub fn ensure_daemon_running() -> Result<bool> {
114    ensure_background("daemon", &["daemon", "--interval", "5"])
115}
116
117/// Ensure a `wire notify --interval 2` process is alive (OS toasts on
118/// every new verified inbox event). Returns true if newly spawned.
119pub fn ensure_notify_running() -> Result<bool> {
120    ensure_background("notify", &["notify", "--interval", "2"])
121}
122
123fn pid_file(name: &str) -> Result<PathBuf> {
124    Ok(crate::config::state_dir()?.join(format!("{name}.pid")))
125}
126
127/// Read a pid file, tolerating both JSON and legacy-int forms. Never
128/// panics — corrupt input becomes `PidRecord::Corrupt`.
129pub fn read_pid_record(name: &str) -> PidRecord {
130    let path = match pid_file(name) {
131        Ok(p) => p,
132        Err(_) => return PidRecord::Missing,
133    };
134    let body = match std::fs::read_to_string(&path) {
135        Ok(b) => b,
136        Err(_) => return PidRecord::Missing,
137    };
138    let trimmed = body.trim();
139    if trimmed.is_empty() {
140        return PidRecord::Missing;
141    }
142    // JSON form first.
143    if trimmed.starts_with('{') {
144        match serde_json::from_str::<DaemonPid>(trimmed) {
145            Ok(d) => return PidRecord::Json(d),
146            Err(e) => return PidRecord::Corrupt(format!("JSON parse: {e}")),
147        }
148    }
149    // Legacy raw-int form — keep readable for one transition cycle so a
150    // 0.5.11 daemon can take over from a 0.5.10 leftover without
151    // operator intervention.
152    match trimmed.parse::<u32>() {
153        Ok(pid) => PidRecord::LegacyInt(pid),
154        Err(e) => PidRecord::Corrupt(format!("expected int or JSON: {e}")),
155    }
156}
157
158/// Write a JSON pid record. P0.4: replaces the raw-int write.
159fn write_pid_record(name: &str, record: &DaemonPid) -> Result<()> {
160    let path = pid_file(name)?;
161    let body = serde_json::to_vec_pretty(record)?;
162    std::fs::write(&path, body)?;
163    Ok(())
164}
165
166/// Build a `DaemonPid` for a freshly-spawned child. Reads bin_path,
167/// current binary version, identity DID, and bound relay URL.
168fn build_pid_record(pid: u32) -> DaemonPid {
169    let bin_path = std::env::current_exe()
170        .map(|p| p.to_string_lossy().to_string())
171        .unwrap_or_default();
172    let version = env!("CARGO_PKG_VERSION").to_string();
173    let started_at = time::OffsetDateTime::now_utc()
174        .format(&time::format_description::well_known::Rfc3339)
175        .unwrap_or_default();
176    let (did, relay_url) = identity_for_pid_record();
177    DaemonPid {
178        schema: DAEMON_PID_SCHEMA.to_string(),
179        pid,
180        bin_path,
181        version,
182        started_at,
183        did,
184        relay_url,
185    }
186}
187
188/// Best-effort: pull DID + relay_url from the configured identity. None
189/// fields are written as `null` so the file stays well-formed even before
190/// the operator runs `wire init`.
191fn identity_for_pid_record() -> (Option<String>, Option<String>) {
192    let did = crate::config::read_agent_card()
193        .ok()
194        .and_then(|card| {
195            card.get("did")
196                .and_then(Value::as_str)
197                .map(str::to_string)
198        });
199    let relay_url = crate::config::read_relay_state()
200        .ok()
201        .and_then(|state| {
202            state
203                .get("self")
204                .and_then(|s| s.get("relay_url"))
205                .and_then(Value::as_str)
206                .map(str::to_string)
207        });
208    (did, relay_url)
209}
210
211/// Wait briefly for `process_alive(pid)` to be true. Returns true if the
212/// child went live within the budget. Default budget is 500ms — enough for
213/// std::process::Command::spawn to fork + exec on any reasonable platform.
214fn wait_until_alive(pid: u32, budget: Duration) -> bool {
215    let deadline = Instant::now() + budget;
216    while Instant::now() < deadline {
217        if process_alive(pid) {
218            return true;
219        }
220        std::thread::sleep(Duration::from_millis(10));
221    }
222    process_alive(pid)
223}
224
225fn ensure_background(name: &str, args: &[&str]) -> Result<bool> {
226    // Test escape hatch — tests/mcp_pair.rs spawns wire mcp with this env
227    // var set so wire_pair_confirm doesn't fork persistent daemon/notify
228    // processes that survive the test's temp WIRE_HOME.
229    if std::env::var("WIRE_MCP_SKIP_AUTO_UP").is_ok() {
230        return Ok(false);
231    }
232
233    // Skip spawn if existing pid is still alive.
234    if let Some(pid) = read_pid_record(name).pid()
235        && process_alive(pid)
236    {
237        return Ok(false);
238    }
239
240    crate::config::ensure_dirs()?;
241    let exe = std::env::current_exe()?;
242    let child = Command::new(&exe)
243        .args(args)
244        .stdin(Stdio::null())
245        .stdout(Stdio::null())
246        .stderr(Stdio::null())
247        .spawn()?;
248
249    // P0.4: wait until the child is actually alive before persisting the
250    // pid file. Otherwise a concurrent CLI sees the file pointing at a
251    // PID that isn't yet bound to anything — "daemon reports running but
252    // can't accept connections" race.
253    let pid = child.id();
254    if !wait_until_alive(pid, Duration::from_millis(500)) {
255        anyhow::bail!(
256            "spawned `wire {}` (pid {pid}) did not appear alive within 500ms",
257            args.join(" ")
258        );
259    }
260
261    let record = build_pid_record(pid);
262    write_pid_record(name, &record)?;
263    Ok(true)
264}
265
266/// Check the running daemon's version against the CLI's CARGO_PKG_VERSION.
267/// Returns Some(stale_version) if they disagree, None if they match (or no
268/// daemon, or legacy-int pidfile without version info).
269///
270/// Called by `wire status` + `wire doctor`. The intent is loud, non-fatal
271/// warning — don't BLOCK CLI invocations on version mismatch (operator may
272/// be running a one-shot debug while daemon is old), but DO make it
273/// impossible to miss.
274pub fn daemon_version_mismatch() -> Option<String> {
275    let record = read_pid_record("daemon");
276    let pid = record.pid()?;
277    if !process_alive(pid) {
278        return None;
279    }
280    match record {
281        PidRecord::Json(d) => {
282            if d.version != env!("CARGO_PKG_VERSION") {
283                Some(d.version)
284            } else {
285                None
286            }
287        }
288        PidRecord::LegacyInt(_) => {
289            // Legacy pidfile = pre-0.5.11 daemon writing raw int. By
290            // definition older than this CLI, so flag it.
291            Some("<pre-0.5.11>".to_string())
292        }
293        _ => None,
294    }
295}
296
/// Linux liveness probe: a process counts as alive when its `/proc/<pid>`
/// entry can be stat'ed.
#[cfg(target_os = "linux")]
fn process_alive(pid: u32) -> bool {
    let proc_entry = format!("/proc/{pid}");
    std::fs::metadata(proc_entry).is_ok()
}
301
/// Non-Linux liveness probe: signal-0 check via `kill -0 <pid>` exit status.
///
/// Returns false when the `kill` utility cannot be run at all, and for pids
/// that cannot name a single real process:
///
/// - `0` — `kill` would address the caller's own process group, which always
///   succeeds and would make a bogus record look like a live daemon.
/// - values above `i32::MAX` — these do not fit a signed 32-bit pid and may
///   be reinterpreted as a negative (group / broadcast) target.
#[cfg(not(target_os = "linux"))]
fn process_alive(pid: u32) -> bool {
    if pid == 0 || pid > i32::MAX as u32 {
        return false;
    }
    // macOS / others: signal-0 check via `kill -0 <pid>` exit status.
    Command::new("kill")
        .args(["-0", &pid.to_string()])
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .map(|s| s.success())
        .unwrap_or(false)
}
314
/// Unit tests for the liveness probe and the pid-file reader/writer.
///
/// The pid-file tests run inside `crate::config::test_support::with_temp_home`.
/// NOTE(review): presumably that helper points the state directory at a
/// throwaway location for the duration of the closure — confirm.
#[cfg(test)]
mod tests {
    use super::*;

    /// The test process itself must be reported as alive.
    #[test]
    fn process_alive_self() {
        assert!(process_alive(std::process::id()));
    }

    /// A pid that should not exist must be reported as not alive.
    ///
    /// NOTE(review): the name mentions pid zero, but the assertion probes
    /// pid 99_999_999; pid 0 itself is not exercised here.
    #[test]
    fn process_alive_zero_is_false_or_self() {
        assert!(!process_alive(99_999_999));
    }

    /// Writing a record and reading it back yields an equal `DaemonPid`.
    #[test]
    fn pid_record_round_trips_via_json_form() {
        // P0.4 contract: a record written by 0.5.11 must be readable by
        // 0.5.11. If serde gets out of sync with the file format, every
        // single CLI invocation breaks silently.
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let record = DaemonPid {
                schema: DAEMON_PID_SCHEMA.to_string(),
                pid: 12345,
                bin_path: "/usr/local/bin/wire".to_string(),
                version: "0.5.11".to_string(),
                started_at: "2026-05-16T01:23:45Z".to_string(),
                did: Some("did:wire:paul-mac".to_string()),
                relay_url: Some("https://wireup.net".to_string()),
            };
            write_pid_record("daemon", &record).unwrap();
            let read = read_pid_record("daemon");
            match read {
                PidRecord::Json(d) => assert_eq!(d, record),
                other => panic!("expected JSON record, got {other:?}"),
            }
        });
    }

    /// A pid file holding only an integer is read as `LegacyInt`.
    #[test]
    fn pid_record_tolerates_legacy_int_form() {
        // The whole point of LegacyInt: a 0.5.11 daemon must be able to
        // take over from a 0.5.10 leftover without operator intervention.
        // If this assertion fails, every operator with a 0.5.10 daemon
        // running has to manually delete their pidfile on upgrade.
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let path = super::pid_file("daemon").unwrap();
            std::fs::write(&path, "98765").unwrap();
            let read = read_pid_record("daemon");
            match read {
                PidRecord::LegacyInt(pid) => assert_eq!(pid, 98765),
                other => panic!("expected LegacyInt, got {other:?}"),
            }
        });
    }

    /// A body that is neither JSON nor an integer is read as `Corrupt`.
    #[test]
    fn pid_record_corrupt_reports_corrupt_not_panic() {
        // Today's debug had a stale pidfile pointing at a dead PID. The
        // reader was tolerant. A future bug might write garbage; the reader
        // must not panic — it must report Corrupt so wire doctor can
        // surface it visibly.
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let path = super::pid_file("daemon").unwrap();
            // Does not start with `{`, so this takes the integer-parse path.
            std::fs::write(&path, "not-a-pid-or-json {{{").unwrap();
            let read = read_pid_record("daemon");
            assert!(matches!(read, PidRecord::Corrupt(_)), "got {read:?}");
        });
    }

    /// With no pid file written, no version mismatch is reported.
    #[test]
    fn daemon_version_mismatch_returns_none_when_no_pidfile() {
        crate::config::test_support::with_temp_home(|| {
            assert_eq!(daemon_version_mismatch(), None);
        });
    }
}