wire/ensure_up.rs
1//! Background-process bootstrapper for the MCP path.
2//!
3//! Post-pair, an agent shouldn't have to ask the user "start the daemon?" —
4//! `wire_pair_confirm` invokes [`ensure_daemon_running`] + [`ensure_notify_running`]
5//! so push/pull and OS toasts are already armed by the time the agent surfaces
6//! "paired ✓" back to chat.
7//!
8//! ## Idempotency
9//!
10//! Each subcommand writes its pid record to `$WIRE_HOME/state/wire/<name>.pid`
11//! on spawn. The next call reads the record and skips spawning if the pid is
12//! still alive. Stale pid files (process died) are silently overwritten.
13//!
14//! ## Pid-file shape (P0.4, 0.5.11)
15//!
16//! The pid file used to be a raw integer (`12345\n`). Today's debug surfaced
17//! a process running an OLD binary text in memory under a current symlink,
18//! and `wire status` had no way to detect that. The pid file is now a
19//! versioned JSON record:
20//!
21//! ```json
22//! {
23//! "schema": "wire-daemon-pid-v1",
24//! "pid": 12345,
25//! "bin_path": "/usr/local/bin/wire",
26//! "version": "0.5.11",
27//! "started_at": "2026-05-16T01:23:45Z",
28//! "did": "did:wire:paul-mac",
29//! "relay_url": "https://wireup.net"
30//! }
31//! ```
32//!
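//! Readers are TOLERANT of the legacy int form for one transition cycle —
//! `read_pid_record` parses the body as a raw integer whenever it does not
//! start with `{` and reports `PidRecord::LegacyInt` (no version metadata)
//! so callers can degrade gracefully. A body that starts with `{` but fails
//! to decode as JSON is reported as `PidRecord::Corrupt`.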
36//!
37//! ## Wait-until-alive
38//!
39//! On spawn, we wait briefly for the child to be alive before persisting the
40//! pid file. A concurrent CLI seeing the file pointing at a not-yet-bound
41//! PID is the "daemon reports running but can't accept connections" race
42//! spark flagged in our P0.4 design call.
43//!
44//! ## Detachment (Unix)
45//!
46//! Spawned with stdio nulled. Since `wire mcp` runs without a controlling
47//! TTY (it's a stdio MCP server, not a login shell), the spawned children
48//! inherit no TTY → no SIGHUP arrives when the parent exits, so they
49//! survive a Claude Code restart cycle. PIDs are reaped by init.
50//!
51//! Worst case: a child dies; the next `wire_pair_confirm` call respawns it.
52//! No data is lost (outbox/inbox is on disk, content-addressed dedupe).
53
54use std::path::PathBuf;
55use std::process::{Command, Stdio};
56use std::time::{Duration, Instant};
57
58use anyhow::Result;
59use serde::{Deserialize, Serialize};
60use serde_json::Value;
61
/// Discriminator stamped into the `schema` field of every JSON pid record
/// this module builds. Bump it only when the on-disk shape changes in an
/// incompatible way.
pub const DAEMON_PID_SCHEMA: &str = "wire-daemon-pid-v1";
65
66/// Versioned daemon pid record — the JSON form written by 0.5.11+.
67#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
68pub struct DaemonPid {
69 /// Schema discriminator. Always `wire-daemon-pid-v1` for now.
70 pub schema: String,
71 pub pid: u32,
72 /// Absolute path of the binary that was exec'd. Catches today's exact
73 /// bug: a stale 0.2.4 daemon process kept running under a symlink that
74 /// was repointed at 0.5.10 — `wire --version` says 0.5.10 but the
75 /// running daemon's text in memory is still 0.2.4.
76 pub bin_path: String,
77 /// CARGO_PKG_VERSION captured at spawn. Compared against the CLI's
78 /// own version on every invocation; mismatch = loud warn.
79 pub version: String,
80 /// RFC3339 timestamp of spawn.
81 pub started_at: String,
82 /// Self DID — catches multi-identity contamination (one user, two wire
83 /// identities on same host, daemon launched as wrong one). Cheap
84 /// field, expensive bug.
85 pub did: Option<String>,
86 /// Relay this daemon was bound to at spawn. Catches daemon-bound-to-
87 /// old-relay-after-migration drift.
88 pub relay_url: Option<String>,
89}
90
91/// Result of reading a pid file. Distinguishes legacy-int (no metadata)
92/// from JSON (full metadata) so callers can degrade gracefully.
93#[derive(Debug, Clone)]
94pub enum PidRecord {
95 Json(DaemonPid),
96 LegacyInt(u32),
97 Missing,
98 Corrupt(String),
99}
100
101impl PidRecord {
102 pub fn pid(&self) -> Option<u32> {
103 match self {
104 PidRecord::Json(d) => Some(d.pid),
105 PidRecord::LegacyInt(p) => Some(*p),
106 _ => None,
107 }
108 }
109}
110
111/// Ensure a `wire daemon --interval 5` process is alive. Returns `Ok(true)`
112/// if a fresh process was spawned, `Ok(false)` if one was already running.
113pub fn ensure_daemon_running() -> Result<bool> {
114 ensure_background("daemon", &["daemon", "--interval", "5"])
115}
116
117/// Ensure a `wire notify --interval 2` process is alive (OS toasts on
118/// every new verified inbox event). Returns true if newly spawned.
119pub fn ensure_notify_running() -> Result<bool> {
120 ensure_background("notify", &["notify", "--interval", "2"])
121}
122
123fn pid_file(name: &str) -> Result<PathBuf> {
124 Ok(crate::config::state_dir()?.join(format!("{name}.pid")))
125}
126
127/// Snapshot of daemon liveness state read through ONE consistent
128/// view. Consumed by `wire status`, `wire doctor`'s `daemon` check,
129/// and `daemon_pid_consistency` so all three surfaces agree by
130/// construction — issue #2 root cause was three call sites that
131/// each computed liveness independently and disagreed for 25 min.
132#[derive(Debug, Clone)]
133pub struct DaemonLiveness {
134 /// PID claimed by `daemon.pid` (None if missing/corrupt).
135 pub pidfile_pid: Option<u32>,
136 /// True iff `pidfile_pid` is currently a live process.
137 pub pidfile_alive: bool,
138 /// Every PID matching `pgrep -f "wire daemon"`. Empty if pgrep is
139 /// unavailable (non-Unix systems, missing util) — the consumer
140 /// must not treat empty as "no daemons" without considering this.
141 pub pgrep_pids: Vec<u32>,
142 /// PIDs in `pgrep_pids` that do NOT match `pidfile_pid`. These are
143 /// orphan daemons racing the cursor with the pidfile-recorded one.
144 pub orphan_pids: Vec<u32>,
145 /// Full parsed pidfile record (Json / LegacyInt / Missing / Corrupt).
146 pub record: PidRecord,
147}
148
/// True iff `pid` currently names a live OS process.
///
/// Linux checks for the existence of `/proc/<pid>`; every other target runs
/// `kill -0 <pid>` and uses its exit status. Any failure to run the probe
/// counts as "not alive".
///
/// Pids that can never belong to a real daemon are rejected before probing:
///
/// * `0` — `kill -0 0` addresses the caller's own process group and so
///   succeeds, which would report a bogus pid file as a live daemon.
/// * values above `i32::MAX` — they do not fit the platform's signed
///   `pid_t`; a `kill` implementation may reinterpret them as a negative
///   (group / broadcast) target and succeed.
pub fn pid_is_alive(pid: u32) -> bool {
    if pid == 0 || i32::try_from(pid).is_err() {
        return false;
    }
    #[cfg(target_os = "linux")]
    {
        std::path::Path::new(&format!("/proc/{pid}")).exists()
    }
    #[cfg(not(target_os = "linux"))]
    {
        std::process::Command::new("kill")
            .args(["-0", &pid.to_string()])
            .output()
            .map(|o| o.status.success())
            .unwrap_or(false)
    }
}
165
166/// Read the daemon pid file + pgrep in one shot, producing a snapshot
167/// every caller can interpret identically. The point of this helper
168/// is that three independent callers used to compute liveness three
169/// different ways (#2): pidfile-pid-alive (cmd_status), pgrep-only
170/// (early check_daemon_health), neither (check_daemon_pid_consistency).
171/// Now all three flow through the same `DaemonLiveness`.
172pub fn daemon_liveness() -> DaemonLiveness {
173 let record = read_pid_record("daemon");
174 let pidfile_pid = record.pid();
175 let pidfile_alive = pidfile_pid.map(pid_is_alive).unwrap_or(false);
176 let pgrep_pids: Vec<u32> = std::process::Command::new("pgrep")
177 .args(["-f", "wire daemon"])
178 .output()
179 .ok()
180 .filter(|o| o.status.success())
181 .map(|o| {
182 String::from_utf8_lossy(&o.stdout)
183 .split_whitespace()
184 .filter_map(|s| s.parse::<u32>().ok())
185 .collect()
186 })
187 .unwrap_or_default();
188 let orphan_pids: Vec<u32> = pgrep_pids
189 .iter()
190 .filter(|p| Some(**p) != pidfile_pid)
191 .copied()
192 .collect();
193 DaemonLiveness {
194 pidfile_pid,
195 pidfile_alive,
196 pgrep_pids,
197 orphan_pids,
198 record,
199 }
200}
201
202/// Read a pid file, tolerating both JSON and legacy-int forms. Never
203/// panics — corrupt input becomes `PidRecord::Corrupt`.
204pub fn read_pid_record(name: &str) -> PidRecord {
205 let path = match pid_file(name) {
206 Ok(p) => p,
207 Err(_) => return PidRecord::Missing,
208 };
209 let body = match std::fs::read_to_string(&path) {
210 Ok(b) => b,
211 Err(_) => return PidRecord::Missing,
212 };
213 let trimmed = body.trim();
214 if trimmed.is_empty() {
215 return PidRecord::Missing;
216 }
217 // JSON form first.
218 if trimmed.starts_with('{') {
219 match serde_json::from_str::<DaemonPid>(trimmed) {
220 Ok(d) => return PidRecord::Json(d),
221 Err(e) => return PidRecord::Corrupt(format!("JSON parse: {e}")),
222 }
223 }
224 // Legacy raw-int form — keep readable for one transition cycle so a
225 // 0.5.11 daemon can take over from a 0.5.10 leftover without
226 // operator intervention.
227 match trimmed.parse::<u32>() {
228 Ok(pid) => PidRecord::LegacyInt(pid),
229 Err(e) => PidRecord::Corrupt(format!("expected int or JSON: {e}")),
230 }
231}
232
233/// Write a JSON pid record. P0.4: replaces the raw-int write.
234fn write_pid_record(name: &str, record: &DaemonPid) -> Result<()> {
235 let path = pid_file(name)?;
236 let body = serde_json::to_vec_pretty(record)?;
237 std::fs::write(&path, body)?;
238 Ok(())
239}
240
241/// Build a `DaemonPid` for a freshly-spawned child. Reads bin_path,
242/// current binary version, identity DID, and bound relay URL.
243fn build_pid_record(pid: u32) -> DaemonPid {
244 let bin_path = std::env::current_exe()
245 .map(|p| p.to_string_lossy().to_string())
246 .unwrap_or_default();
247 let version = env!("CARGO_PKG_VERSION").to_string();
248 let started_at = time::OffsetDateTime::now_utc()
249 .format(&time::format_description::well_known::Rfc3339)
250 .unwrap_or_default();
251 let (did, relay_url) = identity_for_pid_record();
252 DaemonPid {
253 schema: DAEMON_PID_SCHEMA.to_string(),
254 pid,
255 bin_path,
256 version,
257 started_at,
258 did,
259 relay_url,
260 }
261}
262
263/// Best-effort: pull DID + relay_url from the configured identity. None
264/// fields are written as `null` so the file stays well-formed even before
265/// the operator runs `wire init`.
266fn identity_for_pid_record() -> (Option<String>, Option<String>) {
267 let did = crate::config::read_agent_card()
268 .ok()
269 .and_then(|card| {
270 card.get("did")
271 .and_then(Value::as_str)
272 .map(str::to_string)
273 });
274 let relay_url = crate::config::read_relay_state()
275 .ok()
276 .and_then(|state| {
277 state
278 .get("self")
279 .and_then(|s| s.get("relay_url"))
280 .and_then(Value::as_str)
281 .map(str::to_string)
282 });
283 (did, relay_url)
284}
285
286/// Wait briefly for `process_alive(pid)` to be true. Returns true if the
287/// child went live within the budget. Default budget is 500ms — enough for
288/// std::process::Command::spawn to fork + exec on any reasonable platform.
289fn wait_until_alive(pid: u32, budget: Duration) -> bool {
290 let deadline = Instant::now() + budget;
291 while Instant::now() < deadline {
292 if process_alive(pid) {
293 return true;
294 }
295 std::thread::sleep(Duration::from_millis(10));
296 }
297 process_alive(pid)
298}
299
300fn ensure_background(name: &str, args: &[&str]) -> Result<bool> {
301 // Test escape hatch — tests/mcp_pair.rs spawns wire mcp with this env
302 // var set so wire_pair_confirm doesn't fork persistent daemon/notify
303 // processes that survive the test's temp WIRE_HOME.
304 if std::env::var("WIRE_MCP_SKIP_AUTO_UP").is_ok() {
305 return Ok(false);
306 }
307
308 // Skip spawn if existing pid is still alive.
309 if let Some(pid) = read_pid_record(name).pid()
310 && process_alive(pid)
311 {
312 return Ok(false);
313 }
314
315 crate::config::ensure_dirs()?;
316 let exe = std::env::current_exe()?;
317 let child = Command::new(&exe)
318 .args(args)
319 .stdin(Stdio::null())
320 .stdout(Stdio::null())
321 .stderr(Stdio::null())
322 .spawn()?;
323
324 // P0.4: wait until the child is actually alive before persisting the
325 // pid file. Otherwise a concurrent CLI sees the file pointing at a
326 // PID that isn't yet bound to anything — "daemon reports running but
327 // can't accept connections" race.
328 let pid = child.id();
329 if !wait_until_alive(pid, Duration::from_millis(500)) {
330 anyhow::bail!(
331 "spawned `wire {}` (pid {pid}) did not appear alive within 500ms",
332 args.join(" ")
333 );
334 }
335
336 let record = build_pid_record(pid);
337 write_pid_record(name, &record)?;
338 Ok(true)
339}
340
341/// Check the running daemon's version against the CLI's CARGO_PKG_VERSION.
342/// Returns Some(stale_version) if they disagree, None if they match (or no
343/// daemon, or legacy-int pidfile without version info).
344///
345/// Called by `wire status` + `wire doctor`. The intent is loud, non-fatal
346/// warning — don't BLOCK CLI invocations on version mismatch (operator may
347/// be running a one-shot debug while daemon is old), but DO make it
348/// impossible to miss.
349pub fn daemon_version_mismatch() -> Option<String> {
350 let record = read_pid_record("daemon");
351 let pid = record.pid()?;
352 if !process_alive(pid) {
353 return None;
354 }
355 match record {
356 PidRecord::Json(d) => {
357 if d.version != env!("CARGO_PKG_VERSION") {
358 Some(d.version)
359 } else {
360 None
361 }
362 }
363 PidRecord::LegacyInt(_) => {
364 // Legacy pidfile = pre-0.5.11 daemon writing raw int. By
365 // definition older than this CLI, so flag it.
366 Some("<pre-0.5.11>".to_string())
367 }
368 _ => None,
369 }
370}
371
/// True iff `pid` currently names a live OS process.
///
/// Linux checks for `/proc/<pid>`; macOS and other targets run
/// `kill -0 <pid>` with all stdio nulled and use its exit status. A probe
/// that cannot be run counts as "not alive".
///
/// Pids that can never belong to a real child are rejected before probing:
/// `0` (for `kill`, pid 0 means "the caller's own process group", so the
/// probe would succeed) and anything above `i32::MAX` (does not fit the
/// signed `pid_t`; may be reinterpreted as a negative, i.e. group or
/// broadcast, target).
fn process_alive(pid: u32) -> bool {
    if pid == 0 || i32::try_from(pid).is_err() {
        return false;
    }
    #[cfg(target_os = "linux")]
    {
        std::path::Path::new(&format!("/proc/{pid}")).exists()
    }
    #[cfg(not(target_os = "linux"))]
    {
        // Signal-0 check via `kill -0 <pid>` exit status.
        Command::new("kill")
            .args(["-0", &pid.to_string()])
            .stdin(Stdio::null())
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .map(|s| s.success())
            .unwrap_or(false)
    }
}
389
#[cfg(test)]
mod tests {
    use super::*;

    /// The test process itself must always probe as alive.
    #[test]
    fn process_alive_self() {
        let own_pid = std::process::id();
        assert!(process_alive(own_pid));
    }

    /// A pid that is not in use must probe as dead.
    ///
    /// NOTE: despite the name, this probes pid 99_999_999, not pid 0.
    #[test]
    fn process_alive_zero_is_false_or_self() {
        let unused_pid = 99_999_999;
        assert!(!process_alive(unused_pid));
    }

    /// P0.4 contract: a record written by 0.5.11 must be readable by
    /// 0.5.11. If serde drifts from the file format, every CLI invocation
    /// breaks silently.
    #[test]
    fn pid_record_round_trips_via_json_form() {
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let record = DaemonPid {
                schema: DAEMON_PID_SCHEMA.to_string(),
                pid: 12345,
                bin_path: "/usr/local/bin/wire".to_string(),
                version: "0.5.11".to_string(),
                started_at: "2026-05-16T01:23:45Z".to_string(),
                did: Some("did:wire:paul-mac".to_string()),
                relay_url: Some("https://wireup.net".to_string()),
            };
            write_pid_record("daemon", &record).unwrap();

            match read_pid_record("daemon") {
                PidRecord::Json(d) => assert_eq!(d, record),
                other => panic!("expected JSON record, got {other:?}"),
            }
        });
    }

    /// The whole point of `LegacyInt`: a 0.5.11 daemon must be able to take
    /// over from a 0.5.10 leftover without operator intervention. If this
    /// fails, every operator with a 0.5.10 daemon has to delete their
    /// pidfile by hand on upgrade.
    #[test]
    fn pid_record_tolerates_legacy_int_form() {
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let legacy_path = super::pid_file("daemon").unwrap();
            std::fs::write(&legacy_path, "98765").unwrap();

            match read_pid_record("daemon") {
                PidRecord::LegacyInt(pid) => assert_eq!(pid, 98765),
                other => panic!("expected LegacyInt, got {other:?}"),
            }
        });
    }

    /// Garbage in the pid file must come back as `Corrupt` (so it can be
    /// surfaced visibly) and must never panic the reader.
    #[test]
    fn pid_record_corrupt_reports_corrupt_not_panic() {
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let garbage_path = super::pid_file("daemon").unwrap();
            std::fs::write(&garbage_path, "not-a-pid-or-json {{{").unwrap();

            let read = read_pid_record("daemon");
            let is_corrupt = matches!(read, PidRecord::Corrupt(_));
            assert!(is_corrupt, "got {read:?}");
        });
    }

    /// With no pid file at all there is nothing to compare against.
    #[test]
    fn daemon_version_mismatch_returns_none_when_no_pidfile() {
        crate::config::test_support::with_temp_home(|| {
            let mismatch = daemon_version_mismatch();
            assert_eq!(mismatch, None);
        });
    }
}
468}