wire/ensure_up.rs
1//! Background-process bootstrapper for the MCP path.
2//!
3//! Post-pair, an agent shouldn't have to ask the user "start the daemon?" —
4//! the MCP accept/dial tools invoke [`ensure_daemon_running`] so push/pull is
5//! already armed by the time the agent surfaces "paired ✓" back to chat. OS
6//! toasts for inbound messages are folded into the daemon's own sync loop
7//! (see `cli::comms::notify_sweep_new_events`), so arming the daemon arms
8//! toasts too — no separate notify process.
9//!
10//! ## Idempotency
11//!
12//! Each subcommand writes its pid record to `$WIRE_HOME/state/wire/<name>.pid`
13//! on spawn. The next call reads the record and skips spawning if the pid is
14//! still alive. Stale pid files (process died) are silently overwritten.
15//!
16//! ## Pid-file shape (P0.4, 0.5.11)
17//!
18//! The pid file used to be a raw integer (`12345\n`). Today's debug surfaced
19//! a process running an OLD binary text in memory under a current symlink,
20//! and `wire status` had no way to detect that. The pid file is now a
21//! versioned JSON record:
22//!
23//! ```json
24//! {
25//! "schema": "wire-daemon-pid-v1",
26//! "pid": 12345,
27//! "bin_path": "/usr/local/bin/wire",
28//! "version": "0.5.11",
29//! "started_at": "2026-05-16T01:23:45Z",
30//! "did": "did:wire:paul-mac",
31//! "relay_url": "https://wireup.net"
32//! }
33//! ```
34//!
35//! The JSON `DaemonPid` form is the only supported on-disk format;
36//! `read_pid_record` reports anything else as `Corrupt`.
37//!
38//! ## Wait-until-alive
39//!
40//! On spawn, we wait briefly for the child to be alive before persisting the
41//! pid file. A concurrent CLI seeing the file pointing at a not-yet-bound
42//! PID is the "daemon reports running but can't accept connections" race
43//! spark flagged in our P0.4 design call.
44//!
45//! ## Detachment (Unix)
46//!
47//! Spawned with stdio nulled. Since `wire mcp` runs without a controlling
48//! TTY (it's a stdio MCP server, not a login shell), the spawned children
49//! inherit no TTY → no SIGHUP arrives when the parent exits, so they
50//! survive a Claude Code restart cycle. PIDs are reaped by init.
51//!
52//! Worst case: a child dies; the next accept/dial call respawns it.
53//! No data is lost (outbox/inbox is on disk, content-addressed dedupe).
54
55use std::path::PathBuf;
56use std::process::{Command, Stdio};
57use std::time::{Duration, Instant};
58
59use anyhow::Result;
60use serde::{Deserialize, Serialize};
61use serde_json::Value;
62
/// Schema string written into the `schema` field of every JSON pid file
/// produced by this module. Bumped if the pid-file shape ever changes
/// incompatibly.
///
/// Readers are expected to warn on an unknown schema.
/// NOTE(review): `read_pid_record` in this file does not inspect the
/// `schema` field — confirm that a caller elsewhere performs that check.
pub const DAEMON_PID_SCHEMA: &str = "wire-daemon-pid-v1";
66
/// Versioned daemon pid record — the JSON form written by 0.5.11+.
///
/// This is the on-disk shape of `<name>.pid`: `write_pid_record`
/// serializes it as pretty-printed JSON and `read_pid_record` parses it
/// back. Every field is captured once, at the moment the record is built.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct DaemonPid {
    /// Schema discriminator. Always `wire-daemon-pid-v1` for now
    /// (see `DAEMON_PID_SCHEMA`).
    pub schema: String,
    /// OS process id of the daemon this record describes.
    pub pid: u32,
    /// Absolute path of the binary that was exec'd. Catches today's exact
    /// bug: a stale 0.2.4 daemon process kept running under a symlink that
    /// was repointed at 0.5.10 — `wire --version` says 0.5.10 but the
    /// running daemon's text in memory is still 0.2.4.
    /// Empty string if the executable path could not be determined.
    pub bin_path: String,
    /// CARGO_PKG_VERSION captured at spawn. `daemon_version_mismatch`
    /// compares it against the CLI's own version; mismatch = loud warn.
    pub version: String,
    /// RFC3339 timestamp of spawn (empty if formatting failed).
    pub started_at: String,
    /// Self DID — catches multi-identity contamination (one user, two wire
    /// identities on same host, daemon launched as wrong one). Cheap
    /// field, expensive bug. `None` when no identity could be read.
    pub did: Option<String>,
    /// Relay this daemon was bound to at spawn. Catches daemon-bound-to-
    /// old-relay-after-migration drift. `None` when no relay state could
    /// be read.
    pub relay_url: Option<String>,
}
91
/// Result of reading a pid file. JSON (full metadata) is the only
/// supported on-disk form; anything else is `Corrupt`.
///
/// Only the `Json` variant carries a pid — see [`PidRecord::pid`].
#[derive(Debug, Clone)]
pub enum PidRecord {
    /// The file parsed as a well-formed [`DaemonPid`] record.
    Json(DaemonPid),
    /// No record is available — e.g. the pid file does not exist or is
    /// empty.
    Missing,
    /// The file exists but does not hold a usable [`DaemonPid`]; the
    /// payload is a human-readable reason for diagnostics.
    Corrupt(String),
}
100
101impl PidRecord {
102 pub fn pid(&self) -> Option<u32> {
103 match self {
104 PidRecord::Json(d) => Some(d.pid),
105 _ => None,
106 }
107 }
108}
109
110/// Ensure a `wire daemon --interval 5` process is alive. Returns `Ok(true)`
111/// if a fresh process was spawned, `Ok(false)` if one was already running.
112pub fn ensure_daemon_running() -> Result<bool> {
113 ensure_background("daemon", &["daemon", "--interval", "5"])
114}
115
116fn pid_file(name: &str) -> Result<PathBuf> {
117 Ok(crate::config::state_dir()?.join(format!("{name}.pid")))
118}
119
/// Snapshot of daemon liveness state read through ONE consistent
/// view. Consumed by `wire status`, `wire doctor`'s `daemon` check,
/// and `daemon_pid_consistency` so all three surfaces agree by
/// construction — issue #2 root cause was three call sites that
/// each computed liveness independently and disagreed for 25 min.
///
/// Built by [`daemon_liveness`]; it is a point-in-time snapshot and is
/// not refreshed after construction.
#[derive(Debug, Clone)]
pub struct DaemonLiveness {
    /// PID claimed by `daemon.pid` (None if missing/corrupt).
    pub pidfile_pid: Option<u32>,
    /// True iff `pidfile_pid` is currently a live process.
    pub pidfile_alive: bool,
    /// Every PID whose command line matches `"wire daemon"`, as reported
    /// by the platform cmdline scan (the field keeps its historical
    /// `pgrep` name). Empty if the scan is unavailable — the consumer
    /// must not treat empty as "no daemons" without considering this.
    pub pgrep_pids: Vec<u32>,
    /// PIDs in `pgrep_pids` that have no legitimate owner: not
    /// `pidfile_pid`, not any registered session's daemon pid, not the
    /// `--all-sessions` supervisor, and serving the same WIRE_HOME as the
    /// caller. These are orphan daemons racing the cursor with the
    /// pidfile-recorded one.
    pub orphan_pids: Vec<u32>,
    /// Full parsed pidfile record (Json / Missing / Corrupt).
    pub record: PidRecord,
}
141
/// True iff `pid` is currently a live OS process. Delegates to the
/// platform-aware check (`/proc` on Linux, `kill -0` on other Unix,
/// `tasklist` on Windows) so callers never disagree across OSes. The old
/// local `kill -0` path false-negatived on Windows (no `kill`), making
/// `wire status`/`doctor` report the daemon DOWN while it was alive.
///
/// This is the public entry point; it forwards straight to
/// `crate::platform::process_alive` and adds no logic of its own.
/// NOTE(review): the per-OS mechanisms listed above live in the platform
/// module, which is not visible here — confirm they are still accurate.
pub fn pid_is_alive(pid: u32) -> bool {
    crate::platform::process_alive(pid)
}
150
151/// Read the daemon pid file + pgrep in one shot, producing a snapshot
152/// every caller can interpret identically. The point of this helper
153/// is that three independent callers used to compute liveness three
154/// different ways (#2): pidfile-pid-alive (cmd_status), pgrep-only
155/// (early check_daemon_health), neither (check_daemon_pid_consistency).
156/// Now all three flow through the same `DaemonLiveness`.
157pub fn daemon_liveness() -> DaemonLiveness {
158 let record = read_pid_record("daemon");
159 let pidfile_pid = record.pid();
160 let pidfile_alive = pidfile_pid.map(pid_is_alive).unwrap_or(false);
161 // Platform-aware cmdline scan (Unix `pgrep`, Windows PowerShell CIM).
162 // Field stays named `pgrep_pids` for callers; on Windows the old direct
163 // `pgrep` shell-out returned empty (no such tool), masking live daemons.
164 let pgrep_pids: Vec<u32> = crate::platform::find_processes_by_cmdline("wire daemon");
165 // A2 (v0.13.2): on a multi-session box EVERY session runs its own daemon,
166 // so the old "any `wire daemon` whose pid != my pidfile = orphan" rule
167 // flagged sibling sessions' LEGITIMATE daemons as orphans — `wire doctor`
168 // FAILed on the very multi-agent-per-box setup wire exists for. A true
169 // orphan is a wire daemon owned by NO session: exclude every session's
170 // pidfile pid, not just this session's.
171 let known_session_pids: std::collections::HashSet<u32> = crate::session::list_sessions()
172 .map(|sessions| {
173 sessions
174 .iter()
175 .filter_map(|s| crate::session::session_daemon_pid(&s.home_dir))
176 .collect()
177 })
178 .unwrap_or_default();
179 // v0.14.2 (#170 follow-up): also exclude the `wire daemon --all-sessions`
180 // supervisor. It's pgrep-matched by the "wire daemon" cmdline scan but
181 // ISN'T orphaned — it has its own pidfile at `sessions_root/supervisor.pid`
182 // and legitimately owns the orchestration role. Pre-fix the supervisor
183 // showed up under `!! orphan daemon process(es)` on every `wire status`
184 // even though it was the load-bearing process keeping every session
185 // daemon alive — confusing operators into thinking it was stale.
186 let supervisor_pid: Option<u32> = crate::session::sessions_root()
187 .ok()
188 .map(|root| root.join("supervisor.pid"))
189 .filter(|p| p.exists())
190 .and_then(|p| std::fs::read_to_string(p).ok())
191 .and_then(|s| s.trim().parse::<u32>().ok())
192 .filter(|p| pid_is_alive(*p));
193 // v0.15.1: scope the orphan check to daemons that serve OUR WIRE_HOME.
194 // `pgrep "wire daemon"` is machine-global, but a daemon only "races
195 // our relay cursor" if it points at the SAME state tree. Pre-fix, a
196 // fresh install / any non-default WIRE_HOME ran the global scan but
197 // built its exclusion set (known_session_pids, supervisor) from the
198 // CURRENT home's sessions_root — so the operator's real default-home
199 // daemons all showed up as "orphan daemon process(es)... Multiple
200 // daemons race the relay cursor" on the very first `wire status`,
201 // even though they touch a completely different home.
202 let our_home = std::env::var("WIRE_HOME").ok();
203 let orphan_pids: Vec<u32> = pgrep_pids
204 .iter()
205 .copied()
206 .filter(|p| {
207 is_orphan_for_home(
208 *p,
209 pidfile_pid,
210 &known_session_pids,
211 supervisor_pid,
212 our_home.as_deref(),
213 crate::session::read_wire_home_from_pid(*p).as_deref(),
214 )
215 })
216 .collect();
217 DaemonLiveness {
218 pidfile_pid,
219 pidfile_alive,
220 pgrep_pids,
221 orphan_pids,
222 record,
223 }
224}
225
/// Pure orphan predicate (the pid's home is passed in, so no I/O happens
/// here and the function is trivially testable).
///
/// `pid` is a true orphan — a `wire daemon` racing OUR relay cursor with
/// no legitimate owner — only when every one of these holds:
/// - it is not our own pidfile pid (`pidfile_pid`),
/// - it is not a registered session's daemon pid (`known_session_pids`),
/// - it is not the `--all-sessions` supervisor (`supervisor_pid`),
/// - it serves the SAME WIRE_HOME as us (`pid_home == our_home`, where
///   `None == None` means both serve the default home).
///
/// The home comparison (v0.15.1) is strictly subtractive: it can only
/// remove a candidate, never invent one. It stops a daemon belonging to a
/// *different* home (e.g. the operator's real install, seen by the
/// machine-global scan from inside a fresh/temp home) from being
/// mislabeled as racing our cursor. A pid whose home cannot be read
/// (`pid_home == None`, e.g. on Windows) therefore only matches when our
/// own home is also unset/default — the safe direction for noise.
fn is_orphan_for_home(
    pid: u32,
    pidfile_pid: Option<u32>,
    known_session_pids: &std::collections::HashSet<u32>,
    supervisor_pid: Option<u32>,
    our_home: Option<&str>,
    pid_home: Option<&str>,
) -> bool {
    // Any legitimate owner disqualifies the pid outright.
    let is_own_daemon = pidfile_pid == Some(pid);
    let is_session_daemon = known_session_pids.contains(&pid);
    let is_supervisor = supervisor_pid == Some(pid);
    if is_own_daemon || is_session_daemon || is_supervisor {
        return false;
    }
    // Unowned: it only races us if it serves the same home.
    our_home == pid_home
}
256
257/// Read a pid file. Only the JSON `DaemonPid` form is supported; any
258/// other content is reported as `Corrupt`. Never panics.
259pub fn read_pid_record(name: &str) -> PidRecord {
260 let path = match pid_file(name) {
261 Ok(p) => p,
262 Err(_) => return PidRecord::Missing,
263 };
264 let body = match std::fs::read_to_string(&path) {
265 Ok(b) => b,
266 Err(_) => return PidRecord::Missing,
267 };
268 let trimmed = body.trim();
269 if trimmed.is_empty() {
270 return PidRecord::Missing;
271 }
272 match serde_json::from_str::<DaemonPid>(trimmed) {
273 Ok(d) => PidRecord::Json(d),
274 Err(e) => PidRecord::Corrupt(format!("JSON parse: {e}")),
275 }
276}
277
278/// Write a JSON pid record. P0.4: replaces the raw-int write.
279fn write_pid_record(name: &str, record: &DaemonPid) -> Result<()> {
280 let path = pid_file(name)?;
281 let body = serde_json::to_vec_pretty(record)?;
282 std::fs::write(&path, body)?;
283 Ok(())
284}
285
286/// Daemon-startup: claim the `daemon.pid` file for THIS process.
287///
288/// A daemon started directly (`wire daemon`, not via `ensure_background`)
289/// must write its own versioned-JSON pidfile so `wire status` / doctor /
290/// the singleton guard can see it. Idempotent: if the pidfile already
291/// records our PID we leave it untouched. (Historically this lived in
292/// `pending_pair::cleanup_on_startup` alongside the now-removed SAS
293/// pending-pair recovery; the pidfile write was never SAS-specific.)
294pub fn write_self_daemon_pid() -> Result<()> {
295 let path = pid_file("daemon")?;
296 let my_pid = std::process::id();
297 if path.exists()
298 && let Ok(s) = std::fs::read_to_string(&path)
299 && let Ok(rec) = serde_json::from_str::<DaemonPid>(s.trim())
300 && rec.pid == my_pid
301 {
302 // We already own this pidfile — nothing to do.
303 return Ok(());
304 }
305 if let Some(parent) = path.parent() {
306 std::fs::create_dir_all(parent).ok();
307 }
308 write_pid_record("daemon", &build_pid_record(my_pid))
309}
310
/// Schema string written into the `schema` field of every JSON last-sync
/// file. Bumped if the shape ever changes incompatibly.
///
/// Readers are meant to tolerate any schema string and fall back to
/// "unknown last_sync" when they don't recognize it.
/// NOTE(review): `read_last_sync_record` in this file does not look at
/// the `schema` field at all — confirm where that fallback is implemented.
pub const LAST_SYNC_FILE_SCHEMA: &str = "wire-daemon-last-sync-v1";
315
/// Versioned record written by `wire daemon` after each successful sync
/// cycle. Readers (`wire status`, `mcp__wire__wire_status`,
/// `mcp__wire__wire_send` annotations) inspect it to surface
/// "is the sync loop alive RIGHT NOW?" — distinct from "is there a
/// process with `wire daemon` in its cmdline?" (the existing pidfile-
/// alive check), which can be true while the loop has been wedged for
/// minutes. v0.14.2 (#162): closes the silent-send class where the MCP
/// surface reports `status:"queued"` while no one is actually pushing.
///
/// Stored as pretty-printed JSON in `last_sync.json` under the state
/// directory (see `write_last_sync_record` / `read_last_sync_record`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LastSyncRecord {
    /// Schema discriminator. `wire-daemon-last-sync-v1`.
    pub schema: String,
    /// RFC3339 UTC timestamp of the most recently completed cycle.
    /// May be empty if timestamp formatting failed at write time.
    pub ts: String,
    /// Number of outbox events pushed in this cycle.
    pub push_n: usize,
    /// Number of inbox events pulled (verified + written) in this cycle.
    pub pull_n: usize,
    /// Number of inbox events rejected by signature/cursor checks.
    pub rejected_n: usize,
}
337
338fn last_sync_file() -> Result<PathBuf> {
339 Ok(crate::config::state_dir()?.join("last_sync.json"))
340}
341
342/// Write the last-sync record. Called by `cmd_daemon` after each cycle
343/// (including --once). Best-effort: any error logs to stderr but does NOT
344/// abort the daemon loop — a wedged pidfile path shouldn't take the sync
345/// loop down with it.
346pub fn write_last_sync_record(push_n: usize, pull_n: usize, rejected_n: usize) {
347 let record = LastSyncRecord {
348 schema: LAST_SYNC_FILE_SCHEMA.to_string(),
349 ts: time::OffsetDateTime::now_utc()
350 .format(&time::format_description::well_known::Rfc3339)
351 .unwrap_or_default(),
352 push_n,
353 pull_n,
354 rejected_n,
355 };
356 let _ = (|| -> Result<()> {
357 let path = last_sync_file()?;
358 if let Some(parent) = path.parent() {
359 std::fs::create_dir_all(parent)?;
360 }
361 let body = serde_json::to_vec_pretty(&record)?;
362 std::fs::write(&path, body)?;
363 Ok(())
364 })()
365 .map_err(|e| eprintln!("daemon: last-sync persist error (non-fatal): {e:#}"));
366}
367
368/// Read the last-sync record. Returns `None` if missing/corrupt — every
369/// caller should treat that as "unknown sync state, daemon may never
370/// have run" and surface it accordingly.
371pub fn read_last_sync_record() -> Option<LastSyncRecord> {
372 let path = last_sync_file().ok()?;
373 let body = std::fs::read_to_string(&path).ok()?;
374 serde_json::from_str(&body).ok()
375}
376
377/// Convenience: the wall-clock age (in whole seconds) of the most recent
378/// sync, or `None` if no record exists / the timestamp can't be parsed.
379/// Negative ages (clock skew between daemon + reader) are clamped to 0.
380pub fn last_sync_age_seconds() -> Option<u64> {
381 let rec = read_last_sync_record()?;
382 let parsed =
383 time::OffsetDateTime::parse(&rec.ts, &time::format_description::well_known::Rfc3339)
384 .ok()?;
385 let delta = time::OffsetDateTime::now_utc() - parsed;
386 let secs = delta.whole_seconds();
387 Some(secs.max(0) as u64)
388}
389
390/// Inspect the daemon singleton state. Returns `Some(pid)` iff the
391/// pidfile names a live `wire daemon` process — i.e., a singleton is
392/// currently held by another in-flight daemon. Returns `None` if the
393/// pidfile is missing, corrupt, or names a dead process.
394///
395/// v0.14.2 (#162): foreground `wire daemon` (the operator-typed kind,
396/// not the `ensure_background` spawn path) didn't write its own
397/// pidfile, so subsequent `ensure_daemon_running()` calls couldn't
398/// see it and would spawn duplicates. The duplicate-pull race is
399/// safe — per-path outbox locks prevent corruption — but it wastes
400/// relay polls and confuses operator diagnosis ("why are there 3
401/// daemons?"). The singleton helpers below let `cmd_daemon` claim
402/// the slot at startup + write its own pidfile, closing the gap.
403pub fn daemon_singleton_holder() -> Option<u32> {
404 // Exclude our OWN pid: `ensure_background` records the spawned daemon's pid
405 // in the "daemon" pidfile right after spawn (the P0.4 alive-confirmation
406 // write), and the daemon's own startup singleton check then reads that same
407 // pidfile. Without this self-exclusion the daemon sees its own pid as a live
408 // "other" holder, logs "another daemon is already running", and exits — so a
409 // freshly-`wire up`'d session ends up with NO running daemon and the first
410 // connection silently never completes (the receiver never pulls). A
411 // manually-started daemon dodged this only because nothing pre-wrote its
412 // pid. Self is never "another" daemon.
413 let me = std::process::id();
414 match read_pid_record("daemon").pid() {
415 Some(pid) if pid != me && pid_is_alive(pid) => Some(pid),
416 _ => None,
417 }
418}
419
420/// Claim the daemon-pid singleton by writing this process's pid +
421/// metadata to the pidfile. Callers should first check
422/// `daemon_singleton_holder()` — if Some, bail rather than overwrite.
423///
424/// Returns a `DaemonPidGuard` that removes the pidfile when dropped,
425/// so a graceful exit (SIGINT → normal Drop chain) cleans up.
426pub fn claim_daemon_singleton() -> Result<DaemonPidGuard> {
427 crate::config::ensure_dirs()?;
428 let pid = std::process::id();
429 let record = build_pid_record(pid);
430 write_pid_record("daemon", &record)?;
431 let path = pid_file("daemon")?;
432 Ok(DaemonPidGuard {
433 path,
434 owned_pid: pid,
435 })
436}
437
/// Drop guard for a claimed daemon-pid singleton. On drop, removes
/// the pidfile only if it still names the pid we wrote — protects
/// against the case where another daemon raced in after we exited
/// the singleton check but before we wrote, and we don't want to
/// wipe their record on our exit.
///
/// Obtained from `claim_daemon_singleton`; keep it alive for as long as
/// the daemon should hold the slot.
pub struct DaemonPidGuard {
    /// Location of the pidfile this guard is responsible for.
    path: PathBuf,
    /// Pid that was written when the singleton was claimed; the file is
    /// removed on drop only while it still records this pid.
    owned_pid: u32,
}
447
448impl Drop for DaemonPidGuard {
449 fn drop(&mut self) {
450 // Only remove if the file still names US. If another wire
451 // daemon raced in and overwrote, leave their record alone.
452 if let Ok(body) = std::fs::read_to_string(&self.path) {
453 let still_ours = serde_json::from_str::<DaemonPid>(body.trim())
454 .map(|d| d.pid == self.owned_pid)
455 .unwrap_or_else(|_| {
456 body.trim()
457 .parse::<u32>()
458 .map(|p| p == self.owned_pid)
459 .unwrap_or(false)
460 });
461 if still_ours {
462 let _ = std::fs::remove_file(&self.path);
463 }
464 }
465 }
466}
467
468/// Build a `DaemonPid` for a freshly-spawned child. Reads bin_path,
469/// current binary version, identity DID, and bound relay URL.
470fn build_pid_record(pid: u32) -> DaemonPid {
471 let bin_path = std::env::current_exe()
472 .map(|p| p.to_string_lossy().to_string())
473 .unwrap_or_default();
474 let version = env!("CARGO_PKG_VERSION").to_string();
475 let started_at = time::OffsetDateTime::now_utc()
476 .format(&time::format_description::well_known::Rfc3339)
477 .unwrap_or_default();
478 let (did, relay_url) = identity_for_pid_record();
479 DaemonPid {
480 schema: DAEMON_PID_SCHEMA.to_string(),
481 pid,
482 bin_path,
483 version,
484 started_at,
485 did,
486 relay_url,
487 }
488}
489
490/// Best-effort: pull DID + relay_url from the configured identity. None
491/// fields are written as `null` so the file stays well-formed even before
492/// the operator runs `wire init`.
493fn identity_for_pid_record() -> (Option<String>, Option<String>) {
494 let did = crate::config::read_agent_card()
495 .ok()
496 .and_then(|card| card.get("did").and_then(Value::as_str).map(str::to_string));
497 let relay_url = crate::config::read_relay_state().ok().and_then(|state| {
498 state
499 .get("self")
500 .and_then(|s| s.get("relay_url"))
501 .and_then(Value::as_str)
502 .map(str::to_string)
503 });
504 (did, relay_url)
505}
506
507/// Wait briefly for `process_alive(pid)` to be true. Returns true if the
508/// child went live within the budget. Default budget is 500ms — enough for
509/// std::process::Command::spawn to fork + exec on any reasonable platform.
510fn wait_until_alive(pid: u32, budget: Duration) -> bool {
511 let deadline = Instant::now() + budget;
512 while Instant::now() < deadline {
513 if process_alive(pid) {
514 return true;
515 }
516 std::thread::sleep(Duration::from_millis(10));
517 }
518 process_alive(pid)
519}
520
521fn ensure_background(name: &str, args: &[&str]) -> Result<bool> {
522 // Test escape hatch — tests/mcp_pair.rs spawns wire mcp with this env
523 // var set so wire_accept/wire_dial don't fork persistent daemon/notify
524 // processes that survive the test's temp WIRE_HOME.
525 if std::env::var("WIRE_MCP_SKIP_AUTO_UP").is_ok() {
526 return Ok(false);
527 }
528
529 // Skip spawn if existing pid is still alive.
530 if let Some(pid) = read_pid_record(name).pid()
531 && process_alive(pid)
532 {
533 return Ok(false);
534 }
535
536 crate::config::ensure_dirs()?;
537 let exe = std::env::current_exe()?;
538 let mut cmd = Command::new(&exe);
539 cmd.args(args).stdin(Stdio::null()).stdout(Stdio::null());
540 // Capture the spawned daemon's stderr to a logfile instead of /dev/null so
541 // a daemon that dies on startup leaves a trace (otherwise its death is
542 // invisible — exactly the silent-fail class this guards). Best-effort: fall
543 // back to null if the log can't be opened.
544 let stderr_log = crate::config::state_dir()
545 .ok()
546 .map(|d| d.join(format!("{name}-spawn.log")));
547 match stderr_log
548 .as_ref()
549 .and_then(|p| std::fs::File::create(p).ok())
550 {
551 Some(f) => {
552 cmd.stderr(Stdio::from(f));
553 }
554 None => {
555 cmd.stderr(Stdio::null());
556 }
557 }
558
559 let child = cmd.spawn()?;
560
561 // P0.4: wait until the child is actually alive before persisting the
562 // pid file. Otherwise a concurrent CLI sees the file pointing at a
563 // PID that isn't yet bound to anything — "daemon reports running but
564 // can't accept connections" race.
565 let pid = child.id();
566 if !wait_until_alive(pid, Duration::from_millis(500)) {
567 anyhow::bail!(
568 "spawned `wire {}` (pid {pid}) did not appear alive within 500ms",
569 args.join(" ")
570 );
571 }
572
573 let record = build_pid_record(pid);
574 write_pid_record(name, &record)?;
575 Ok(true)
576}
577
578/// Check the running daemon's version against the CLI's CARGO_PKG_VERSION.
579/// Returns Some(stale_version) if they disagree, None if they match (or no
580/// daemon).
581///
582/// Called by `wire status` + `wire doctor`. The intent is loud, non-fatal
583/// warning — don't BLOCK CLI invocations on version mismatch (operator may
584/// be running a one-shot debug while daemon is old), but DO make it
585/// impossible to miss.
586pub fn daemon_version_mismatch() -> Option<String> {
587 let record = read_pid_record("daemon");
588 let pid = record.pid()?;
589 if !process_alive(pid) {
590 return None;
591 }
592 match record {
593 PidRecord::Json(d) => {
594 if d.version != env!("CARGO_PKG_VERSION") {
595 Some(d.version)
596 } else {
597 None
598 }
599 }
600 _ => None,
601 }
602}
603
604fn process_alive(pid: u32) -> bool {
605 crate::platform::process_alive(pid)
606}
607
// Unit tests for the pid-record helpers and the pure orphan predicate.
// Tests that touch the filesystem run inside
// `crate::config::test_support::with_temp_home`.
#[cfg(test)]
mod tests {
    use super::*;

    // The process running the test is, by definition, alive.
    #[test]
    fn process_alive_self() {
        assert!(process_alive(std::process::id()));
    }

    #[test]
    fn orphan_excludes_daemon_serving_a_different_home() {
        // The v0.15.1 regression: a fresh install (our_home = temp) runs
        // a machine-global pgrep that sees the operator's real default-home
        // daemon (pid_home = None). It must NOT be flagged as an orphan
        // racing our cursor.
        let empty = std::collections::HashSet::new();
        assert!(!is_orphan_for_home(
            42,
            None,
            &empty,
            None,
            Some("/tmp/fresh/home"), // we run under a temp WIRE_HOME
            None,                    // the real daemon serves the default home
        ));
        // A foreign Some-home daemon is likewise not ours.
        assert!(!is_orphan_for_home(
            42,
            None,
            &empty,
            None,
            Some("/tmp/fresh/home"),
            Some("/Users/op/other/home"),
        ));
    }

    #[test]
    fn orphan_flags_unowned_daemon_on_same_home() {
        // A genuine orphan: same home as us, not our pidfile, not a known
        // session, not the supervisor → still flagged (feature preserved).
        let empty = std::collections::HashSet::new();
        // Both default home (None == None).
        assert!(is_orphan_for_home(42, Some(7), &empty, Some(9), None, None));
        // Both the same explicit home.
        assert!(is_orphan_for_home(
            42,
            None,
            &empty,
            None,
            Some("/h"),
            Some("/h")
        ));
    }

    // Each legitimate owner (own pidfile, registered session, supervisor)
    // must suppress the orphan flag even when the homes match.
    #[test]
    fn orphan_excludes_self_session_and_supervisor_even_on_same_home() {
        let mut known = std::collections::HashSet::new();
        known.insert(100u32);
        // our own pidfile pid
        assert!(!is_orphan_for_home(7, Some(7), &known, Some(9), None, None));
        // a registered session daemon
        assert!(!is_orphan_for_home(
            100,
            Some(7),
            &known,
            Some(9),
            None,
            None
        ));
        // the supervisor
        assert!(!is_orphan_for_home(9, Some(7), &known, Some(9), None, None));
    }

    // NOTE(review): despite the name, this does not probe pid 0 — it checks
    // that an implausibly large pid (99_999_999) is reported as not alive.
    // Consider renaming the test to match what it asserts.
    #[test]
    fn process_alive_zero_is_false_or_self() {
        assert!(!process_alive(99_999_999));
    }

    #[test]
    fn pid_record_round_trips_via_json_form() {
        // P0.4 contract: a record written by 0.5.11 must be readable by
        // 0.5.11. If serde gets out of sync with the file format, every
        // single CLI invocation breaks silently.
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let record = DaemonPid {
                schema: DAEMON_PID_SCHEMA.to_string(),
                pid: 12345,
                bin_path: "/usr/local/bin/wire".to_string(),
                version: "0.5.11".to_string(),
                started_at: "2026-05-16T01:23:45Z".to_string(),
                did: Some("did:wire:paul-mac".to_string()),
                relay_url: Some("https://wireup.net".to_string()),
            };
            write_pid_record("daemon", &record).unwrap();
            let read = read_pid_record("daemon");
            match read {
                PidRecord::Json(d) => assert_eq!(d, record),
                other => panic!("expected JSON record, got {other:?}"),
            }
        });
    }

    #[test]
    fn pid_record_corrupt_reports_corrupt_not_panic() {
        // Today's debug had a stale pidfile pointing at a dead PID. The
        // reader was tolerant. A future bug might write garbage; the reader
        // must not panic — it must report Corrupt so wire doctor can
        // surface it visibly.
        crate::config::test_support::with_temp_home(|| {
            crate::config::ensure_dirs().unwrap();
            let path = super::pid_file("daemon").unwrap();
            std::fs::write(&path, "not-a-pid-or-json {{{").unwrap();
            let read = read_pid_record("daemon");
            assert!(matches!(read, PidRecord::Corrupt(_)), "got {read:?}");
        });
    }

    // With no pid file in a fresh temp home there is no daemon to compare
    // against, so no mismatch is reported.
    #[test]
    fn daemon_version_mismatch_returns_none_when_no_pidfile() {
        crate::config::test_support::with_temp_home(|| {
            assert_eq!(daemon_version_mismatch(), None);
        });
    }
}