tldr_cli/commands/daemon/daemon_active.rs
1//! Active-daemon discovery file (VAL-013, issue #20).
2//!
3//! `tldr daemon status` historically defaulted `--project` to `"."`, computed
4//! a socket-path hash from the canonicalized cwd, and connected via that
5//! hash. Invoked from a cwd different from the original
6//! `daemon start --project` cwd, the hash differs → connect fails → status
7//! incorrectly reports `not_running`, even when a daemon IS alive.
8//!
9//! This module implements the **single-daemon quick-fix path** from the
10//! VAL-013 spec: on successful bind, daemon start atomically writes
11//! `<cache_dir>/tldr/daemon-active.json` containing `{project, pid, socket}`.
12//! When `daemon status` is invoked WITHOUT an explicit `--project`, it reads
13//! this file, verifies the PID is alive (via `kill(pid, 0)` on Unix), and
14//! falls back to the recorded project path for socket discovery.
15//!
16//! The multi-daemon case is intentionally NOT handled here — users running
17//! multiple daemons can still pass `--project` explicitly. A global daemon
18//! registry is deferred to v0.3.0.
19
20use std::path::{Path, PathBuf};
21
22use serde::{Deserialize, Serialize};
23
/// Active-daemon discovery record persisted to disk.
///
/// Written atomically by `daemon start` after a successful socket bind, read
/// by `daemon status` when `--project` is the default, and removed by
/// `daemon stop` after a successful shutdown.
///
/// The record is stored as a JSON object with the keys `project`, `pid` and
/// `socket` (the serde-derived representation of the fields below).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DaemonActive {
    /// Canonicalized project path the daemon was started with.
    pub project: PathBuf,
    /// PID of the daemon process. Validated via `kill(pid, 0)` on Unix
    /// before the record is trusted.
    pub pid: u32,
    /// Path to the daemon's IPC socket (informational; status recomputes
    /// from `project` for safety).
    pub socket: PathBuf,
}
40
41/// Path to the active-daemon discovery file.
42///
43/// Resolves to `<cache_dir>/tldr/daemon-active.json`. Falls back to
44/// `./.cache/tldr/daemon-active.json` if `dirs::cache_dir()` is unavailable
45/// (e.g., in restricted sandboxes); the file is auxiliary state, so this
46/// fallback is benign.
47pub fn active_file_path() -> PathBuf {
48 dirs::cache_dir()
49 .unwrap_or_else(|| PathBuf::from(".cache"))
50 .join("tldr")
51 .join("daemon-active.json")
52}
53
54/// Atomically write the active-daemon record.
55///
56/// Writes to `<path>.tmp` first, then renames into place. The rename is
57/// atomic on POSIX (and on NTFS via MoveFileEx), so a concurrent reader
58/// either sees the previous file or the new one — never a half-written
59/// file.
60///
61/// Failures are surfaced to the caller, but the caller (`daemon start`)
62/// treats them as warnings rather than fatal errors: the discovery file
63/// is auxiliary state and a missing file simply degrades to the
64/// pre-fix behaviour (i.e., `daemon status` from a different cwd reports
65/// `not_running`, exactly as today).
66pub fn write_active(project: &Path, pid: u32, socket: &Path) -> std::io::Result<()> {
67 let path = active_file_path();
68 if let Some(parent) = path.parent() {
69 std::fs::create_dir_all(parent)?;
70 }
71
72 let record = DaemonActive {
73 project: project.to_path_buf(),
74 pid,
75 socket: socket.to_path_buf(),
76 };
77 let json = serde_json::to_string_pretty(&record).map_err(std::io::Error::other)?;
78
79 let tmp = path.with_extension("json.tmp");
80 std::fs::write(&tmp, json)?;
81 std::fs::rename(&tmp, &path)?;
82 Ok(())
83}
84
85/// Read the active-daemon record, or `None` if absent / stale / corrupt.
86///
87/// "Stale" here means the recorded PID is no longer alive — `kill(pid, 0)`
88/// returns `ESRCH`. This guards against the case where a daemon crashed
89/// without removing the file: we don't want `daemon status` to report a
90/// dead daemon as `running`.
91pub fn read_active() -> Option<DaemonActive> {
92 let path = active_file_path();
93 let content = std::fs::read_to_string(&path).ok()?;
94 let parsed: DaemonActive = serde_json::from_str(&content).ok()?;
95 if !is_pid_alive(parsed.pid) {
96 return None;
97 }
98 Some(parsed)
99}
100
101/// Remove the active-daemon record, ignoring `NotFound`.
102///
103/// Called from `daemon stop` after a successful shutdown. NotFound is
104/// expected when the file was never written (e.g., daemon crashed during
105/// bind) or was already cleaned up.
106pub fn remove_active() -> std::io::Result<()> {
107 match std::fs::remove_file(active_file_path()) {
108 Ok(()) => Ok(()),
109 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
110 Err(e) => Err(e),
111 }
112}
113
114/// Best-effort liveness probe.
115///
116/// On Unix, sends signal 0 to `pid`. The signal-0 kernel path validates the
117/// target's existence and the caller's permission without actually
118/// delivering a signal:
119/// - `Ok` (return 0): process exists and we have permission.
120/// - `Err(EPERM)`: process exists but we don't have permission to signal
121/// it. Treat as alive — a different-uid daemon is still a daemon.
122/// - `Err(ESRCH)`: no such process — treat as dead.
123///
124/// On non-Unix platforms, returns `true` as a best-effort default; the
125/// status command's underlying `IpcStream::connect` will then surface a
126/// real failure if the daemon is not actually reachable.
127#[cfg(unix)]
128fn is_pid_alive(pid: u32) -> bool {
129 // Signal 0: existence + permission check, no actual signal delivered.
130 let rc = unsafe { libc::kill(pid as i32, 0) };
131 if rc == 0 {
132 return true;
133 }
134 // EPERM means the process exists but is owned by another user.
135 matches!(
136 std::io::Error::last_os_error().raw_os_error(),
137 Some(libc::EPERM)
138 )
139}
140
141#[cfg(not(unix))]
142fn is_pid_alive(_pid: u32) -> bool {
143 true
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A record must survive a JSON serialize → deserialize cycle unchanged.
    ///
    /// This exercises only the serde layer. It deliberately does not call
    /// `write_active` / `read_active`, because those resolve their location
    /// through `active_file_path` and would touch the real per-user
    /// discovery file.
    #[test]
    fn record_json_round_trips() {
        // The tempdir only supplies realistic absolute paths; nothing is
        // written to disk.
        let tmp = TempDir::new().expect("tempdir");
        let project = tmp.path().join("project");
        let socket = tmp.path().join("tldr-deadbeef.sock");

        let record = DaemonActive {
            project: project.clone(),
            pid: std::process::id(),
            socket: socket.clone(),
        };
        let json = serde_json::to_string(&record).unwrap();
        let parsed: DaemonActive = serde_json::from_str(&json).unwrap();

        assert_eq!(parsed.project, project);
        assert_eq!(parsed.pid, std::process::id());
        assert_eq!(parsed.socket, socket);
    }

    /// A PID whose process has exited and been reaped must probe as dead.
    #[cfg(unix)]
    #[test]
    fn reaped_child_pid_is_not_alive() {
        // Spawn `true` and wait for it so we hold a PID that is known to
        // have belonged to a now-reaped process.
        let mut child = std::process::Command::new("true")
            .spawn()
            .expect("spawn true");
        let pid = child.id();
        child.wait().expect("wait for child");

        // After wait(), the PID has been reaped; signal 0 should return
        // ESRCH. (The kernel could in principle recycle the PID between
        // wait() and the probe, but that window is negligible here.)
        assert!(!is_pid_alive(pid), "reaped child PID should not be alive");
    }

    /// The test process itself is trivially alive.
    #[cfg(unix)]
    #[test]
    fn current_process_is_alive() {
        assert!(is_pid_alive(std::process::id()));
    }
}