Skip to main content

ao_core/
lockfile.rs

1//! PID-file based advisory locking for singleton daemons.
2//!
3//! Mirrors `packages/cli/src/lib/lifecycle-service.ts` in the reference repo:
4//! a would-be daemon reads a well-known pidfile, checks whether that pid is
5//! still running (via `kill(pid, 0)`), and takes over iff the previous owner
6//! is gone. The file is removed on clean shutdown.
7//!
8//! This is **advisory**, not enforced — two racing processes that both pass
9//! the "not running" check before either writes can still stomp on each
10//! other. The TS reference has the same limitation and shrugs it off for a
11//! single-user CLI. Slice 1 Phase D does the same.
12//!
//! Why not `fs2::flock`? A flock is tied to the open file description (on
//! Linux and macOS alike) and goes away once every descriptor for it is
//! closed, but it carries no record of *who* holds it. We want
//! "process-that-owns-pid is alive" semantics with a pid we can name, which
//! flock doesn't give us. A PID probe is the behaviour the user actually wants.
17
18use std::fs;
19use std::io::{self, Write};
20use std::path::{Path, PathBuf};
21
/// Reasons taking a pidfile can fail.
#[derive(Debug)]
pub enum LockError {
    /// The pidfile names a process that is still running, so the lock is
    /// considered taken.
    HeldBy {
        /// Pid read from the existing pidfile.
        pid: u32,
        /// Location of the contested pidfile.
        path: PathBuf,
    },
    /// A filesystem operation performed while taking the lock failed.
    Io(io::Error),
}
31
32impl From<io::Error> for LockError {
33    fn from(e: io::Error) -> Self {
34        Self::Io(e)
35    }
36}
37
38impl std::fmt::Display for LockError {
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        match self {
41            Self::HeldBy { pid, path } => {
42                write!(f, "pidfile {} held by live process {pid}", path.display())
43            }
44            Self::Io(e) => write!(f, "pidfile io: {e}"),
45        }
46    }
47}
48
// NOTE(review): `source()` is not overridden, so it returns the trait default
// (`None`) even for the `Io` variant; the wrapped error's text is reachable
// through `Display`, and the error itself by matching on the variant.
impl std::error::Error for LockError {}
50
/// Guard representing ownership of an on-disk pidfile.
///
/// The file is cleaned up when the guard is dropped, but only while it still
/// records our pid: if another daemon has taken the file over in the
/// meantime (e.g. after we crashed), its contents are left in place.
#[derive(Debug)]
pub struct PidFile {
    /// Where the pidfile lives on disk.
    path: PathBuf,
    /// The pid that was written into the file when it was acquired.
    pid: u32,
    /// Set by `release()` so that `Drop` skips its own cleanup attempt.
    released: bool,
}
64
65impl PidFile {
66    /// Try to take the pidfile at `path`. On success, our pid is written
67    /// to disk and held until drop.
68    ///
69    /// If a previous pidfile exists and that pid is still alive,
70    /// `Err(LockError::HeldBy)` is returned and the file is left untouched.
71    /// A stale pidfile (dead pid) is silently replaced.
72    pub fn acquire(path: impl Into<PathBuf>) -> Result<Self, LockError> {
73        let path = path.into();
74        if let Some(parent) = path.parent() {
75            fs::create_dir_all(parent)?;
76        }
77
78        if let Some(existing) = read_pidfile(&path) {
79            if is_process_alive(existing) && existing != std::process::id() {
80                return Err(LockError::HeldBy {
81                    pid: existing,
82                    path,
83                });
84            }
85            // Stale: fall through and overwrite.
86        }
87
88        let pid = std::process::id();
89        // Write via a sibling temp + rename so a concurrent reader never
90        // sees a half-written file (matches `SessionManager::save`).
91        let temp = path.with_extension("pid.tmp");
92        {
93            let mut f = fs::File::create(&temp)?;
94            writeln!(f, "{pid}")?;
95            f.sync_all()?;
96        }
97        fs::rename(&temp, &path)?;
98
99        Ok(Self {
100            path,
101            pid,
102            released: false,
103        })
104    }
105
106    pub fn path(&self) -> &Path {
107        &self.path
108    }
109
110    pub fn pid(&self) -> u32 {
111        self.pid
112    }
113
114    /// Explicitly release the pidfile now. Equivalent to letting it drop,
115    /// except the caller can observe the io error instead of swallowing it.
116    pub fn release(mut self) -> io::Result<()> {
117        self.released = true;
118        remove_if_ours(&self.path, self.pid)
119    }
120}
121
122impl Drop for PidFile {
123    fn drop(&mut self) {
124        if self.released {
125            return;
126        }
127        // Best effort — a failed cleanup just leaves a stale file, which
128        // the next acquire() will replace.
129        let _ = remove_if_ours(&self.path, self.pid);
130    }
131}
132
/// Return the pid recorded in the pidfile at `path`.
///
/// Yields `None` when the file cannot be read as text or when its contents,
/// with surrounding whitespace trimmed, are not a valid `u32`.
pub fn read_pidfile(path: &Path) -> Option<u32> {
    fs::read_to_string(path)
        .ok()
        .and_then(|text| text.trim().parse().ok())
}
138
139/// Is the given pid currently a running process on this machine?
140///
141/// Uses `kill(pid, 0)` — the POSIX way to test for a pid's existence
142/// without actually signalling it. `EPERM` also counts as "alive" (the
143/// process exists but we don't own it, e.g. running as another user).
144pub fn is_process_alive(pid: u32) -> bool {
145    if pid == 0 {
146        return false;
147    }
148    // SAFETY: `kill` is thread-safe and a signal of 0 performs only the
149    // permission/existence check without delivering anything. The only
150    // preconditions are a valid pid (we reject 0 above) and a defined
151    // signal (0 is always defined), both of which we satisfy.
152    let rc = unsafe { libc::kill(pid as libc::pid_t, 0) };
153    if rc == 0 {
154        return true;
155    }
156    match io::Error::last_os_error().raw_os_error() {
157        Some(libc::EPERM) => true, // exists, not ours
158        _ => false,
159    }
160}
161
162fn remove_if_ours(path: &Path, our_pid: u32) -> io::Result<()> {
163    // Re-read the file before deleting; if it no longer says our pid,
164    // someone else owns it and we leave it alone.
165    match read_pidfile(path) {
166        Some(pid) if pid == our_pid => fs::remove_file(path),
167        // Either the file is gone already or a different owner took over —
168        // both are "nothing to clean up" as far as we're concerned.
169        _ => Ok(()),
170    }
171}
172
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Build a pidfile path under the system temp dir that is unique per
    /// call: wall-clock nanoseconds plus a per-process counter.
    fn unique_tmp(label: &str) -> PathBuf {
        static COUNTER: AtomicUsize = AtomicUsize::new(0);
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        std::env::temp_dir().join(format!("ao-rs-lock-{label}-{nanos}-{n}.pid"))
    }

    #[test]
    fn acquire_when_no_file_writes_our_pid() {
        let path = unique_tmp("fresh");
        let lock = PidFile::acquire(&path).unwrap();
        assert!(path.exists());
        assert_eq!(read_pidfile(&path), Some(std::process::id()));
        assert_eq!(lock.pid(), std::process::id());
        drop(lock);
        assert!(!path.exists(), "drop should remove the pidfile");
    }

    #[test]
    fn acquire_replaces_stale_pidfile() {
        // Pick a pid that cannot be alive. `i32::MAX` still fits in `pid_t`
        // but lies far above any pid the kernel hands out (Linux caps
        // `pid_max` at 4_194_304; macOS pids stay below 100_000), so
        // kill(pid, 0) returns ESRCH → dead. A small constant such as
        // 999_999 is not safe here: 64-bit Linux hosts commonly raise
        // `pid_max` well past it, making it a legal, possibly live pid.
        let stale_pid = i32::MAX as u32;
        assert!(!is_process_alive(stale_pid), "sanity: {stale_pid} is dead");

        let path = unique_tmp("stale");
        fs::create_dir_all(path.parent().unwrap()).unwrap();
        fs::write(&path, format!("{stale_pid}\n")).unwrap();

        let lock = PidFile::acquire(&path).unwrap();
        assert_eq!(read_pidfile(&path), Some(std::process::id()));
        drop(lock);
    }

    #[test]
    fn acquire_rejects_live_other_pid() {
        // Fake a pidfile holding pid 1. On every Unix box pid 1 is alive
        // (init / launchd), and it's not us, so this should fail.
        // NOTE(review): assumes the test binary itself is not pid 1 (which
        // can happen when it is the entry point of a container).
        let path = unique_tmp("held");
        fs::create_dir_all(path.parent().unwrap()).unwrap();
        fs::write(&path, "1\n").unwrap();

        match PidFile::acquire(&path) {
            Err(LockError::HeldBy { pid, .. }) => assert_eq!(pid, 1),
            other => panic!("expected HeldBy(1), got {other:?}"),
        }
        // File must not be rewritten by a failed acquire.
        assert_eq!(read_pidfile(&path), Some(1));
        fs::remove_file(&path).ok();
    }

    #[test]
    fn drop_does_not_remove_file_if_stolen() {
        let path = unique_tmp("stolen");
        let lock = PidFile::acquire(&path).unwrap();

        // Simulate a racing daemon overwriting our pidfile with its own pid.
        fs::write(&path, "1\n").unwrap();

        drop(lock);

        // The hijacked contents must survive — we only clean up our own pid.
        assert_eq!(read_pidfile(&path), Some(1));
        fs::remove_file(&path).ok();
    }

    #[test]
    fn is_process_alive_returns_true_for_self() {
        assert!(is_process_alive(std::process::id()));
    }

    #[test]
    fn is_process_alive_returns_false_for_zero() {
        assert!(!is_process_alive(0));
    }
}