Skip to main content

objects/store/
liveness.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Process liveness detection for reservation reaping.
3//!
4//! `heddle agent reserve` is a one-shot command — the CLI process exits as
5//! soon as the reservation has been recorded. Holding a per-session
6//! `flock` for the life of that process therefore buys nothing: the
7//! kernel releases the lock on `exit(2)` long before the next agent ever
8//! needs to check liveness.
9//!
10//! We instead record `(pid, boot_id)` at reservation time and check
11//! liveness on demand with `kill(pid, 0)` plus a boot-id comparison.
12//! `ESRCH` means the process is gone. A boot id mismatch means the host
13//! rebooted and the PID has been reused — the original owner is also
14//! gone.
15
/// Outcome of a liveness probe for the process that recorded a reservation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Liveness {
    /// The recorded process still exists and belongs to the current boot.
    Alive,
    /// The recorded process no longer exists, or the boot id has changed
    /// since the reservation was recorded.
    Dead,
    /// There was not enough information to decide. Callers should leave
    /// the entry untouched so a live owner is never reaped just because
    /// a field was missing.
    Unknown,
}
26
/// Returns a best-effort identifier for the current boot, or `None` when
/// one cannot be obtained.
///
/// Where the identifier comes from depends on the platform:
///
/// - Linux: the contents of `/proc/sys/kernel/random/boot_id`.
/// - macOS: the leading `{ sec = …, usec = … }` part of
///   `sysctl -n kern.boottime`. That part is stable across invocations on
///   the same boot; the human-readable date that follows it is not.
/// - Everything else: always `None`.
///
/// On Linux the file contents are whitespace-trimmed, and an unreadable or
/// empty file yields `None`.
#[cfg(target_os = "linux")]
pub fn current_boot_id() -> Option<String> {
    let raw = std::fs::read_to_string("/proc/sys/kernel/random/boot_id").ok()?;
    let boot_id = raw.trim();
    if boot_id.is_empty() {
        None
    } else {
        Some(boot_id.to_string())
    }
}
41
/// macOS boot identifier: the output of `sysctl -n kern.boottime`, trimmed
/// and cut off right after the first `}`.
///
/// Only the text up to and including the first `}` is kept. If the output
/// contains no `}`, the whole trimmed output is used. Returns `None` when
/// the command cannot be run, exits unsuccessfully, prints non-UTF-8
/// output, or leaves nothing after trimming.
#[cfg(target_os = "macos")]
pub fn current_boot_id() -> Option<String> {
    let output = std::process::Command::new("sysctl")
        .args(["-n", "kern.boottime"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let text = String::from_utf8(output.stdout).ok()?;
    let trimmed = text.trim();
    // Keep everything through the closing brace; `}` is a single byte, so
    // the inclusive slice always ends on a character boundary.
    let stable = match trimmed.find('}') {
        Some(brace) => &trimmed[..=brace],
        None => trimmed,
    };

    if stable.is_empty() {
        None
    } else {
        Some(stable.to_string())
    }
}
61
/// Fallback for targets other than Linux and macOS: no boot identifier is
/// available here, so this always returns `None`.
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
pub fn current_boot_id() -> Option<String> {
    None
}
66
67/// `true` if the process identified by `pid` is still running. ESRCH
68/// from `kill(pid, 0)` is treated as dead. Any other error (notably
69/// EPERM — the process exists but is owned by a different user) is
70/// treated as alive: "alive in another uid namespace" still means the
71/// reservation might be valid.
72#[cfg(unix)]
73pub fn process_alive(pid: u32) -> bool {
74    let pid = pid as libc::pid_t;
75    if pid <= 0 {
76        return false;
77    }
78    let result = unsafe { libc::kill(pid, 0) };
79    if result == 0 {
80        return true;
81    }
82    let errno = std::io::Error::last_os_error().raw_os_error().unwrap_or(0);
83    errno != libc::ESRCH
84}
85
/// Non-unix (Windows) variant: always reports the process as alive.
///
/// There is no `kill(0)` primitive here without pulling in the Win32
/// process query, so the check defaults to alive and the terminal-status
/// TTL remains the backstop.
#[cfg(not(unix))]
pub fn process_alive(_pid: u32) -> bool {
    true
}
93
94/// Combined check: PID is alive *and* the recorded boot id matches the
95/// current boot id (when both are known). Missing fields collapse to
96/// `Unknown` — callers should not reap on `Unknown`.
97pub fn is_owner_alive(pid: Option<u32>, recorded_boot_id: Option<&str>) -> Liveness {
98    let Some(pid) = pid else {
99        return Liveness::Unknown;
100    };
101
102    if !process_alive(pid) {
103        return Liveness::Dead;
104    }
105
106    match (recorded_boot_id, current_boot_id()) {
107        (Some(recorded), Some(current)) if recorded != current => Liveness::Dead,
108        _ => Liveness::Alive,
109    }
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// A PID well above the ranges Linux and macOS hand out, used as a
    /// stand-in for "no such process". Only meaningful on unix targets,
    /// where `process_alive` actually probes the PID.
    #[cfg(unix)]
    const UNASSIGNED_PID: u32 = 0x7fff_ffff;

    #[test]
    fn process_alive_returns_true_for_self() {
        assert!(process_alive(std::process::id()));
    }

    // The dead-PID tests below are unix-only: the non-unix `process_alive`
    // unconditionally returns `true`, so they would fail there.

    #[cfg(unix)]
    #[test]
    fn process_alive_returns_false_for_pid_zero() {
        assert!(!process_alive(0));
    }

    #[cfg(unix)]
    #[test]
    fn process_alive_returns_false_for_unlikely_pid() {
        // Exercises the ESRCH path: no process should own this PID.
        assert!(!process_alive(UNASSIGNED_PID));
    }

    #[cfg(not(unix))]
    #[test]
    fn process_alive_defaults_to_true_without_unix() {
        // Without a kill(0) primitive every PID is reported as alive.
        assert!(process_alive(0));
    }

    #[test]
    fn is_owner_alive_unknown_without_pid() {
        assert_eq!(is_owner_alive(None, Some("boot")), Liveness::Unknown);
    }

    #[test]
    fn is_owner_alive_dead_when_boot_id_mismatches() {
        let pid = std::process::id();
        let liveness = is_owner_alive(Some(pid), Some("definitely-not-the-current-boot-id"));
        // With a real boot id available the mismatch is detected (Dead);
        // without one the check falls through to Alive.
        let expected = if current_boot_id().is_some() {
            Liveness::Dead
        } else {
            Liveness::Alive
        };
        assert_eq!(liveness, expected);
    }

    #[test]
    fn is_owner_alive_alive_when_self_pid_and_matching_or_missing_boot_id() {
        let pid = std::process::id();
        let boot = current_boot_id();
        assert_eq!(is_owner_alive(Some(pid), boot.as_deref()), Liveness::Alive);
    }

    #[cfg(unix)]
    #[test]
    fn is_owner_alive_dead_when_pid_is_dead() {
        // The boot id matches (or is missing), so only the dead PID can
        // produce `Dead` here.
        let liveness = is_owner_alive(Some(UNASSIGNED_PID), current_boot_id().as_deref());
        assert_eq!(liveness, Liveness::Dead);
    }
}