Skip to main content

mur_common/
lock_file.rs

1//! Canonical `running.lock` reader + 3-state agent status classifier.
2//!
3//! Used by:
4//! - `mur agent list/status` (CLI) — see `mur-core/src/cmd/agent.rs`
5//! - `/api/v1/agents/*` (HTTP) — see `mur-core/src/server_agents/`
6//! - `mur-agent-runtime` supervisor — see `mur-agent-runtime/src/lock_file.rs`
7//!   (the runtime additionally uses `flock`; that check stays local)
8
9use crate::LockFile;
10use serde::Serialize;
11use std::path::Path;
12
13/// Three-state classification of an agent's runtime state.
14#[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)]
15#[serde(rename_all = "snake_case")]
16pub enum AgentStatusKind {
17    /// Lock present and the recorded pid is alive.
18    Running,
19    /// Lock present but the pid is not alive (crash/kill — orphan lock).
20    Stale,
21    /// No lock file.
22    Stopped,
23}
24
25/// Result of classifying an agent's lock state.
26#[derive(Debug, Clone, Copy)]
27pub struct AgentStatus {
28    pub kind: AgentStatusKind,
29    /// PID from the lock file. `None` when no lock or unparseable lock.
30    pub pid: Option<u32>,
31}
32
33/// Read and JSON-parse `<home>/running.lock`. Returns:
34/// - `Ok(None)` if the file does not exist (agent stopped).
35/// - `Ok(Some(_))` if the file exists and parses successfully.
36/// - `Err(_)` if the file exists but I/O fails or JSON is malformed.
37pub fn read(lock_path: &Path) -> std::io::Result<Option<LockFile>> {
38    let bytes = match std::fs::read(lock_path) {
39        Ok(b) => b,
40        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
41        Err(e) => return Err(e),
42    };
43    serde_json::from_slice(&bytes)
44        .map(Some)
45        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
46}
47
48/// Is the given pid currently a live process the calling user can signal?
49///
50/// On Unix uses `kill(pid, 0)` — signal 0 is a no-op probe that checks
51/// process existence and permission without delivering any signal.
52///
53/// On Windows uses `OpenProcess` with `PROCESS_QUERY_LIMITED_INFORMATION`.
54///
55/// On other platforms returns `true` (optimistically treat any present lock
56/// as live, since P0a agents are not supported there).
57#[cfg(unix)]
58pub fn pid_alive(pid: u32) -> bool {
59    // SAFETY: kill(2) with signal 0 delivers no signal; it only checks
60    // process existence and our permission to signal it. Always safe to call.
61    unsafe { libc::kill(pid as libc::pid_t, 0) == 0 }
62}
63
/// Windows liveness probe for `pid`.
///
/// Opens the process with `PROCESS_QUERY_LIMITED_INFORMATION` and then checks
/// its exit code. Opening alone is not sufficient: the process object of an
/// already-exited process stays openable for as long as any other handle to
/// it exists, so a dead agent could otherwise be reported as alive.
///
/// Returns `false` when the process cannot be opened or has exited. If the
/// exit-code query itself fails, falls back to `true` (the handle did open).
#[cfg(windows)]
pub fn pid_alive(pid: u32) -> bool {
    use windows_sys::Win32::Foundation::CloseHandle;
    use windows_sys::Win32::System::Threading::{
        GetExitCodeProcess, OpenProcess, PROCESS_QUERY_LIMITED_INFORMATION,
    };

    // Exit code `GetExitCodeProcess` reports while the process is still
    // running (Win32 `STILL_ACTIVE`). Declared locally so the comparison does
    // not depend on how the bindings type that constant.
    const STILL_ACTIVE: u32 = 259;

    // SAFETY: `OpenProcess` is called with plain integer arguments; the
    // returned handle is null-checked before use, only passed to
    // `GetExitCodeProcess` together with a valid pointer to a local `u32`,
    // and closed exactly once before returning.
    unsafe {
        let h = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid);
        if h.is_null() {
            return false;
        }
        let mut exit_code: u32 = 0;
        let queried = GetExitCodeProcess(h, &mut exit_code) != 0;
        CloseHandle(h);
        !queried || exit_code == STILL_ACTIVE
    }
}
77
/// Fallback liveness probe for targets that are neither Unix nor Windows.
///
/// No process probe is implemented for these targets, so every pid is
/// reported as alive and any lock file that is present counts as running.
#[cfg(not(any(unix, windows)))]
pub fn pid_alive(_pid: u32) -> bool {
    // Optimistic by design: the argument is intentionally ignored.
    true
}
82
83/// Classify the agent's running state by inspecting `<home>/running.lock`.
84///
85/// - No lock → `Stopped`
86/// - Lock present, parses, pid alive → `Running`
87/// - Lock present but pid not alive (crash / SIGKILL / OOM) → `Stale`
88/// - Lock present but unparseable / unreadable → `Stale` with `pid: None`
89///   (treat as stale rather than running so dashboards don't paint dead
90///   agents green)
91pub fn classify(lock_path: &Path) -> AgentStatus {
92    match read(lock_path) {
93        Ok(None) => AgentStatus {
94            kind: AgentStatusKind::Stopped,
95            pid: None,
96        },
97        Err(_) => AgentStatus {
98            kind: AgentStatusKind::Stale,
99            pid: None,
100        },
101        Ok(Some(lock)) => {
102            let kind = if pid_alive(lock.pid) {
103                AgentStatusKind::Running
104            } else {
105                AgentStatusKind::Stale
106            };
107            AgentStatus {
108                kind,
109                pid: Some(lock.pid),
110            }
111        }
112    }
113}
114
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agent::LockTransports;

    /// Build a fully-populated lock record whose only varying field is `pid`.
    fn make_lock(pid: u32) -> LockFile {
        LockFile {
            schema: 1,
            uuid: "01JQX4TM8Y9K7VQH6B2N3R5DPE".into(),
            name: "agent_a".into(),
            pid,
            ppid: 1,
            started_at: "2026-04-22T08:00:00Z".into(),
            binary_version: "mur-agent-runtime 0.1.0".into(),
            transports: LockTransports {
                stdio: false,
                unix_socket: Some("/tmp/x.sock".into()),
                tcp: None,
                webhook: None,
            },
            card_digest: "sha256:abc".into(),
            capabilities: vec!["a2a.message.send".into()],
        }
    }

    /// Write a valid `running.lock` for `pid` into `dir` and return its path.
    fn write_lock_file(dir: &std::path::Path, pid: u32) -> std::path::PathBuf {
        let path = dir.join("running.lock");
        let lock = make_lock(pid);
        std::fs::write(&path, serde_json::to_vec_pretty(&lock).unwrap()).unwrap();
        path
    }

    /// Pid of a child process that has already exited and been reaped.
    ///
    /// A hard-coded "surely unused" pid such as 999_999 can belong to a live
    /// process on hosts where the kernel's pid limit is raised, which makes
    /// the dead-pid tests flaky. A just-reaped child's pid is known to be
    /// free (barring immediate reuse).
    #[cfg(unix)]
    fn reaped_child_pid() -> u32 {
        let mut child = std::process::Command::new("sh")
            .args(["-c", "exit 0"])
            .spawn()
            .expect("spawn short-lived child process");
        let pid = child.id();
        child.wait().expect("reap child process");
        pid
    }

    #[test]
    fn classify_returns_stopped_when_no_lock() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Stopped);
        assert_eq!(status.pid, None);
    }

    #[cfg(unix)]
    #[test]
    fn classify_returns_running_when_pid_alive() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = write_lock_file(tmp.path(), std::process::id());
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Running);
        assert_eq!(status.pid, Some(std::process::id()));
    }

    #[cfg(unix)]
    #[test]
    fn classify_returns_stale_when_pid_dead() {
        let tmp = tempfile::tempdir().unwrap();
        let dead_pid = reaped_child_pid();
        let lock_path = write_lock_file(tmp.path(), dead_pid);
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Stale);
        assert_eq!(status.pid, Some(dead_pid));
    }

    #[test]
    fn classify_returns_stale_when_lock_malformed() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        std::fs::write(&lock_path, b"not json").unwrap();
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Stale);
        assert_eq!(status.pid, None);
    }

    #[test]
    fn read_returns_none_for_missing_file() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        let result = read(&lock_path).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn read_returns_ok_for_valid_lock() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = write_lock_file(tmp.path(), 42);
        let result = read(&lock_path).unwrap();
        assert_eq!(result.map(|lock| lock.pid), Some(42));
    }

    #[test]
    fn read_returns_err_for_malformed_json() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        std::fs::write(&lock_path, b"not json").unwrap();
        let result = read(&lock_path);
        assert!(result.is_err());
    }

    #[cfg(unix)]
    #[test]
    fn pid_alive_returns_true_for_self() {
        assert!(pid_alive(std::process::id()));
    }

    #[cfg(unix)]
    #[test]
    fn pid_alive_returns_false_for_dead_pid() {
        assert!(!pid_alive(reaped_child_pid()));
    }
}