Skip to main content

mur_common/
lock_file.rs

1//! Canonical `running.lock` reader + 3-state agent status classifier.
2//!
3//! Used by:
4//! - `mur agent list/status` (CLI) — see `mur-core/src/cmd/agent.rs`
5//! - `/api/v1/agents/*` (HTTP) — see `mur-core/src/server_agents/`
6//! - `mur-agent-runtime` supervisor — see `mur-agent-runtime/src/lock_file.rs`
7//!   (the runtime additionally uses `flock`; that check stays local)
8
9use crate::LockFile;
10use serde::Serialize;
11use std::path::Path;
12
13/// Three-state classification of an agent's runtime state.
14#[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)]
15#[serde(rename_all = "snake_case")]
16pub enum AgentStatusKind {
17    /// Lock present and the recorded pid is alive.
18    Running,
19    /// Lock present but the pid is not alive (crash/kill — orphan lock).
20    Stale,
21    /// No lock file.
22    Stopped,
23}
24
25impl AgentStatusKind {
26    /// Stable visual marker for this status — the single source of truth for
27    /// the status→emoji mapping used by `mur agent list` and the agent card.
28    /// Exhaustive match: adding a variant is a compile error here by design.
29    pub fn emoji(&self) -> &'static str {
30        match self {
31            AgentStatusKind::Running => "🟢",
32            AgentStatusKind::Stale => "🟡",
33            AgentStatusKind::Stopped => "⚪",
34        }
35    }
36}
37
38/// Result of classifying an agent's lock state.
39#[derive(Debug, Clone, Copy)]
40pub struct AgentStatus {
41    pub kind: AgentStatusKind,
42    /// PID from the lock file. `None` when no lock or unparseable lock.
43    pub pid: Option<u32>,
44}
45
46/// Read and JSON-parse `<home>/running.lock`. Returns:
47/// - `Ok(None)` if the file does not exist (agent stopped).
48/// - `Ok(Some(_))` if the file exists and parses successfully.
49/// - `Err(_)` if the file exists but I/O fails or JSON is malformed.
50pub fn read(lock_path: &Path) -> std::io::Result<Option<LockFile>> {
51    let bytes = match std::fs::read(lock_path) {
52        Ok(b) => b,
53        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
54        Err(e) => return Err(e),
55    };
56    serde_json::from_slice(&bytes)
57        .map(Some)
58        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
59}
60
61/// Is the given pid currently a live process the calling user can signal?
62///
63/// On Unix uses `kill(pid, 0)` — signal 0 is a no-op probe that checks
64/// process existence and permission without delivering any signal.
65///
66/// On Windows uses `OpenProcess` with `PROCESS_QUERY_LIMITED_INFORMATION`.
67///
68/// On other platforms returns `true` (optimistically treat any present lock
69/// as live, since P0a agents are not supported there).
70#[cfg(unix)]
71pub fn pid_alive(pid: u32) -> bool {
72    // SAFETY: kill(2) with signal 0 delivers no signal; it only checks
73    // process existence and our permission to signal it. Always safe to call.
74    unsafe { libc::kill(pid as libc::pid_t, 0) == 0 }
75}
76
/// Windows liveness probe: a pid counts as alive when `OpenProcess` with
/// `PROCESS_QUERY_LIMITED_INFORMATION` returns a non-null handle for it.
#[cfg(windows)]
pub fn pid_alive(pid: u32) -> bool {
    use windows_sys::Win32::Foundation::CloseHandle;
    use windows_sys::Win32::System::Threading::{OpenProcess, PROCESS_QUERY_LIMITED_INFORMATION};

    // SAFETY: the call passes only plain values (access mask, inherit flag,
    // pid); a failure is reported through a null handle, checked below.
    let handle = unsafe { OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid) };
    if handle.is_null() {
        return false;
    }

    // The handle served only as an existence probe, so release it right away.
    // SAFETY: `handle` was just returned by `OpenProcess`, is non-null, and is
    // closed exactly once here.
    unsafe { CloseHandle(handle) };
    true
}
90
/// Fallback liveness probe for targets that are neither Unix nor Windows.
///
/// No process check is attempted: every pid is reported as alive, so any
/// lock that is present is optimistically treated as live.
#[cfg(not(any(unix, windows)))]
pub fn pid_alive(_pid: u32) -> bool {
    true
}
95
96/// Classify the agent's running state by inspecting `<home>/running.lock`.
97///
98/// - No lock → `Stopped`
99/// - Lock present, parses, pid alive → `Running`
100/// - Lock present but pid not alive (crash / SIGKILL / OOM) → `Stale`
101/// - Lock present but unparseable / unreadable → `Stale` with `pid: None`
102///   (treat as stale rather than running so dashboards don't paint dead
103///   agents green)
104pub fn classify(lock_path: &Path) -> AgentStatus {
105    match read(lock_path) {
106        Ok(None) => AgentStatus {
107            kind: AgentStatusKind::Stopped,
108            pid: None,
109        },
110        Err(_) => AgentStatus {
111            kind: AgentStatusKind::Stale,
112            pid: None,
113        },
114        Ok(Some(lock)) => {
115            let kind = if pid_alive(lock.pid) {
116                AgentStatusKind::Running
117            } else {
118                AgentStatusKind::Stale
119            };
120            AgentStatus {
121                kind,
122                pid: Some(lock.pid),
123            }
124        }
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agent::LockTransports;

    /// A pid that no live process can have, used to simulate a dead agent.
    ///
    /// `i32::MAX` is still a valid positive `pid_t`, so `kill(pid, 0)` probes
    /// exactly this pid, yet it lies far above the pid ceiling of the kernel
    /// (Linux caps `pid_max` at 4_194_304). A smaller "obviously unused"
    /// value such as 999_999 can be a real pid on hosts with a raised
    /// `pid_max`, which would make the dead-pid tests fail spuriously.
    #[cfg(unix)]
    const DEAD_PID: u32 = i32::MAX as u32;

    /// The emoji for each status is part of the user-visible output and must
    /// not drift.
    #[test]
    fn status_emoji_mapping_is_stable() {
        assert_eq!(AgentStatusKind::Running.emoji(), "🟢");
        assert_eq!(AgentStatusKind::Stale.emoji(), "🟡");
        assert_eq!(AgentStatusKind::Stopped.emoji(), "⚪");
    }

    /// Builds a fully populated lock record whose only varying field is `pid`.
    fn make_lock(pid: u32) -> LockFile {
        LockFile {
            schema: 1,
            uuid: "01JQX4TM8Y9K7VQH6B2N3R5DPE".into(),
            name: "agent_a".into(),
            pid,
            ppid: 1,
            started_at: "2026-04-22T08:00:00Z".into(),
            binary_version: "mur-agent-runtime 0.1.0".into(),
            transports: LockTransports {
                stdio: false,
                unix_socket: Some("/tmp/x.sock".into()),
                tcp: None,
                webhook: None,
            },
            card_digest: "sha256:abc".into(),
            capabilities: vec!["a2a.message.send".into()],
            build_sha: String::new(),
            proto_version: 0,
        }
    }

    /// Writes a valid `running.lock` for `pid` into `dir` and returns its path.
    fn write_lock_file(dir: &std::path::Path, pid: u32) -> std::path::PathBuf {
        let path = dir.join("running.lock");
        let lock = make_lock(pid);
        std::fs::write(&path, serde_json::to_vec_pretty(&lock).unwrap()).unwrap();
        path
    }

    #[test]
    fn classify_returns_stopped_when_no_lock() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Stopped);
        assert_eq!(status.pid, None);
    }

    #[cfg(unix)]
    #[test]
    fn classify_returns_running_when_pid_alive() {
        // The test process itself is the one pid guaranteed to be alive.
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = write_lock_file(tmp.path(), std::process::id());
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Running);
        assert_eq!(status.pid, Some(std::process::id()));
    }

    #[cfg(unix)]
    #[test]
    fn classify_returns_stale_when_pid_dead() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = write_lock_file(tmp.path(), DEAD_PID);
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Stale);
        assert_eq!(status.pid, Some(DEAD_PID));
    }

    #[test]
    fn classify_returns_stale_when_lock_malformed() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        std::fs::write(&lock_path, b"not json").unwrap();
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Stale);
        assert_eq!(status.pid, None);
    }

    #[test]
    fn read_returns_none_for_missing_file() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        let result = read(&lock_path).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn read_returns_ok_for_valid_lock() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = write_lock_file(tmp.path(), 42);
        let result = read(&lock_path).unwrap();
        assert!(result.is_some());
        assert_eq!(result.unwrap().pid, 42);
    }

    #[test]
    fn read_returns_err_for_malformed_json() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        std::fs::write(&lock_path, b"not json").unwrap();
        let result = read(&lock_path);
        assert!(result.is_err());
    }

    #[cfg(unix)]
    #[test]
    fn pid_alive_returns_true_for_self() {
        assert!(pid_alive(std::process::id()));
    }

    #[cfg(unix)]
    #[test]
    fn pid_alive_returns_false_for_dead_pid() {
        assert!(!pid_alive(DEAD_PID));
    }
}