Skip to main content

mur_common/
lock_file.rs

//! Canonical `running.lock` reader + 3-state agent status classifier.
//!
//! Used by:
//! - `mur agent list/status` (CLI) — see `mur-core/src/cmd/agent.rs`
//! - `/api/v1/agents/*` (HTTP) — see `mur-core/src/server_agents/`
//! - `mur-agent-runtime` supervisor — see `mur-agent-runtime/src/lock_file.rs`
//!   (the runtime additionally uses `flock`; that check stays local)
8
9use crate::LockFile;
10use serde::Serialize;
11use std::path::Path;
12
13/// Three-state classification of an agent's runtime state.
14#[derive(Serialize, Debug, Clone, Copy, PartialEq, Eq)]
15#[serde(rename_all = "snake_case")]
16pub enum AgentStatusKind {
17    /// Lock present and the recorded pid is alive.
18    Running,
19    /// Lock present but the pid is not alive (crash/kill — orphan lock).
20    Stale,
21    /// No lock file.
22    Stopped,
23}
24
25impl AgentStatusKind {
26    /// Stable visual marker for this status — the single source of truth for
27    /// the status→emoji mapping used by `mur agent list` and the agent card.
28    /// Exhaustive match: adding a variant is a compile error here by design.
29    pub fn emoji(&self) -> &'static str {
30        match self {
31            AgentStatusKind::Running => "🟢",
32            AgentStatusKind::Stale => "🟡",
33            AgentStatusKind::Stopped => "⚪",
34        }
35    }
36}
37
38/// Result of classifying an agent's lock state.
39#[derive(Debug, Clone, Copy)]
40pub struct AgentStatus {
41    pub kind: AgentStatusKind,
42    /// PID from the lock file. `None` when no lock or unparseable lock.
43    pub pid: Option<u32>,
44}
45
46/// Read and JSON-parse `<home>/running.lock`. Returns:
47/// - `Ok(None)` if the file does not exist (agent stopped).
48/// - `Ok(Some(_))` if the file exists and parses successfully.
49/// - `Err(_)` if the file exists but I/O fails or JSON is malformed.
50pub fn read(lock_path: &Path) -> std::io::Result<Option<LockFile>> {
51    let bytes = match std::fs::read(lock_path) {
52        Ok(b) => b,
53        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
54        Err(e) => return Err(e),
55    };
56    serde_json::from_slice(&bytes)
57        .map(Some)
58        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
59}
60
61/// Is the given pid currently a live process the calling user can signal?
62///
63/// On Unix uses `kill(pid, 0)` — signal 0 is a no-op probe that checks
64/// process existence and permission without delivering any signal.
65///
66/// On Windows uses `OpenProcess` with `PROCESS_QUERY_LIMITED_INFORMATION`.
67///
68/// On other platforms returns `true` (optimistically treat any present lock
69/// as live, since P0a agents are not supported there).
70#[cfg(unix)]
71pub fn pid_alive(pid: u32) -> bool {
72    // SAFETY: kill(2) with signal 0 delivers no signal; it only checks
73    // process existence and our permission to signal it. Always safe to call.
74    unsafe { libc::kill(pid as libc::pid_t, 0) == 0 }
75}
76
/// Windows liveness probe: is `pid` a process that is still running?
///
/// Opens the process with `PROCESS_QUERY_LIMITED_INFORMATION` and then asks
/// for its exit code. Opening alone is not enough: the kernel keeps a process
/// object around after exit for as long as any handle to it is open, so
/// `OpenProcess` can succeed for a process that has already terminated.
///
/// Returns `false` when the process cannot be opened or has exited. If the
/// exit-code query itself fails, a successfully opened handle is treated as
/// alive.
#[cfg(windows)]
pub fn pid_alive(pid: u32) -> bool {
    use windows_sys::Win32::Foundation::CloseHandle;
    use windows_sys::Win32::System::Threading::{
        GetExitCodeProcess, OpenProcess, PROCESS_QUERY_LIMITED_INFORMATION,
    };

    // Value `GetExitCodeProcess` reports while the process has not exited
    // (`STILL_ACTIVE` in the Win32 headers).
    const STILL_ACTIVE: u32 = 259;

    // SAFETY: `OpenProcess` takes plain integer arguments and returns either
    // null or a handle we own. `GetExitCodeProcess` is given that live handle
    // and a valid pointer to a local `u32`. The handle is closed exactly once
    // before returning and never used afterwards.
    unsafe {
        let handle = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid);
        if handle.is_null() {
            return false;
        }
        let mut exit_code: u32 = 0;
        let queried = GetExitCodeProcess(handle, &mut exit_code) != 0;
        CloseHandle(handle);
        // A failed query gives no evidence of exit; keep the optimistic answer.
        !queried || exit_code == STILL_ACTIVE
    }
}
90
91#[cfg(not(any(unix, windows)))]
/// Fallback liveness probe: performs no process lookup, ignores the pid, and
/// reports every pid as alive.
pub fn pid_alive(_pid: u32) -> bool {
    // Optimistic by design: with no way to inspect processes here, a present
    // lock is assumed to belong to a live agent.
    const ASSUME_ALIVE: bool = true;
    ASSUME_ALIVE
}
95
96/// Classify the agent's running state by inspecting `<home>/running.lock`.
97///
98/// - No lock → `Stopped`
99/// - Lock present, parses, pid alive → `Running`
100/// - Lock present but pid not alive (crash / SIGKILL / OOM) → `Stale`
101/// - Lock present but unparseable / unreadable → `Stale` with `pid: None`
102///   (treat as stale rather than running so dashboards don't paint dead
103///   agents green)
104pub fn classify(lock_path: &Path) -> AgentStatus {
105    match read(lock_path) {
106        Ok(None) => AgentStatus {
107            kind: AgentStatusKind::Stopped,
108            pid: None,
109        },
110        Err(_) => AgentStatus {
111            kind: AgentStatusKind::Stale,
112            pid: None,
113        },
114        Ok(Some(lock)) => {
115            let kind = if pid_alive(lock.pid) {
116                AgentStatusKind::Running
117            } else {
118                AgentStatusKind::Stale
119            };
120            AgentStatus {
121                kind,
122                pid: Some(lock.pid),
123            }
124        }
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agent::LockTransports;

    /// A pid that cannot name a live process.
    ///
    /// It is the largest positive `pid_t`, far above the pid ceiling kernels
    /// actually allocate from (Linux caps `pid_max` at 4_194_304). A smaller
    /// "obviously dead" value such as 999_999 is within that range and can be
    /// a real process on a busy host, which makes the tests flaky.
    #[cfg(unix)]
    const DEAD_PID: u32 = i32::MAX as u32;

    #[test]
    fn status_emoji_mapping_is_stable() {
        assert_eq!(AgentStatusKind::Running.emoji(), "🟢");
        assert_eq!(AgentStatusKind::Stale.emoji(), "🟡");
        assert_eq!(AgentStatusKind::Stopped.emoji(), "⚪");
    }

    /// Build a fully populated lock record whose only varying field is `pid`.
    fn make_lock(pid: u32) -> LockFile {
        LockFile {
            schema: 1,
            uuid: "01JQX4TM8Y9K7VQH6B2N3R5DPE".into(),
            name: "agent_a".into(),
            pid,
            ppid: 1,
            started_at: "2026-04-22T08:00:00Z".into(),
            binary_version: "mur-agent-runtime 0.1.0".into(),
            transports: LockTransports {
                stdio: false,
                unix_socket: Some("/tmp/x.sock".into()),
                tcp: None,
                webhook: None,
            },
            card_digest: "sha256:abc".into(),
            capabilities: vec!["a2a.message.send".into()],
        }
    }

    /// Serialize a lock for `pid` to `<dir>/running.lock` and return its path.
    fn write_lock_file(dir: &std::path::Path, pid: u32) -> std::path::PathBuf {
        let path = dir.join("running.lock");
        let json = serde_json::to_vec_pretty(&make_lock(pid)).unwrap();
        std::fs::write(&path, json).unwrap();
        path
    }

    #[test]
    fn classify_returns_stopped_when_no_lock() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Stopped);
        assert_eq!(status.pid, None);
    }

    #[cfg(unix)]
    #[test]
    fn classify_returns_running_when_pid_alive() {
        let tmp = tempfile::tempdir().unwrap();
        // The test process itself is the one pid known to be alive.
        let own_pid = std::process::id();
        let lock_path = write_lock_file(tmp.path(), own_pid);
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Running);
        assert_eq!(status.pid, Some(own_pid));
    }

    #[cfg(unix)]
    #[test]
    fn classify_returns_stale_when_pid_dead() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = write_lock_file(tmp.path(), DEAD_PID);
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Stale);
        assert_eq!(status.pid, Some(DEAD_PID));
    }

    #[test]
    fn classify_returns_stale_when_lock_malformed() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        std::fs::write(&lock_path, b"not json").unwrap();
        let status = classify(&lock_path);
        assert_eq!(status.kind, AgentStatusKind::Stale);
        assert_eq!(status.pid, None);
    }

    #[test]
    fn read_returns_none_for_missing_file() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        let result = read(&lock_path).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn read_returns_ok_for_valid_lock() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = write_lock_file(tmp.path(), 42);
        let lock = read(&lock_path)
            .unwrap()
            .expect("an existing, valid lock file must parse to Some");
        assert_eq!(lock.pid, 42);
    }

    #[test]
    fn read_returns_err_for_malformed_json() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("running.lock");
        std::fs::write(&lock_path, b"not json").unwrap();
        let result = read(&lock_path);
        assert!(result.is_err());
    }

    #[cfg(unix)]
    #[test]
    fn pid_alive_returns_true_for_self() {
        assert!(pid_alive(std::process::id()));
    }

    #[cfg(unix)]
    #[test]
    fn pid_alive_returns_false_for_dead_pid() {
        assert!(!pid_alive(DEAD_PID));
    }
}