Skip to main content

systemprompt_models/
subprocess.rs

1//! Environment-marker contract between the supervisor and the detached agent
2//! and MCP children it spawns.
3//!
4//! The supervisor stamps these markers at spawn time; shutdown and
5//! reconciliation read them back from `/proc/<pid>/environ` to confirm a
6//! registry PID still names *this* installation's child before signalling it.
7//! PIDs are recycled, and group-signalling a stale PID (`kill(-pid)`) could
8//! reach an unrelated session leader — so a row is only ever signalled once
9//! both the subprocess marker and the exact `name_key=service_name` pairing
10//! are found.
11
/// Name of the environment variable the supervisor stamps on the children it
/// spawns. [`environ_identifies_child`] looks for the exact entry
/// `SYSTEMPROMPT_SUBPROCESS=1` in the child's environment.
pub const SUBPROCESS_MARKER_ENV: &str = "SYSTEMPROMPT_SUBPROCESS";
/// Name of the environment variable carrying an agent child's name.
///
/// NOTE(review): presumably supplied as the `name_key` argument when verifying
/// agent children — not referenced in this part of the file; confirm against
/// callers.
pub const AGENT_NAME_ENV: &str = "AGENT_NAME";
/// Name of the environment variable carrying an MCP child's service id.
///
/// NOTE(review): presumably supplied as the `name_key` argument when verifying
/// MCP children — not referenced in this part of the file; confirm against
/// callers.
pub const MCP_SERVICE_ID_ENV: &str = "MCP_SERVICE_ID";
15
/// Narrow an OS process id to the signed pid `kill(2)` takes, yielding `None`
/// for any value that would address something other than one single process.
///
/// Two inputs are refused:
///
/// * `0` — `kill(2)` interprets it as "the caller's own process group".
/// * anything above `i32::MAX` — it has no positive `i32` representation, and
///   a wrapping conversion would produce a negative pid, which `kill(2)` reads
///   as a *process group* (`-1` being **every** process the caller may
///   signal).
///
/// Sending every pid through this guard means such a request degrades to a
/// no-op instead of escalating into a group- or session-wide signal.
#[must_use]
pub fn signalable_pid(pid: u32) -> Option<i32> {
    // `try_from` drops values that do not fit in an `i32`; the filter then
    // drops the one remaining non-positive candidate, zero.
    i32::try_from(pid).ok().filter(|&signed| signed != 0)
}
31
32#[must_use]
33pub fn environ_identifies_child(environ: &[u8], name_key: &str, service_name: &str) -> bool {
34    let marker = format!("{SUBPROCESS_MARKER_ENV}=1");
35    let expected_name = format!("{name_key}={service_name}");
36
37    let mut has_marker = false;
38    let mut has_name = false;
39    for entry in environ.split(|&b| b == 0) {
40        if entry == marker.as_bytes() {
41            has_marker = true;
42        } else if entry == expected_name.as_bytes() {
43            has_name = true;
44        }
45    }
46
47    has_marker && has_name
48}
49
50/// Confirm a *live* PID still names this installation's child by reading its
51/// `/proc/<pid>/environ` and matching the spawn markers.
52///
53/// Fail-closed: an unreadable environ — or any non-Linux target, where
54/// `/proc` does not exist — yields `false`, so an unverified PID is never
55/// signalled. Callers must use this before any `kill`/`kill(-pid)` on a PID
56/// loaded from the persisted service registry, because those PIDs outlive the
57/// processes that minted them and are recycled by the kernel.
58#[cfg(target_os = "linux")]
59#[must_use]
60pub fn live_pid_is_subprocess(pid: u32, name_key: &str, service_name: &str) -> bool {
61    match std::fs::read(format!("/proc/{pid}/environ")) {
62        Ok(environ) => environ_identifies_child(&environ, name_key, service_name),
63        Err(e) => {
64            tracing::warn!(pid, error = %e, "Could not read process environ to verify child identity");
65            false
66        },
67    }
68}
69
/// Non-Linux fallback for `live_pid_is_subprocess`.
///
/// There is no `/proc/<pid>/environ` to inspect on these targets, so the
/// identity of a PID cannot be verified; the function fails closed and always
/// returns `false`. All arguments are ignored.
#[cfg(not(target_os = "linux"))]
#[must_use]
pub fn live_pid_is_subprocess(_pid: u32, _name_key: &str, _service_name: &str) -> bool {
    false
}
75
/// Reports whether `pid` is a zombie — terminated but not yet reaped.
///
/// The supervisor never reaps the children it spawns (their `Child` handle is
/// forgotten), so a terminated child still answers `kill(pid, 0)`; liveness and
/// shutdown probes must consult this to avoid treating a dead child as alive.
/// Non-Linux targets have no `/proc` and always return `false`.
///
/// Returns `false` when `/proc/<pid>/stat` cannot be read (for example because
/// the process no longer exists) or does not have the expected layout.
#[cfg(target_os = "linux")]
#[must_use]
pub fn is_zombie(pid: u32) -> bool {
    // Read raw bytes rather than a `String`: the comm field is the process
    // name as the kernel stores it and is not guaranteed to be valid UTF-8.
    // A UTF-8-validating read would fail on such a name and misreport a real
    // zombie as "not a zombie".
    let Ok(stat) = std::fs::read(format!("/proc/{pid}/stat")) else {
        return false;
    };
    stat_reports_zombie(&stat)
}

/// Parses the raw contents of a `/proc/<pid>/stat` file and reports whether
/// the state field is `Z` (zombie).
#[cfg(target_os = "linux")]
fn stat_reports_zombie(stat: &[u8]) -> bool {
    // The comm field is parenthesised and may itself contain spaces or `)`,
    // so the state field is the first token after the *final* `)`.
    let Some(comm_end) = stat.iter().rposition(|&byte| byte == b')') else {
        return false;
    };
    // `comm_end` is a valid index, so `comm_end + 1` is at most `stat.len()`
    // and the slice below cannot go out of bounds.
    let after_comm = &stat[comm_end + 1..];
    after_comm
        .split(|byte| byte.is_ascii_whitespace())
        .find(|token| !token.is_empty())
        == Some(&b"Z"[..])
}
95
/// Non-Linux fallback for `is_zombie`.
///
/// These targets have no `/proc/<pid>/stat` to consult, so zombie state cannot
/// be determined here; the function always returns `false` and ignores its
/// argument.
#[cfg(not(target_os = "linux"))]
#[must_use]
pub fn is_zombie(_pid: u32) -> bool {
    false
}