Skip to main content

rag_rat_core/
fleet.rs

1//! Fleet hot-upgrade trigger.
2//!
3//! When a new `rag-rat` binary lands at the configured install path (an atomic `cargo install`
4//! rename), the elected watcher signals every still-old, hot-upgrade-armed `rag-rat mcp` server
5//! — including this process, last — with `SIGUSR1`, so each `exec`s the new binary at its own
6//! request boundary. Linux-only (it walks `/proc`); a no-op elsewhere.
7//!
8//! Targeting is deliberately conservative. A process is signaled only if it is (a) running *our*
9//! binary, (b) the `mcp` subcommand, (c) hot-upgrade-armed (its environ carries
10//! [`UPGRADE_BIN_ENV`], i.e. it has a `SIGUSR1` handler installed), and (d) on an outdated binary
11//! (its exe inode differs from the inode now installed). The environ check is the safety
12//! interlock: without it a `SIGUSR1` to an un-armed server would hit the default disposition and
13//! terminate it.
14
15use std::path::Path;
16
/// Name of the environment variable that carries the installed-binary path.
///
/// A process whose environ contains an entry starting with `RAG_RAT_UPGRADE_BIN=` is treated by
/// this module as hot-upgrade-armed, i.e. as having a `SIGUSR1` handler installed. The constant
/// is a shared contract with the MCP server (`rag-rat-mcp` re-exports it) and is also the needle
/// used here when reading other processes' `/proc/<pid>/environ`.
pub const UPGRADE_BIN_ENV: &str = "RAG_RAT_UPGRADE_BIN";
21
/// Snapshot of one process found under `/proc`, carrying only the facts that
/// target selection consumes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct ProcInfo {
    /// Process id, as parsed from the `/proc/<pid>` directory name.
    pub pid: i32,
    /// Inode of the executable image behind `/proc/<pid>/exe`.
    pub exe_inode: u64,
    /// `true` when this entry describes the scanning process itself.
    pub is_self: bool,
    /// `true` when the process is our binary, runs `mcp`, and has the hot-upgrade env armed.
    pub eligible: bool,
}
32
33/// Choose which PIDs to `SIGUSR1`, ordered so this process upgrades **last** (others ascending,
34/// self appended). A process is a target iff it is [`ProcInfo::eligible`] and running an outdated
35/// binary (`exe_inode != installed_inode`). Pure, so it is unit-testable without `/proc`.
36pub(crate) fn select_targets(procs: &[ProcInfo], installed_inode: u64) -> Vec<i32> {
37    let mut others: Vec<i32> = Vec::new();
38    let mut own: Option<i32> = None;
39    for proc in procs {
40        if !proc.eligible || proc.exe_inode == installed_inode {
41            continue;
42        }
43        if proc.is_self {
44            own = Some(proc.pid);
45        } else {
46            others.push(proc.pid);
47        }
48    }
49    others.sort_unstable();
50    others.extend(own); // self last
51    others
52}
53
54/// Signal the fleet to hot-upgrade to the binary now at `install_path`. Best-effort and
55/// non-blocking; failures (unreadable `/proc`, vanished PIDs) are skipped silently.
56#[cfg(target_os = "linux")]
57pub fn trigger(install_path: &Path) {
58    let Some(installed_inode) = linux::inode(install_path) else {
59        return;
60    };
61    let Some(bin_name) = install_path.file_name() else {
62        return;
63    };
64    let procs = linux::scan_proc(bin_name);
65    for pid in select_targets(&procs, installed_inode) {
66        linux::send_sigusr1(pid);
67    }
68}
69
70#[cfg(not(target_os = "linux"))]
71pub fn trigger(_install_path: &Path) {}
72
73#[cfg(target_os = "linux")]
74mod linux {
75    use std::{
76        ffi::OsStr,
77        fs,
78        os::unix::{ffi::OsStrExt, fs::MetadataExt},
79        path::Path,
80    };
81
82    use super::{ProcInfo, UPGRADE_BIN_ENV};
83
84    pub(super) fn inode(path: &Path) -> Option<u64> {
85        fs::metadata(path).ok().map(|meta| meta.ino())
86    }
87
88    /// Walk `/proc/<pid>` and build a [`ProcInfo`] for every numeric PID, classifying eligibility.
89    pub(super) fn scan_proc(bin_name: &OsStr) -> Vec<ProcInfo> {
90        let self_pid = std::process::id() as i32;
91        let Ok(entries) = fs::read_dir("/proc") else {
92            return Vec::new();
93        };
94        entries
95            .flatten()
96            .filter_map(|entry| {
97                let pid: i32 = entry.file_name().to_str()?.parse().ok()?;
98                let proc_dir = entry.path();
99                // Inode of the running image; `/proc/<pid>/exe` follows to the file even when it
100                // was unlinked by the install rename (the classic "(deleted)" case).
101                let exe_inode = fs::metadata(proc_dir.join("exe")).ok()?.ino();
102                Some(ProcInfo {
103                    pid,
104                    exe_inode,
105                    is_self: pid == self_pid,
106                    eligible: is_eligible(&proc_dir, bin_name),
107                })
108            })
109            .collect()
110    }
111
112    /// Our binary (`argv[0]` basename matches), running the `mcp` subcommand, with the hot-upgrade
113    /// env armed. Any unreadable bit (permission, race) makes the process ineligible — fail safe.
114    fn is_eligible(proc_dir: &Path, bin_name: &OsStr) -> bool {
115        runs_our_mcp(proc_dir, bin_name) && has_upgrade_env(proc_dir)
116    }
117
118    fn runs_our_mcp(proc_dir: &Path, bin_name: &OsStr) -> bool {
119        let Ok(cmdline) = fs::read(proc_dir.join("cmdline")) else {
120            return false;
121        };
122        // `/proc/<pid>/cmdline` is NUL-separated argv.
123        let mut args = cmdline.split(|&byte| byte == 0).filter(|arg| !arg.is_empty());
124        let Some(argv0) = args.next() else {
125            return false;
126        };
127        let argv0_name = Path::new(OsStr::from_bytes(argv0)).file_name();
128        let is_our_binary = argv0_name == Some(bin_name);
129        let runs_mcp = args.any(|arg| arg == b"mcp");
130        is_our_binary && runs_mcp
131    }
132
133    fn has_upgrade_env(proc_dir: &Path) -> bool {
134        let Ok(environ) = fs::read(proc_dir.join("environ")) else {
135            return false; // environ is unreadable across uids — fail safe.
136        };
137        let needle = format!("{UPGRADE_BIN_ENV}=");
138        environ.split(|&byte| byte == 0).any(|entry| entry.starts_with(needle.as_bytes()))
139    }
140
141    pub(super) fn send_sigusr1(pid: i32) {
142        // SAFETY: `kill(2)` with a valid signal number; an invalid/vanished PID just returns an
143        // error we ignore. No memory is touched.
144        unsafe {
145            libc::kill(pid, libc::SIGUSR1);
146        }
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// An eligible, non-self process; tests tweak individual fields with struct-update syntax.
    fn candidate(pid: i32, exe_inode: u64) -> ProcInfo {
        ProcInfo { pid, exe_inode, is_self: false, eligible: true }
    }

    #[test]
    fn selects_only_outdated_eligible_processes() {
        let installed = 100;
        let procs = [
            // eligible + outdated → target
            candidate(10, 99),
            // eligible but already new → skip
            candidate(11, 100),
            // outdated but ineligible → skip
            ProcInfo { eligible: false, ..candidate(12, 99) },
            // eligible + outdated → target
            candidate(13, 42),
        ];
        let picked = select_targets(&procs, installed);
        assert_eq!(picked, vec![10, 13]);
    }

    #[test]
    fn self_is_signaled_last() {
        let installed = 100;
        let procs = [
            // self, outdated
            ProcInfo { is_self: true, ..candidate(7, 1) },
            // others, outdated
            candidate(30, 1),
            candidate(20, 1),
        ];
        // Others ascending, then self.
        let picked = select_targets(&procs, installed);
        assert_eq!(picked, vec![20, 30, 7]);
    }

    #[test]
    fn empty_when_nothing_outdated_or_eligible() {
        let installed = 5;
        let procs = [candidate(1, 5), ProcInfo { eligible: false, ..candidate(2, 4) }];
        let picked = select_targets(&procs, installed);
        assert!(picked.is_empty());
    }
}