sqlite_graphrag/
reaper.rs1use std::time::Duration;
18
/// Minimum age, in seconds, that a candidate process must have before the
/// reaper will signal it; handed to `check_process_age` during the scan.
const ORPHAN_MIN_AGE_SECS: u64 = 60;

/// Process names the reaper looks for. Each entry is compared for exact
/// equality against the trimmed contents of `/proc/<pid>/comm`.
const ORPHAN_SCAN_TARGETS: &[&str] = &["claude", "codex"];
21
/// Outcome of one orphan-reaping pass.
///
/// `Default` yields the all-zero report, which is the state every scan
/// starts from.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ReaperReport {
    /// Number of processes that matched every orphan criterion and were
    /// therefore handed to the termination step.
    pub found: usize,
    /// Number of matched processes for which sending the signal succeeded.
    pub killed: usize,
    /// Number of matched processes for which sending the signal failed.
    pub failed: usize,
    /// Wall-clock duration of the whole pass, in milliseconds.
    pub elapsed_ms: u64,
}
33
34pub fn scan_and_kill_orphans() -> ReaperReport {
39 let start = std::time::Instant::now();
40 let mut report = ReaperReport {
41 found: 0,
42 killed: 0,
43 failed: 0,
44 elapsed_ms: 0,
45 };
46
47 #[cfg(unix)]
48 {
49 if let Err(e) = scan_unix(&mut report) {
50 tracing::warn!(target: "reaper", error = %e, "orphan scan failed");
51 }
52 }
53
54 #[cfg(not(unix))]
55 {
56 tracing::debug!(target: "reaper", "orphan scan is a no-op on non-Unix platforms");
57 }
58
59 report.elapsed_ms = start.elapsed().as_millis() as u64;
60 if report.killed > 0 {
61 tracing::warn!(
62 target: "reaper",
63 found = report.found,
64 killed = report.killed,
65 failed = report.failed,
66 "reaped orphan LLM subprocesses"
67 );
68 } else {
69 tracing::info!(target: "reaper", found = report.found, "no orphan LLM subprocesses detected");
70 }
71 report
72}
73
74#[cfg(unix)]
75fn scan_unix(report: &mut ReaperReport) -> std::io::Result<()> {
76 use std::fs;
77 use std::path::Path;
78
79 let proc = Path::new("/proc");
80 let entries = fs::read_dir(proc)?;
81 for entry in entries.flatten() {
82 let name = entry.file_name();
83 let Some(name_str) = name.to_str() else {
84 continue;
85 };
86 if !name_str.chars().all(|c| c.is_ascii_digit()) {
87 continue;
88 }
89 let pid: i32 = match name_str.parse() {
90 Ok(p) => p,
91 Err(_) => continue,
92 };
93 if pid == std::process::id() as i32 {
94 continue;
95 }
96
97 let stat_path = entry.path().join("stat");
98 let stat = match fs::read_to_string(&stat_path) {
99 Ok(s) => s,
100 Err(_) => continue,
101 };
102
103 let Some(close_paren) = stat.rfind(')') else {
107 continue;
108 };
109 let after = &stat[close_paren + 1..];
110 let mut parts = after.split_whitespace();
111 let state = parts.next().unwrap_or("");
113 let ppid: i32 = parts.next().and_then(|p| p.parse().ok()).unwrap_or(-1);
114
115 if ppid != 1 {
118 continue;
119 }
120
121 if state.starts_with('Z') {
123 continue;
124 }
125
126 let comm_path = entry.path().join("comm");
130 let comm = match fs::read_to_string(&comm_path) {
131 Ok(s) => s.trim().to_string(),
132 Err(_) => continue,
133 };
134
135 if !ORPHAN_SCAN_TARGETS.iter().any(|t| comm == *t) {
136 continue;
137 }
138
139 let age_ok = check_process_age(pid, ORPHAN_MIN_AGE_SECS);
142 if !age_ok {
143 continue;
144 }
145
146 report.found += 1;
147 match terminate_pid(pid) {
148 Ok(()) => {
149 report.killed += 1;
150 tracing::info!(target: "reaper", pid, comm = %comm, "killed orphan LLM subprocess");
151 }
152 Err(e) => {
153 report.failed += 1;
154 tracing::warn!(target: "reaper", pid, comm = %comm, error = %e, "failed to kill orphan");
155 }
156 }
157 }
158 Ok(())
159}
160
/// Reports whether process `pid` has been running for at least
/// `min_age_secs` seconds.
///
/// The age is computed from the kernel's own bookkeeping: the `starttime`
/// field of `/proc/<pid>/stat` (clock ticks elapsed between boot and process
/// start) is subtracted from the system uptime read from `/proc/uptime`.
/// File timestamps of procfs entries are deliberately not used: Linux stamps
/// a procfs inode when it is first instantiated, so a `stat` file that was
/// looked up a moment ago appears brand new no matter how old the process is.
///
/// Returns `false` whenever the age cannot be established (process already
/// gone, procfs unavailable, malformed data), so callers never act on a
/// guess.
#[cfg(unix)]
fn check_process_age(pid: i32, min_age_secs: u64) -> bool {
    use std::fs;

    // `starttime` is expressed in USER_HZ clock ticks, which is 100 on the
    // mainstream Linux architectures.
    // NOTE(review): switch to `libc::sysconf(libc::_SC_CLK_TCK)` if this
    // crate must support a target where the tick rate differs.
    const USER_HZ: u64 = 100;
    // `starttime` is field 22 of `stat`; counting from `state` (field 3),
    // the first field after the parenthesised command name, that is index 19.
    const STARTTIME_INDEX: usize = 19;

    let proc_root = std::path::Path::new("/proc");

    let Ok(stat) = fs::read_to_string(proc_root.join(pid.to_string()).join("stat")) else {
        return false;
    };
    // The command name may itself contain spaces or ')', so resume parsing
    // after the last closing parenthesis.
    let Some(close_paren) = stat.rfind(')') else {
        return false;
    };
    let Some(start_ticks) = stat[close_paren + 1..]
        .split_whitespace()
        .nth(STARTTIME_INDEX)
        .and_then(|field| field.parse::<u64>().ok())
    else {
        return false;
    };

    // Uptime is read after `stat` so it can never predate the start time.
    let Ok(uptime) = fs::read_to_string(proc_root.join("uptime")) else {
        return false;
    };
    let Some(uptime) = uptime
        .split_whitespace()
        .next()
        .and_then(|field| field.parse::<f64>().ok())
        .and_then(|secs| Duration::try_from_secs_f64(secs).ok())
    else {
        return false;
    };

    let started_after_boot = Duration::from_millis(start_ticks.saturating_mul(1000) / USER_HZ);
    // Saturate instead of underflowing if the two clocks disagree slightly.
    uptime.saturating_sub(started_after_boot) >= Duration::from_secs(min_age_secs)
}
180
181#[cfg(unix)]
182fn terminate_pid(pid: i32) -> std::io::Result<()> {
183 let rc = unsafe { libc::kill(pid, libc::SIGTERM) };
187 if rc == 0 {
188 Ok(())
189 } else {
190 Err(std::io::Error::last_os_error())
191 }
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// A report built with zero in every field exposes zero counters.
    #[test]
    fn reaper_report_starts_zeroed() {
        let report = ReaperReport {
            found: 0,
            killed: 0,
            failed: 0,
            elapsed_ms: 0,
        };
        assert_eq!((report.found, report.killed, report.failed), (0, 0, 0));
    }

    /// Pins the minimum-age threshold at sixty seconds.
    #[test]
    fn orphan_min_age_is_one_minute() {
        assert_eq!(60, ORPHAN_MIN_AGE_SECS);
    }

    /// Both known LLM CLI names must be on the scan list.
    #[test]
    fn orphan_targets_include_claude_and_codex() {
        for expected in ["claude", "codex"].iter() {
            assert!(ORPHAN_SCAN_TARGETS.contains(expected));
        }
    }

    /// Runs a real scan and checks that it returns within the time budget.
    #[test]
    fn scan_completes_without_panic_on_linux() {
        let report = scan_and_kill_orphans();
        assert!(report.elapsed_ms < 30_000, "scan must finish in <30s");
    }
}