sqlite_graphrag/
reaper.rs1#[cfg(unix)]
23use std::time::Duration;
24
/// Minimum age, in seconds, a candidate process must have before the reaper
/// will signal it. Passed to `check_process_age` by the `/proc` scan.
#[cfg(unix)]
const ORPHAN_MIN_AGE_SECS: u64 = 60;
27
/// Process names (as reported by `/proc/<pid>/comm`) that the reaper treats as
/// candidates. The comparison against this list is an exact string match.
#[cfg(unix)]
const ORPHAN_SCAN_TARGETS: &[&str] = &[
    "claude",
    "codex",
];
30
/// Summary of a single reaping pass, as returned by `scan_and_kill_orphans`.
///
/// `Default` yields an all-zero report, which is the state every pass starts
/// from.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ReaperReport {
    /// Number of processes that passed every orphan filter and were handed to
    /// the termination step (`killed + failed`).
    pub found: usize,
    /// Number of processes for which sending the termination signal succeeded.
    pub killed: usize,
    /// Number of processes for which sending the termination signal failed.
    pub failed: usize,
    /// Wall-clock duration of the whole pass, in milliseconds.
    pub elapsed_ms: u64,
}
42
43pub fn scan_and_kill_orphans() -> ReaperReport {
48 let start = std::time::Instant::now();
49 let mut report = ReaperReport {
50 found: 0,
51 killed: 0,
52 failed: 0,
53 elapsed_ms: 0,
54 };
55
56 #[cfg(unix)]
57 {
58 if let Err(e) = scan_unix(&mut report) {
59 tracing::warn!(target: "reaper", error = %e, "orphan scan failed");
60 }
61 clean_stale_codex_homes();
64 }
65
66 #[cfg(not(unix))]
67 {
68 tracing::debug!(target: "reaper", "orphan scan is a no-op on non-Unix platforms");
69 }
70
71 report.elapsed_ms = start.elapsed().as_millis() as u64;
72 if report.killed > 0 {
73 tracing::warn!(
74 target: "reaper",
75 found = report.found,
76 killed = report.killed,
77 failed = report.failed,
78 "reaped orphan LLM subprocesses"
79 );
80 } else {
81 tracing::info!(target: "reaper", found = report.found, "no orphan LLM subprocesses detected");
82 }
83 report
84}
85
86#[cfg(unix)]
87fn scan_unix(report: &mut ReaperReport) -> std::io::Result<()> {
88 use std::fs;
89 use std::path::Path;
90
91 let proc = Path::new("/proc");
92 let entries = fs::read_dir(proc)?;
93 for entry in entries.flatten() {
94 let name = entry.file_name();
95 let Some(name_str) = name.to_str() else {
96 continue;
97 };
98 if !name_str.chars().all(|c| c.is_ascii_digit()) {
99 continue;
100 }
101 let pid: i32 = match name_str.parse() {
102 Ok(p) => p,
103 Err(_) => continue,
104 };
105 if pid == std::process::id() as i32 {
106 continue;
107 }
108
109 let stat_path = entry.path().join("stat");
110 let stat = match fs::read_to_string(&stat_path) {
111 Ok(s) => s,
112 Err(_) => continue,
113 };
114
115 let Some(close_paren) = stat.rfind(')') else {
119 continue;
120 };
121 let after = &stat[close_paren + 1..];
122 let mut parts = after.split_whitespace();
123 let state = parts.next().unwrap_or("");
125 let ppid: i32 = parts.next().and_then(|p| p.parse().ok()).unwrap_or(-1);
126
127 if ppid != 1 {
130 continue;
131 }
132
133 if state.starts_with('Z') {
135 continue;
136 }
137
138 let comm_path = entry.path().join("comm");
142 let comm = match fs::read_to_string(&comm_path) {
143 Ok(s) => s.trim().to_string(),
144 Err(_) => continue,
145 };
146
147 if !ORPHAN_SCAN_TARGETS.iter().any(|t| comm == *t) {
148 continue;
149 }
150
151 let age_ok = check_process_age(pid, ORPHAN_MIN_AGE_SECS);
154 if !age_ok {
155 continue;
156 }
157
158 report.found += 1;
159 match terminate_pid(pid) {
160 Ok(()) => {
161 report.killed += 1;
162 tracing::info!(target: "reaper", pid, comm = %comm, "killed orphan LLM subprocess");
163 }
164 Err(e) => {
165 report.failed += 1;
166 tracing::warn!(target: "reaper", pid, comm = %comm, error = %e, "failed to kill orphan");
167 }
168 }
169 }
170 Ok(())
171}
172
/// Reports whether the process `pid` looks at least `min_age_secs` old.
///
/// The age is derived from the modification timestamp of `/proc/<pid>/stat`
/// compared against the current system time. Any failure along the way
/// (missing file, unavailable timestamp, timestamp in the future) yields
/// `false`, so an unreadable process is never treated as old enough.
///
/// NOTE(review): the procfs file timestamp is only a proxy for the process
/// start time; confirm it is accurate enough on the target kernels (the
/// `starttime` field of `/proc/<pid>/stat` is the authoritative value).
#[cfg(unix)]
fn check_process_age(pid: i32, min_age_secs: u64) -> bool {
    let stat_file = format!("/proc/{pid}/stat");

    let observed_age = std::fs::metadata(&stat_file)
        .and_then(|info| info.modified())
        .ok()
        .and_then(|stamp| std::time::SystemTime::now().duration_since(stamp).ok());

    match observed_age {
        Some(age) => age >= Duration::from_secs(min_age_secs),
        None => false,
    }
}
192
193#[cfg(unix)]
205fn clean_stale_codex_homes() {
206 let Ok(home) = std::env::var("HOME") else {
207 return;
208 };
209 let base = std::path::Path::new(&home).join(".local/share/sqlite-graphrag");
210 let Ok(entries) = std::fs::read_dir(&base) else {
211 return;
212 };
213 let mut removed = 0usize;
214 for entry in entries.flatten() {
215 let name = entry.file_name();
216 let Some(name_str) = name.to_str() else {
217 continue;
218 };
219 let Some(pid_str) = name_str.strip_prefix("codex-home-") else {
220 continue;
221 };
222 let Ok(pid) = pid_str.parse::<i32>() else {
223 continue;
224 };
225 if pid == std::process::id() as i32 {
226 continue;
227 }
228 let alive = unsafe { libc::kill(pid, 0) } == 0
231 || std::io::Error::last_os_error().raw_os_error() != Some(libc::ESRCH);
232 if alive {
233 continue;
234 }
235 if std::fs::remove_dir_all(entry.path()).is_ok() {
236 removed += 1;
237 }
238 }
239 if removed > 0 {
240 tracing::info!(target: "reaper", removed, "removed stale codex-home isolation dirs");
241 }
242}
243
244#[cfg(unix)]
245fn terminate_pid(pid: i32) -> std::io::Result<()> {
246 let rc = unsafe { libc::kill(pid, libc::SIGTERM) };
250 if rc == 0 {
251 Ok(())
252 } else {
253 Err(std::io::Error::last_os_error())
254 }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// A hand-built report with zero counters reads back as zero.
    #[test]
    fn reaper_report_starts_zeroed() {
        let report = ReaperReport {
            found: 0,
            killed: 0,
            failed: 0,
            elapsed_ms: 0,
        };
        assert_eq!((report.found, report.killed, report.failed), (0, 0, 0));
    }

    /// Pins the minimum-age threshold so a change has to be deliberate.
    #[cfg(unix)]
    #[test]
    fn orphan_min_age_is_one_minute() {
        let expected_secs: u64 = 60;
        assert_eq!(ORPHAN_MIN_AGE_SECS, expected_secs);
    }

    /// Both known LLM CLI process names must be on the target list.
    #[cfg(unix)]
    #[test]
    fn orphan_targets_include_claude_and_codex() {
        let has = |wanted: &str| ORPHAN_SCAN_TARGETS.iter().any(|name| *name == wanted);
        assert!(has("claude"));
        assert!(has("codex"));
    }

    /// Smoke test: a full pass returns within the time budget.
    ///
    /// Note that this runs the real `scan_and_kill_orphans`, so on a Unix host
    /// it performs the actual `/proc` scan, signalling and directory cleanup.
    #[test]
    fn scan_completes_without_panic_on_linux() {
        let outcome = scan_and_kill_orphans();
        let took_ms = outcome.elapsed_ms;
        assert!(took_ms < 30_000, "scan must finish in <30s");
    }
}