Skip to main content

sqlite_graphrag/commands/
claude_runner.rs

1//! Shared module for spawning Claude Code (`claude -p`) subprocesses.
2//!
3//! Eliminates duplication between `enrich.rs` and `ingest_claude.rs` (G02).
4//! Detects `terminal_reason: "max_turns"` in the JSON output (G03).
5
6use crate::errors::AppError;
7use std::path::Path;
8use std::process::{Command, Stdio};
9
/// Minimum Claude Code version required for structured JSON output.
///
/// `validate_claude_version` rejects any binary whose reported version
/// compares lower than this value.
const MIN_CLAUDE_VERSION: &str = "2.1.0";

/// Environment variables whitelisted for the subprocess.
///
/// `build_claude_command` clears the child's environment and then copies
/// only the variables named here (when they are set in the parent).
const ENV_WHITELIST: &[&str] = &[
    "PATH",
    "HOME",
    "USER",
    "SHELL",
    "TERM",
    "LANG",
    "XDG_CONFIG_HOME",
    "XDG_DATA_HOME",
    "XDG_RUNTIME_DIR",
    "ANTHROPIC_API_KEY",
    "CLAUDE_CONFIG_DIR",
    "TMPDIR",
    "TMP",
    "TEMP",
    "DYLD_FALLBACK_LIBRARY_PATH",
];

/// Windows-only environment variables.
///
/// Copied in addition to `ENV_WHITELIST` when compiling for Windows.
#[cfg(windows)]
const ENV_WHITELIST_WINDOWS: &[&str] = &[
    "LOCALAPPDATA",
    "APPDATA",
    "USERPROFILE",
    "SystemRoot",
    "COMSPEC",
    "PATHEXT",
    "HOMEPATH",
    "HOMEDRIVE",
];

/// Default virtual memory limit for LLM subprocesses (4 GiB).
///
/// Expressed in MiB; used when `SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB`
/// is unset or does not parse as an unsigned integer.
const DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB: u64 = 4096;
47
48/// Spawns a command with a virtual memory limit via `setrlimit(RLIMIT_AS)`.
49///
50/// On Linux, applies the limit in a `pre_exec` hook before the child process
51/// starts.  On non-Linux platforms, falls back to an unlimited spawn.
52/// The limit is read from `SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB`
53/// (default: 4096 MiB).
54#[cfg(target_os = "linux")]
55pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
56    use std::os::unix::process::CommandExt;
57    let max_mb: u64 = std::env::var("SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB")
58        .ok()
59        .and_then(|v| v.parse().ok())
60        .unwrap_or(DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB);
61    let max_bytes = max_mb * 1024 * 1024;
62    // SAFETY: pre_exec closure runs between fork() and exec() in the
63    // single-threaded child process — no other threads exist.
64    // libc::setsid and libc::setrlimit are async-signal-safe per POSIX.1-2008 §2.4.3.
65    // RLIMIT_AS limits virtual address space, not physical RSS.
66    // setsid failure with EPERM is tolerated (process already a session leader).
67    // On setrlimit failure, Err(last_os_error()) prevents exec.
68    unsafe {
69        cmd.pre_exec(move || {
70            let sid = libc::setsid();
71            if sid == -1 {
72                let err = std::io::Error::last_os_error();
73                if err.raw_os_error() != Some(libc::EPERM) {
74                    return Err(err);
75                }
76            }
77            let limit = libc::rlimit {
78                rlim_cur: max_bytes,
79                rlim_max: max_bytes,
80            };
81            if libc::setrlimit(libc::RLIMIT_AS, &limit) != 0 {
82                return Err(std::io::Error::last_os_error());
83            }
84            Ok(())
85        });
86    }
87    tracing::debug!(
88        target: "process",
89        program = ?cmd.get_program(),
90        args = ?cmd.get_args().collect::<Vec<_>>(),
91        "spawning external process"
92    );
93    cmd.spawn()
94}
95
/// Non-Linux variant: spawns `cmd` without applying any memory limit.
///
/// On other Unix targets the child still calls `setsid()` in a `pre_exec`
/// hook so that it runs in its own session; on non-Unix targets the command
/// is spawned as-is.
#[cfg(not(target_os = "linux"))]
pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
    #[cfg(unix)]
    {
        use std::os::unix::process::CommandExt;
        // SAFETY: the hook runs in the forked child before exec and only
        // calls setsid() (async-signal-safe per POSIX.1-2008 §2.4.3) and
        // reads errno; it captures nothing and does not allocate.
        unsafe {
            cmd.pre_exec(|| {
                if libc::setsid() != -1 {
                    return Ok(());
                }
                // EPERM means the process already leads a process group,
                // which is tolerated; anything else aborts the spawn.
                let os_err = std::io::Error::last_os_error();
                if os_err.raw_os_error() == Some(libc::EPERM) {
                    Ok(())
                } else {
                    Err(os_err)
                }
            });
        }
    }
    tracing::debug!(
        target: "process",
        program = ?cmd.get_program(),
        args = ?cmd.get_args().collect::<Vec<_>>(),
        "spawning external process"
    );
    cmd.spawn()
}
126
/// Parsed output element from `claude -p --output-format json`.
///
/// The CLI output is deserialized as an array of these elements.  Every
/// field is optional (or defaulted) so that elements of any kind can be
/// deserialized with the same struct.
#[derive(Debug, serde::Deserialize)]
pub struct ClaudeOutputElement {
    /// Element kind; `parse_claude_output` looks for `"system"` and `"result"`.
    pub r#type: Option<String>,
    /// Element sub-kind; `"init"` is matched together with type `"system"`.
    pub subtype: Option<String>,
    /// Error flag of the element; treated as `false` when the key is absent.
    #[serde(default)]
    pub is_error: bool,
    /// Structured payload; preferred over `result` when extracting the value.
    pub structured_output: Option<serde_json::Value>,
    /// Text result; parsed as JSON when `structured_output` is absent, and
    /// used as the error text when `error` is absent.
    pub result: Option<String>,
    /// Reported cost in USD, if present.
    pub total_cost_usd: Option<f64>,
    /// Error message, if present.
    pub error: Option<String>,
    /// Termination reason; the value `"max_turns"` is reported as a dedicated error.
    pub terminal_reason: Option<String>,
    /// Read from the camelCase key `apiKeySource`; the value `"none"` on the
    /// init element is interpreted as OAuth authentication.
    #[serde(rename = "apiKeySource")]
    pub api_key_source: Option<String>,
}
142
/// Result of a successful Claude invocation.
#[derive(Debug)]
pub struct ClaudeResult {
    /// Extracted JSON value (from `structured_output`, or parsed from `result`).
    pub value: serde_json::Value,
    /// Cost in USD taken from `total_cost_usd`; `0.0` when the field is absent.
    pub cost_usd: f64,
    /// `true` when the init element reported `apiKeySource` as `"none"`.
    pub is_oauth: bool,
}
150
151/// Validates that the Claude binary meets the minimum version requirement.
152pub fn validate_claude_version(binary: &Path) -> Result<String, AppError> {
153    let resolved = which::which(binary).map_err(|_| {
154        AppError::Validation(format!(
155            "executable '{}' not found in PATH; ensure it is installed and accessible",
156            binary.display()
157        ))
158    })?;
159    let output = Command::new(&resolved)
160        .arg("--version")
161        .stdin(Stdio::null())
162        .stdout(Stdio::piped())
163        .stderr(Stdio::piped())
164        .output()
165        .map_err(AppError::Io)?;
166
167    if !output.status.success() {
168        return Err(AppError::Validation(
169            "failed to run 'claude --version'".to_string(),
170        ));
171    }
172
173    let version_str = String::from_utf8(output.stdout)
174        .map_err(|_| AppError::Validation("claude --version output is not UTF-8".to_string()))?;
175    let version = version_str.trim().to_string();
176    let numeric = version.split([' ', '(']).next().unwrap_or("").trim();
177
178    fn parse_semver(s: &str) -> Option<(u64, u64, u64)> {
179        let parts: Vec<&str> = s.splitn(3, '.').collect();
180        if parts.len() < 2 {
181            return None;
182        }
183        let major = parts[0].parse::<u64>().ok()?;
184        let minor = parts[1].parse::<u64>().ok()?;
185        let patch = parts
186            .get(2)
187            .and_then(|p| p.parse::<u64>().ok())
188            .unwrap_or(0);
189        Some((major, minor, patch))
190    }
191
192    if let (Some(actual), Some(min)) = (parse_semver(numeric), parse_semver(MIN_CLAUDE_VERSION)) {
193        if actual < min {
194            return Err(AppError::Validation(format!(
195                "Claude Code version {numeric} is below minimum required {MIN_CLAUDE_VERSION}"
196            )));
197        }
198    }
199
200    Ok(version)
201}
202
203/// Builds a `Command` for `claude -p` with least-privilege environment.
204///
205/// G28-A (v1.0.68): respects `SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR` as a
206/// directory that exists but is empty; when set, we export it as
207/// `CLAUDE_CONFIG_DIR` so Claude Code loads no user-scoped MCP servers
208/// (and no settings.json hooks).  This cuts the typical 8-10 MCP process
209/// tree to zero.  When the env var is unset, behaviour is unchanged.
210///
211/// We deliberately do NOT pass `--strict-mcp-config` or `--mcp-config '{}'`
212/// because GitHub issue [anthropics/claude-code#10787] documents that
213/// Claude Code CLI ignores both flags and always falls back to
214/// `~/.mcp.json` regardless.  The `CLAUDE_CONFIG_DIR` env var is the only
215/// mechanism upstream actually honours.
216///
217/// [anthropics/claude-code#10787]: https://github.com/anthropics/claude-code/issues/10787
218pub fn build_claude_command(
219    binary: &Path,
220    prompt: &str,
221    json_schema: &str,
222    model: Option<&str>,
223    max_turns: u32,
224) -> Command {
225    let mut cmd = Command::new(binary);
226
227    cmd.env_clear();
228    for var in ENV_WHITELIST {
229        if let Ok(val) = std::env::var(var) {
230            cmd.env(var, val);
231        }
232    }
233
234    #[cfg(windows)]
235    for var in ENV_WHITELIST_WINDOWS {
236        if let Ok(val) = std::env::var(var) {
237            cmd.env(var, val);
238        }
239    }
240
241    // G28-A: if the user has pointed us at an empty config dir, force Claude
242    // Code to use it (which suppresses user-scoped MCP servers and hooks).
243    if let Ok(empty_dir) = std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR") {
244        if std::path::Path::new(&empty_dir).is_dir() {
245            cmd.env("CLAUDE_CONFIG_DIR", &empty_dir);
246            tracing::debug!(
247                target: "claude_runner",
248                "isolating claude subprocess to CLAUDE_CONFIG_DIR={}",
249                empty_dir
250            );
251        } else {
252            tracing::warn!(
253                target: "claude_runner",
254                path = %empty_dir,
255                "SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR is set but path is not a directory; \
256                 ignoring.  MCP isolation will NOT be applied."
257            );
258        }
259    }
260
261    cmd.arg("-p")
262        .arg(prompt)
263        .arg("--output-format")
264        .arg("json")
265        .arg("--json-schema")
266        .arg(json_schema)
267        .arg("--max-turns")
268        .arg(max_turns.to_string())
269        .arg("--no-session-persistence");
270
271    if std::env::var("ANTHROPIC_API_KEY").is_ok() {
272        cmd.arg("--bare");
273    } else {
274        cmd.arg("--dangerously-skip-permissions")
275            .arg("--settings")
276            .arg(r#"{"hooks":{}}"#);
277    }
278
279    if let Some(m) = model {
280        cmd.arg("--model").arg(m);
281    }
282
283    cmd.stdin(Stdio::null())
284        .stdout(Stdio::piped())
285        .stderr(Stdio::piped());
286
287    cmd
288}
289
290/// Parses `claude -p --output-format json` output array.
291///
292/// G03: detects `terminal_reason: "max_turns"` and returns a specific error
293/// instead of a generic failure message.
294pub fn parse_claude_output(stdout: &str) -> Result<ClaudeResult, AppError> {
295    let elements: Vec<ClaudeOutputElement> = serde_json::from_str(stdout).map_err(|e| {
296        AppError::Validation(format!("failed to parse claude output as JSON array: {e}"))
297    })?;
298
299    let is_oauth = elements
300        .iter()
301        .find(|e| e.r#type.as_deref() == Some("system") && e.subtype.as_deref() == Some("init"))
302        .and_then(|e| e.api_key_source.as_deref())
303        .map(|s| s == "none")
304        .unwrap_or(false);
305
306    let result_elem = elements
307        .iter()
308        .find(|e| e.r#type.as_deref() == Some("result"))
309        .ok_or_else(|| {
310            AppError::Validation("claude output missing 'result' element".to_string())
311        })?;
312
313    // G03: detect max_turns exhaustion before checking is_error
314    if result_elem.terminal_reason.as_deref() == Some("max_turns") {
315        tracing::warn!(
316            target: "claude_runner",
317            "claude -p hit max_turns limit — hooks may have consumed turns"
318        );
319        return Err(AppError::Validation(
320            "claude -p hit max_turns: hooks may be consuming turns; increase --max-turns or disable hooks".to_string(),
321        ));
322    }
323
324    if result_elem.is_error {
325        let err_msg = result_elem
326            .error
327            .as_deref()
328            .or(result_elem.result.as_deref())
329            .unwrap_or("unknown error");
330        if err_msg.contains("rate_limit") || err_msg.contains("overloaded") {
331            return Err(AppError::RateLimited {
332                detail: err_msg.to_string(),
333            });
334        }
335        if err_msg.contains("Not logged in") || err_msg.contains("authentication") {
336            tracing::warn!(
337                target: "claude_runner",
338                "Claude Code authentication failed. Re-authenticate interactively with: claude"
339            );
340        }
341        return Err(AppError::Validation(format!(
342            "claude extraction failed: {err_msg}"
343        )));
344    }
345
346    let value = if let Some(v) = result_elem.structured_output.clone() {
347        v
348    } else if let Some(text) = &result_elem.result {
349        serde_json::from_str(text).map_err(|e| {
350            AppError::Validation(format!("failed to parse claude result field as JSON: {e}"))
351        })?
352    } else {
353        return Err(AppError::Validation(
354            "claude result missing structured_output and result field".into(),
355        ));
356    };
357
358    let cost = result_elem.total_cost_usd.unwrap_or(0.0);
359    Ok(ClaudeResult {
360        value,
361        cost_usd: cost,
362        is_oauth,
363    })
364}
365
366/// Calls `claude -p` with prompt and schema, waits with timeout, and parses output.
367///
368/// G03: parses stdout even on non-zero exit to detect `terminal_reason: "max_turns"`.
369pub fn run_claude(
370    binary: &Path,
371    prompt: &str,
372    json_schema: &str,
373    input_text: &str,
374    model: Option<&str>,
375    timeout_secs: u64,
376    max_turns: u32,
377) -> Result<ClaudeResult, AppError> {
378    use wait_timeout::ChildExt;
379
380    let full_prompt = format!("{prompt}\n\n{input_text}");
381    let mut cmd = build_claude_command(binary, &full_prompt, json_schema, model, max_turns);
382
383    let mut child = spawn_with_memory_limit(&mut cmd).map_err(|e| {
384        AppError::Io(std::io::Error::new(
385            e.kind(),
386            format!("failed to spawn claude: {e}"),
387        ))
388    })?;
389
390    let start = std::time::Instant::now();
391    let timeout = std::time::Duration::from_secs(timeout_secs);
392    let status = child.wait_timeout(timeout).map_err(AppError::Io)?;
393
394    match status {
395        Some(exit_status) => {
396            tracing::debug!(
397                target: "process",
398                exit_code = ?exit_status.code(),
399                elapsed_ms = start.elapsed().as_millis() as u64,
400                "external process completed"
401            );
402
403            let mut stdout_buf = Vec::new();
404            let mut stderr_buf = Vec::new();
405            if let Some(mut out) = child.stdout.take() {
406                std::io::Read::read_to_end(&mut out, &mut stdout_buf).map_err(AppError::Io)?;
407            }
408            if let Some(mut err) = child.stderr.take() {
409                std::io::Read::read_to_end(&mut err, &mut stderr_buf).map_err(AppError::Io)?;
410            }
411
412            let stdout_str = String::from_utf8(stdout_buf)
413                .map_err(|_| AppError::Validation("claude -p stdout is not valid UTF-8".into()))?;
414
415            // G03: parse stdout even on failure to detect terminal_reason
416            if !exit_status.success() {
417                if let Ok(result) = parse_claude_output(&stdout_str) {
418                    return Ok(result);
419                }
420                let stderr_str = String::from_utf8_lossy(&stderr_buf);
421                if stderr_str.contains("auth") || stderr_str.contains("login") {
422                    tracing::warn!(
423                        target: "claude_runner",
424                        "Claude Code authentication may have failed. Re-authenticate with: claude"
425                    );
426                }
427                return Err(AppError::Validation(format!(
428                    "claude -p exited with code {:?}: {}",
429                    exit_status.code(),
430                    stderr_str.trim()
431                )));
432            }
433
434            parse_claude_output(&stdout_str)
435        }
436        None => {
437            tracing::warn!(target: "claude_runner", timeout_secs, "claude -p timed out, terminating");
438            terminate_gracefully(&mut child, 3);
439            Err(AppError::Validation(format!(
440                "claude -p timed out after {timeout_secs} seconds"
441            )))
442        }
443    }
444}
445
446/// Terminates a child process gracefully: SIGTERM first, SIGKILL after grace period.
447#[cfg(unix)]
448pub fn terminate_gracefully(child: &mut std::process::Child, grace_secs: u64) {
449    use wait_timeout::ChildExt;
450    unsafe {
451        libc::kill(child.id() as i32, libc::SIGTERM);
452    }
453    match child.wait_timeout(std::time::Duration::from_secs(grace_secs)) {
454        Ok(Some(_)) => {}
455        _ => {
456            tracing::warn!(target: "process", pid = child.id(), "child ignored SIGTERM, sending SIGKILL");
457            let _ = child.kill();
458            let _ = child.wait();
459        }
460    }
461}
462
/// Non-Unix fallback: there is no graceful phase, the child is killed at
/// once (Windows TerminateProcess) and then reaped.  `_grace_secs` is unused.
#[cfg(not(unix))]
pub fn terminate_gracefully(child: &mut std::process::Child, _grace_secs: u64) {
    // Best-effort cleanup: both results are deliberately discarded.
    let _kill_outcome = child.kill();
    let _exit_status = child.wait();
}
469
#[cfg(test)]
mod tests {
    use super::*;

    /// A `max_turns` terminal reason must surface as an error even when the
    /// element also carries a structured payload.
    #[test]
    fn parse_output_detects_max_turns() {
        let stdout = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"terminal_reason":"max_turns","structured_output":{"name":"t"}}]"#;
        let message = parse_claude_output(stdout).unwrap_err().to_string();
        assert!(
            message.contains("max_turns"),
            "must detect max_turns in output"
        );
    }

    /// Happy path: value, cost and OAuth flag are all extracted.
    #[test]
    fn parse_output_extracts_structured_value() {
        let stdout = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"structured_output":{"key":"val"},"total_cost_usd":0.01}]"#;
        let ClaudeResult {
            value,
            cost_usd,
            is_oauth,
        } = parse_claude_output(stdout).unwrap();
        assert_eq!(value["key"], "val");
        assert!((cost_usd - 0.01).abs() < f64::EPSILON);
        assert!(is_oauth);
    }

    /// Rate-limit errors map to the dedicated `RateLimited` variant.
    #[test]
    fn parse_output_detects_rate_limit() {
        let stdout = r#"[{"type":"result","is_error":true,"error":"rate_limit exceeded"}]"#;
        let err = parse_claude_output(stdout).unwrap_err();
        let is_rate_limited = matches!(err, AppError::RateLimited { .. });
        assert!(
            is_rate_limited,
            "expected AppError::RateLimited, got: {err}"
        );
    }
}