// sqlite_graphrag/commands/claude_runner.rs
use crate::errors::AppError;
use std::path::Path;
use std::process::{Command, Stdio};
9
/// Lowest Claude Code release accepted by `validate_claude_version`, written as
/// `major.minor.patch`.
const MIN_CLAUDE_VERSION: &str = "2.1.0";
12
/// Names of the parent-process environment variables that `build_claude_command`
/// copies into the otherwise cleared environment of the `claude` subprocess.
///
/// Anything not listed here is withheld from the subprocess.
const ENV_WHITELIST: &[&str] = &[
    // Executable lookup and user identity.
    "PATH",
    "HOME",
    "USER",
    "SHELL",
    // Terminal and locale.
    "TERM",
    "LANG",
    // XDG base directories.
    "XDG_CONFIG_HOME",
    "XDG_DATA_HOME",
    "XDG_RUNTIME_DIR",
    // Claude Code configuration directory.
    "CLAUDE_CONFIG_DIR",
    // Temporary-file locations.
    "TMPDIR",
    "TMP",
    "TEMP",
    // Fallback search path for dynamic libraries.
    "DYLD_FALLBACK_LIBRARY_PATH",
];
36
/// Extra environment variables forwarded to the `claude` subprocess on Windows, in
/// addition to the entries of `ENV_WHITELIST`.
#[cfg(windows)]
const ENV_WHITELIST_WINDOWS: &[&str] = &[
    // Per-user data and profile locations.
    "LOCALAPPDATA",
    "APPDATA",
    "USERPROFILE",
    // System directory, command interpreter and executable extensions.
    "SystemRoot",
    "COMSPEC",
    "PATHEXT",
    // Home directory expressed as path + drive.
    "HOMEPATH",
    "HOMEDRIVE",
];
49
/// Address-space cap, in MiB, that `spawn_with_memory_limit` applies on Linux when
/// `SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB` is unset or is not a valid integer.
#[cfg(target_os = "linux")]
const DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB: u64 = 4 * 1024;
53
54#[cfg(target_os = "linux")]
79pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
80 use std::os::unix::process::CommandExt;
81 let max_mb: u64 = std::env::var("SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB")
82 .ok()
83 .and_then(|v| v.parse().ok())
84 .unwrap_or(DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB);
85 let max_bytes = max_mb * 1024 * 1024;
86 unsafe {
93 cmd.pre_exec(move || {
94 let sid = libc::setsid();
95 if sid == -1 {
96 let err = std::io::Error::last_os_error();
97 if err.raw_os_error() != Some(libc::EPERM) {
98 return Err(err);
99 }
100 }
101 let limit = libc::rlimit {
102 rlim_cur: max_bytes,
103 rlim_max: max_bytes,
104 };
105 if libc::setrlimit(libc::RLIMIT_AS, &limit) != 0 {
106 return Err(std::io::Error::last_os_error());
107 }
108 Ok(())
109 });
110 }
111 tracing::debug!(
112 target: "process",
113 program = ?cmd.get_program(),
114 args = ?cmd.get_args().collect::<Vec<_>>(),
115 "spawning external process"
116 );
117 cmd.spawn()
118}
119
/// Spawns `cmd` without a memory cap (non-Linux targets).
///
/// On Unix the child first calls `setsid` before `exec`; an `EPERM` failure from
/// `setsid` is tolerated, any other failure aborts the spawn. On non-Unix targets the
/// command is spawned unchanged.
///
/// # Errors
///
/// Returns whatever [`Command::spawn`] reports, including an error raised by the
/// pre-exec hook.
#[cfg(not(target_os = "linux"))]
pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
    #[cfg(unix)]
    {
        use std::os::unix::process::CommandExt;
        // SAFETY: the hook runs in the child between fork and exec; it only issues the
        // `setsid` call and inspects `errno`, and captures nothing.
        unsafe {
            cmd.pre_exec(|| {
                if libc::setsid() != -1 {
                    return Ok(());
                }
                let os_err = std::io::Error::last_os_error();
                match os_err.raw_os_error() {
                    Some(code) if code == libc::EPERM => Ok(()),
                    _ => Err(os_err),
                }
            });
        }
    }
    tracing::debug!(
        target: "process",
        program = ?cmd.get_program(),
        args = ?cmd.get_args().collect::<Vec<_>>(),
        "spawning external process"
    );
    cmd.spawn()
}
150
151#[derive(Debug, serde::Deserialize)]
153pub struct ClaudeOutputElement {
154 pub r#type: Option<String>,
155 pub subtype: Option<String>,
156 #[serde(default)]
157 pub is_error: bool,
158 pub structured_output: Option<serde_json::Value>,
159 pub result: Option<String>,
160 pub total_cost_usd: Option<f64>,
161 pub error: Option<String>,
162 pub terminal_reason: Option<String>,
163 #[serde(rename = "apiKeySource")]
164 pub api_key_source: Option<String>,
165}
166
167#[derive(Debug)]
169pub struct ClaudeResult {
170 pub value: serde_json::Value,
171 pub cost_usd: f64,
172 pub is_oauth: bool,
173}
174
175pub fn validate_claude_version(binary: &Path) -> Result<String, AppError> {
177 let resolved = which::which(binary).map_err(|_| {
178 AppError::Validation(format!(
179 "executable '{}' not found in PATH; ensure it is installed and accessible",
180 binary.display()
181 ))
182 })?;
183 let output = Command::new(&resolved)
184 .arg("--version")
185 .stdin(Stdio::null())
186 .stdout(Stdio::piped())
187 .stderr(Stdio::piped())
188 .output()
189 .map_err(AppError::Io)?;
190
191 if !output.status.success() {
192 return Err(AppError::Validation(
193 "failed to run 'claude --version'".to_string(),
194 ));
195 }
196
197 let version_str = String::from_utf8(output.stdout)
198 .map_err(|_| AppError::Validation("claude --version output is not UTF-8".to_string()))?;
199 let version = version_str.trim().to_string();
200 let numeric = version.split([' ', '(']).next().unwrap_or("").trim();
201
202 fn parse_semver(s: &str) -> Option<(u64, u64, u64)> {
203 let parts: Vec<&str> = s.splitn(3, '.').collect();
204 if parts.len() < 2 {
205 return None;
206 }
207 let major = parts[0].parse::<u64>().ok()?;
208 let minor = parts[1].parse::<u64>().ok()?;
209 let patch = parts
210 .get(2)
211 .and_then(|p| p.parse::<u64>().ok())
212 .unwrap_or(0);
213 Some((major, minor, patch))
214 }
215
216 if let (Some(actual), Some(min)) = (parse_semver(numeric), parse_semver(MIN_CLAUDE_VERSION)) {
217 if actual < min {
218 return Err(AppError::Validation(format!(
219 "Claude Code version {numeric} is below minimum required {MIN_CLAUDE_VERSION}"
220 )));
221 }
222 }
223
224 Ok(version)
225}
226
227pub fn build_claude_command(
264 binary: &Path,
265 prompt: &str,
266 json_schema: &str,
267 model: Option<&str>,
268 max_turns: u32,
269) -> Command {
270 if let Ok(_key) = std::env::var("ANTHROPIC_API_KEY") {
274 let mut cmd = Command::new("false");
278 cmd.env_clear();
279 cmd.env("PATH", "/nonexistent");
280 cmd.arg("--oauth-only-violation-anthropic-api-key-set");
281 return cmd;
282 }
283
284 let mut cmd = Command::new(binary);
285
286 cmd.env_clear();
287 for var in ENV_WHITELIST {
288 if let Ok(val) = std::env::var(var) {
289 cmd.env(var, val);
290 }
291 }
292
293 #[cfg(windows)]
294 for var in ENV_WHITELIST_WINDOWS {
295 if let Ok(val) = std::env::var(var) {
296 cmd.env(var, val);
297 }
298 }
299
300 if let Ok(empty_dir) = std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR") {
303 if std::path::Path::new(&empty_dir).is_dir() {
304 cmd.env("CLAUDE_CONFIG_DIR", &empty_dir);
305 tracing::debug!(
306 target: "claude_runner",
307 "isolating claude subprocess to CLAUDE_CONFIG_DIR={}",
308 empty_dir
309 );
310 } else {
311 tracing::warn!(
312 target: "claude_runner",
313 path = %empty_dir,
314 "SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR is set but path is not a directory; \
315 ignoring. MCP isolation will NOT be applied."
316 );
317 }
318 }
319
320 cmd.arg("-p")
323 .arg(prompt)
324 .arg("--strict-mcp-config")
325 .arg("--mcp-config")
326 .arg("{}")
327 .arg("--dangerously-skip-permissions")
328 .arg("--settings")
329 .arg(r#"{"hooks":{}}"#)
330 .arg("--output-format")
331 .arg("json")
332 .arg("--json-schema")
333 .arg(json_schema)
334 .arg("--max-turns")
335 .arg(max_turns.to_string())
336 .arg("--no-session-persistence");
337
338 if let Some(m) = model {
339 cmd.arg("--model").arg(m);
340 }
341
342 cmd.stdin(Stdio::null())
343 .stdout(Stdio::piped())
344 .stderr(Stdio::piped());
345
346 cmd
347}
348
349pub fn parse_claude_output(stdout: &str) -> Result<ClaudeResult, AppError> {
354 let elements: Vec<ClaudeOutputElement> = serde_json::from_str(stdout).map_err(|e| {
355 AppError::Validation(format!("failed to parse claude output as JSON array: {e}"))
356 })?;
357
358 let is_oauth = elements
359 .iter()
360 .find(|e| e.r#type.as_deref() == Some("system") && e.subtype.as_deref() == Some("init"))
361 .and_then(|e| e.api_key_source.as_deref())
362 .map(|s| s == "none")
363 .unwrap_or(false);
364
365 let result_elem = elements
366 .iter()
367 .find(|e| e.r#type.as_deref() == Some("result"))
368 .ok_or_else(|| {
369 AppError::Validation("claude output missing 'result' element".to_string())
370 })?;
371
372 if result_elem.terminal_reason.as_deref() == Some("max_turns") {
374 tracing::warn!(
375 target: "claude_runner",
376 "claude -p hit max_turns limit — hooks may have consumed turns"
377 );
378 return Err(AppError::Validation(
379 "claude -p hit max_turns: hooks may be consuming turns; increase --max-turns or disable hooks".to_string(),
380 ));
381 }
382
383 if result_elem.is_error {
384 let err_msg = result_elem
385 .error
386 .as_deref()
387 .or(result_elem.result.as_deref())
388 .unwrap_or("unknown error");
389 if err_msg.contains("rate_limit") || err_msg.contains("overloaded") {
390 return Err(AppError::RateLimited {
391 detail: err_msg.to_string(),
392 });
393 }
394 if err_msg.contains("Not logged in") || err_msg.contains("authentication") {
395 tracing::warn!(
396 target: "claude_runner",
397 "Claude Code authentication failed. Re-authenticate interactively with: claude"
398 );
399 }
400 return Err(AppError::Validation(format!(
401 "claude extraction failed: {err_msg}"
402 )));
403 }
404
405 let value = if let Some(v) = result_elem.structured_output.clone() {
406 v
407 } else if let Some(text) = &result_elem.result {
408 serde_json::from_str(text).map_err(|e| {
409 AppError::Validation(format!("failed to parse claude result field as JSON: {e}"))
410 })?
411 } else {
412 return Err(AppError::Validation(
413 "claude result missing structured_output and result field".into(),
414 ));
415 };
416
417 let cost = result_elem.total_cost_usd.unwrap_or(0.0);
418 Ok(ClaudeResult {
419 value,
420 cost_usd: cost,
421 is_oauth,
422 })
423}
424
425pub fn run_claude(
431 binary: &Path,
432 prompt: &str,
433 json_schema: &str,
434 input_text: &str,
435 model: Option<&str>,
436 timeout_secs: u64,
437 max_turns: u32,
438) -> Result<ClaudeResult, AppError> {
439 use wait_timeout::ChildExt;
440
441 let full_prompt = format!("{prompt}\n\n{input_text}");
442 let mut cmd = build_claude_command(binary, &full_prompt, json_schema, model, max_turns);
443
444 let mut child = spawn_with_memory_limit(&mut cmd).map_err(|e| {
445 AppError::Io(std::io::Error::new(
446 e.kind(),
447 format!("failed to spawn claude: {e}"),
448 ))
449 })?;
450
451 let start = std::time::Instant::now();
452 let timeout = std::time::Duration::from_secs(timeout_secs);
453 let status = child.wait_timeout(timeout).map_err(AppError::Io)?;
454
455 if status.is_none() {
456 #[cfg(unix)]
462 unsafe {
463 libc::kill(child.id() as i32, libc::SIGTERM);
464 }
465 let _ = child.kill();
466 let _ = child.wait();
467 }
468
469 match status {
470 Some(exit_status) => {
471 tracing::debug!(
472 target: "process",
473 exit_code = ?exit_status.code(),
474 elapsed_ms = start.elapsed().as_millis() as u64,
475 "external process completed"
476 );
477
478 let mut stdout_buf = Vec::new();
479 let mut stderr_buf = Vec::new();
480 if let Some(mut out) = child.stdout.take() {
481 std::io::Read::read_to_end(&mut out, &mut stdout_buf).map_err(AppError::Io)?;
482 }
483 if let Some(mut err) = child.stderr.take() {
484 std::io::Read::read_to_end(&mut err, &mut stderr_buf).map_err(AppError::Io)?;
485 }
486
487 let stdout_str = String::from_utf8(stdout_buf)
488 .map_err(|_| AppError::Validation("claude -p stdout is not valid UTF-8".into()))?;
489
490 if !exit_status.success() {
492 if let Ok(result) = parse_claude_output(&stdout_str) {
493 return Ok(result);
494 }
495 let stderr_str = String::from_utf8_lossy(&stderr_buf);
496 if stderr_str.contains("auth") || stderr_str.contains("login") {
497 tracing::warn!(
498 target: "claude_runner",
499 "Claude Code authentication may have failed. Re-authenticate with: claude"
500 );
501 }
502 return Err(AppError::Validation(format!(
503 "claude -p exited with code {:?}: {}",
504 exit_status.code(),
505 stderr_str.trim()
506 )));
507 }
508
509 parse_claude_output(&stdout_str)
510 }
511 None => {
512 tracing::warn!(target: "claude_runner", timeout_secs, "claude -p timed out, terminating");
513 terminate_gracefully(&mut child, 3);
514 Err(AppError::Validation(format!(
515 "claude -p timed out after {timeout_secs} seconds"
516 )))
517 }
518 }
519}
520
521#[cfg(unix)]
523pub fn terminate_gracefully(child: &mut std::process::Child, grace_secs: u64) {
524 use wait_timeout::ChildExt;
525 unsafe {
526 libc::kill(child.id() as i32, libc::SIGTERM);
527 }
528 match child.wait_timeout(std::time::Duration::from_secs(grace_secs)) {
529 Ok(Some(_)) => {}
530 _ => {
531 tracing::warn!(target: "process", pid = child.id(), "child ignored SIGTERM, sending SIGKILL");
532 let _ = child.kill();
533 let _ = child.wait();
534 }
535 }
536}
537
/// Stops `child` on non-Unix targets by killing it and then reaping it.
///
/// The grace period is accepted for signature parity with the Unix variant but is not
/// used. Errors from `kill` and `wait` are deliberately discarded.
#[cfg(not(unix))]
pub fn terminate_gracefully(child: &mut std::process::Child, _grace_secs: u64) {
    child.kill().ok();
    child.wait().ok();
}
544
#[cfg(test)]
mod tests {
    use super::*;

    /// A result element with `terminal_reason == "max_turns"` must be rejected even
    /// when it carries structured output and is not flagged as an error.
    #[test]
    fn parse_output_detects_max_turns() {
        let stdout = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"terminal_reason":"max_turns","structured_output":{"name":"t"}}]"#;
        let err = parse_claude_output(stdout).unwrap_err();
        assert!(
            format!("{err}").contains("max_turns"),
            "must detect max_turns in output"
        );
    }

    /// Structured output, cost and the OAuth flag are all extracted from a normal run.
    #[test]
    fn parse_output_extracts_structured_value() {
        let stdout = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"structured_output":{"key":"val"},"total_cost_usd":0.01}]"#;
        let result = parse_claude_output(stdout).unwrap();
        assert_eq!(result.value["key"], "val");
        assert!((result.cost_usd - 0.01).abs() < f64::EPSILON);
        assert!(result.is_oauth);
    }

    /// An error message mentioning `rate_limit` maps to `AppError::RateLimited`.
    #[test]
    fn parse_output_detects_rate_limit() {
        let stdout = r#"[{"type":"result","is_error":true,"error":"rate_limit exceeded"}]"#;
        let err = parse_claude_output(stdout).unwrap_err();
        assert!(
            matches!(err, AppError::RateLimited { .. }),
            "expected AppError::RateLimited, got: {err}"
        );
    }

    /// With no API key in the environment, the real command carries every mandatory
    /// flag, with the expected values where the flag takes one.
    #[test]
    #[serial_test::serial(env)]
    fn build_command_oauth_only_mandatory_flags() {
        // SAFETY: serialized with the other env-mutating tests via `serial(env)`.
        unsafe {
            std::env::remove_var("ANTHROPIC_API_KEY");
        }
        let cmd = build_claude_command(
            std::path::Path::new("/usr/bin/false"),
            "test prompt",
            "{}",
            Some("sonnet"),
            4,
        );
        let args: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
        // True when `flag` is immediately followed by `value` in the argument list.
        let has_pair =
            |flag: &str, value: &str| args.windows(2).any(|w| w[0] == flag && w[1] == value);

        assert!(args.contains(&"-p"), "must have -p");
        assert!(
            has_pair("-p", "test prompt"),
            "-p must be followed by the prompt"
        );
        assert!(
            args.contains(&"--strict-mcp-config"),
            "must have --strict-mcp-config (gaps.md:206)"
        );
        assert!(
            args.contains(&"--mcp-config"),
            "must have --mcp-config (gaps.md:207)"
        );
        assert!(
            args.contains(&"--dangerously-skip-permissions"),
            "must have --dangerously-skip-permissions (gaps.md:208)"
        );
        assert!(
            args.contains(&"--settings"),
            "must have --settings (gaps.md:209)"
        );
        assert!(
            args.contains(&"--output-format"),
            "must have --output-format json (gaps.md:213)"
        );
        assert!(
            has_pair("--output-format", "json"),
            "--output-format must be followed by json (gaps.md:213)"
        );
        assert!(args.contains(&"--json-schema"), "must have --json-schema");
        assert!(
            args.contains(&"--max-turns"),
            "must have --max-turns (gaps.md:212)"
        );
        assert!(
            has_pair("--max-turns", "4"),
            "--max-turns must carry the requested value"
        );
        assert!(
            args.contains(&"--no-session-persistence"),
            "must have --no-session-persistence"
        );
        assert!(
            args.contains(&"--model"),
            "must have --model when model is Some"
        );
        assert!(
            has_pair("--model", "sonnet"),
            "--model must be followed by the requested model"
        );
        assert!(
            !args.contains(&"--bare"),
            "--bare is PROHIBITED (gaps.md:49)"
        );
    }

    /// With an API key in the environment, only the placeholder command is produced.
    #[test]
    #[serial_test::serial(env)]
    fn build_command_aborts_when_anthropic_api_key_set() {
        // SAFETY: serialized with the other env-mutating tests via `serial(env)`.
        unsafe {
            std::env::set_var("ANTHROPIC_API_KEY", "sk-test-violation");
        }
        let cmd = build_claude_command(
            std::path::Path::new("/usr/bin/claude"),
            "test prompt",
            "{}",
            Some("sonnet"),
            4,
        );
        // Clean up before asserting so a failed assertion cannot leak the variable
        // into tests that run afterwards.
        unsafe {
            std::env::remove_var("ANTHROPIC_API_KEY");
        }
        let program = cmd.get_program().to_string_lossy().to_string();
        let args: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
        assert_eq!(
            program, "false",
            "when ANTHROPIC_API_KEY is set, build_claude_command must abort"
        );
        assert!(
            args.contains(&"--oauth-only-violation-anthropic-api-key-set"),
            "aborted command must carry violation marker"
        );
    }
}