sqlite_graphrag/commands/
claude_runner.rs1use crate::errors::AppError;
12use crate::spawn::env_whitelist::apply_env_whitelist;
13use std::path::Path;
14use std::process::{Command, Stdio};
15
/// Lowest Claude Code version accepted by `validate_claude_version`, written as a
/// dotted `major.minor.patch` string.
const MIN_CLAUDE_VERSION: &str = "2.1.0";
18
/// Address-space cap, in MiB, used by the Linux `spawn_with_memory_limit` when
/// `SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB` is unset or not a valid unsigned integer.
#[cfg(target_os = "linux")]
const DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB: u64 = 4096;
22
23#[cfg(target_os = "linux")]
48pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
49 use std::os::unix::process::CommandExt;
50 let max_mb: u64 = std::env::var("SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB")
51 .ok()
52 .and_then(|v| v.parse().ok())
53 .unwrap_or(DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB);
54 let max_bytes = max_mb * 1024 * 1024;
55 unsafe {
62 cmd.pre_exec(move || {
63 let sid = libc::setsid();
64 if sid == -1 {
65 let err = std::io::Error::last_os_error();
66 if err.raw_os_error() != Some(libc::EPERM) {
67 return Err(err);
68 }
69 }
70 let limit = libc::rlimit {
71 rlim_cur: max_bytes,
72 rlim_max: max_bytes,
73 };
74 if libc::setrlimit(libc::RLIMIT_AS, &limit) != 0 {
75 return Err(std::io::Error::last_os_error());
76 }
77 Ok(())
78 });
79 }
80 tracing::debug!(
81 target: "process",
82 program = ?cmd.get_program(),
83 args = ?cmd.get_args().collect::<Vec<_>>(),
84 "spawning external process"
85 );
86 cmd.spawn()
87}
88
/// Spawns `cmd` on non-Linux targets, where no address-space limit is installed.
///
/// On Unix the child is still moved into its own session before `exec`; on other
/// platforms the command is spawned unchanged.
///
/// # Errors
///
/// Returns the `io::Error` from `Command::spawn`, including a `setsid` failure other
/// than `EPERM` reported by the pre-exec hook.
#[cfg(not(target_os = "linux"))]
pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
    #[cfg(unix)]
    {
        use std::os::unix::process::CommandExt;

        let detach_session = || -> std::io::Result<()> {
            // SAFETY: `setsid` takes no arguments and has no memory-safety preconditions.
            if unsafe { libc::setsid() } != -1 {
                return Ok(());
            }
            let os_err = std::io::Error::last_os_error();
            match os_err.raw_os_error() {
                // `EPERM` is tolerated: the child keeps its current session.
                Some(code) if code == libc::EPERM => Ok(()),
                _ => Err(os_err),
            }
        };

        // SAFETY: the hook runs in the forked child before `exec`; it captures nothing
        // and only calls `setsid` and reads `errno`.
        unsafe {
            cmd.pre_exec(detach_session);
        }
    }

    tracing::debug!(
        target: "process",
        program = ?cmd.get_program(),
        args = ?cmd.get_args().collect::<Vec<_>>(),
        "spawning external process"
    );
    cmd.spawn()
}
119
/// One element of the JSON array that `parse_claude_output` reads from the `claude -p`
/// stdout.
///
/// Every field except `is_error` is optional, so elements of different kinds
/// deserialize into this one shape.
#[derive(Debug, serde::Deserialize)]
pub struct ClaudeOutputElement {
    /// Element kind; `parse_claude_output` looks for `"system"` and `"result"`.
    pub r#type: Option<String>,
    /// Element sub-kind; `"init"` on a `"system"` element marks the one inspected for
    /// `api_key_source`.
    pub subtype: Option<String>,
    /// Whether the element reports a failure; `false` when the key is absent.
    #[serde(default)]
    pub is_error: bool,
    /// Structured payload; preferred over `result` when present.
    pub structured_output: Option<serde_json::Value>,
    /// Textual result; parsed as JSON when `structured_output` is absent, and used as
    /// the error message when `error` is absent.
    pub result: Option<String>,
    /// Reported cost in USD; treated as `0.0` by `parse_claude_output` when absent.
    pub total_cost_usd: Option<f64>,
    /// Error message accompanying `is_error`.
    pub error: Option<String>,
    /// Reason the run ended; `"max_turns"` is turned into an error by
    /// `parse_claude_output`.
    pub terminal_reason: Option<String>,
    /// Value of the `apiKeySource` JSON key; `"none"` is interpreted as OAuth by
    /// `parse_claude_output`.
    #[serde(rename = "apiKeySource")]
    pub api_key_source: Option<String>,
}
135
/// Successful outcome of a `claude -p` invocation, as produced by `parse_claude_output`.
#[derive(Debug)]
pub struct ClaudeResult {
    /// JSON payload taken from `structured_output`, or parsed from the `result` text.
    pub value: serde_json::Value,
    /// Cost in USD reported by the result element (`0.0` when not reported).
    pub cost_usd: f64,
    /// `true` when the `system`/`init` element reported `apiKeySource` as `"none"`.
    pub is_oauth: bool,
}
143
144pub fn validate_claude_version(binary: &Path) -> Result<String, AppError> {
146 let resolved = which::which(binary).map_err(|_| {
147 AppError::Validation(format!(
148 "executable '{}' not found in PATH; ensure it is installed and accessible",
149 binary.display()
150 ))
151 })?;
152 let output = Command::new(&resolved)
153 .arg("--version")
154 .stdin(Stdio::null())
155 .stdout(Stdio::piped())
156 .stderr(Stdio::piped())
157 .output()
158 .map_err(AppError::Io)?;
159
160 if !output.status.success() {
161 return Err(AppError::Validation(
162 "failed to run 'claude --version'".to_string(),
163 ));
164 }
165
166 let version_str = String::from_utf8(output.stdout)
167 .map_err(|_| AppError::Validation("claude --version output is not UTF-8".to_string()))?;
168 let version = version_str.trim().to_string();
169 let numeric = version.split([' ', '(']).next().unwrap_or("").trim();
170
171 fn parse_semver(s: &str) -> Option<(u64, u64, u64)> {
172 let parts: Vec<&str> = s.splitn(3, '.').collect();
173 if parts.len() < 2 {
174 return None;
175 }
176 let major = parts[0].parse::<u64>().ok()?;
177 let minor = parts[1].parse::<u64>().ok()?;
178 let patch = parts
179 .get(2)
180 .and_then(|p| p.parse::<u64>().ok())
181 .unwrap_or(0);
182 Some((major, minor, patch))
183 }
184
185 if let (Some(actual), Some(min)) = (parse_semver(numeric), parse_semver(MIN_CLAUDE_VERSION)) {
186 if actual < min {
187 return Err(AppError::Validation(format!(
188 "Claude Code version {numeric} is below minimum required {MIN_CLAUDE_VERSION}"
189 )));
190 }
191 }
192
193 Ok(version)
194}
195
196pub fn build_claude_command(
233 binary: &Path,
234 prompt: &str,
235 json_schema: &str,
236 model: Option<&str>,
237 max_turns: u32,
238) -> Result<Command, crate::errors::AppError> {
239 if let Ok(_key) = std::env::var("ANTHROPIC_API_KEY") {
243 let mut cmd = Command::new("false");
248 cmd.env_clear();
249 cmd.env("PATH", "/nonexistent");
250 cmd.arg("--oauth-only-violation-anthropic-api-key-set");
251 cmd.arg("--oauth-only-resolution-use-anthropic-auth-token");
252 return Ok(cmd);
253 }
254
255 let mut cmd = Command::new(binary);
256
257 apply_env_whitelist(&mut cmd, crate::spawn::env_whitelist::is_strict_env_clear());
262
263 if let Ok(empty_dir) = std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR") {
266 if std::path::Path::new(&empty_dir).is_dir() {
267 cmd.env("CLAUDE_CONFIG_DIR", &empty_dir);
268 tracing::debug!(
269 target: "claude_runner",
270 "isolating claude subprocess to CLAUDE_CONFIG_DIR={}",
271 empty_dir
272 );
273 } else {
274 tracing::warn!(
275 target: "claude_runner",
276 path = %empty_dir,
277 "SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR is set but path is not a directory; \
278 ignoring. MCP isolation will NOT be applied."
279 );
280 }
281 }
282
283 let mcp_config_path = crate::spawn::preflight::write_empty_mcp_config_tempfile()?;
292
293 cmd.arg("-p")
294 .arg(prompt)
295 .arg("--strict-mcp-config")
296 .arg("--mcp-config")
297 .arg(mcp_config_path.as_os_str())
298 .arg("--dangerously-skip-permissions")
299 .arg("--settings")
300 .arg(r#"{"hooks":{}}"#)
301 .arg("--output-format")
302 .arg("json")
303 .arg("--json-schema")
304 .arg(json_schema)
305 .arg("--max-turns")
306 .arg(max_turns.to_string())
307 .arg("--no-session-persistence");
308
309 if let Some(m) = model {
310 cmd.arg("--model").arg(m);
311 }
312
313 cmd.stdin(Stdio::null())
314 .stdout(Stdio::piped())
315 .stderr(Stdio::piped());
316
317 let argv_refs: Vec<std::ffi::OsString> = cmd.get_args().map(|s| s.to_os_string()).collect();
323 let preflight_args = crate::spawn::preflight::PreFlightArgs {
324 binary_path: binary,
325 argv: &argv_refs,
326 workspace_root: std::path::Path::new("."),
327 mcp_config_inline_json: None,
328 expected_output_bytes: 65_536,
329 spawner_name: "claude_runner",
330 };
331 if let Err(e) = crate::spawn::preflight::preflight_check(&preflight_args) {
332 return Err(crate::errors::AppError::from(e));
338 }
339
340 Ok(cmd)
341}
342
343pub fn parse_claude_output(stdout: &str) -> Result<ClaudeResult, AppError> {
348 let elements: Vec<ClaudeOutputElement> = serde_json::from_str(stdout).map_err(|e| {
349 AppError::Validation(format!("failed to parse claude output as JSON array: {e}"))
350 })?;
351
352 let is_oauth = elements
353 .iter()
354 .find(|e| e.r#type.as_deref() == Some("system") && e.subtype.as_deref() == Some("init"))
355 .and_then(|e| e.api_key_source.as_deref())
356 .map(|s| s == "none")
357 .unwrap_or(false);
358
359 let result_elem = elements
360 .iter()
361 .find(|e| e.r#type.as_deref() == Some("result"))
362 .ok_or_else(|| {
363 AppError::Validation("claude output missing 'result' element".to_string())
364 })?;
365
366 if result_elem.terminal_reason.as_deref() == Some("max_turns") {
368 tracing::warn!(
369 target: "claude_runner",
370 "claude -p hit max_turns limit — hooks may have consumed turns"
371 );
372 return Err(AppError::Validation(
373 "claude -p hit max_turns: hooks may be consuming turns; increase --max-turns or disable hooks".to_string(),
374 ));
375 }
376
377 if result_elem.is_error {
378 let err_msg = result_elem
379 .error
380 .as_deref()
381 .or(result_elem.result.as_deref())
382 .unwrap_or("unknown error");
383 if err_msg.contains("rate_limit") || err_msg.contains("overloaded") {
384 return Err(AppError::RateLimited {
385 detail: err_msg.to_string(),
386 });
387 }
388 if err_msg.contains("Not logged in") || err_msg.contains("authentication") {
389 tracing::warn!(
390 target: "claude_runner",
391 "Claude Code authentication failed. Re-authenticate interactively with: claude"
392 );
393 }
394 return Err(AppError::Validation(format!(
395 "claude extraction failed: {err_msg}"
396 )));
397 }
398
399 let value = if let Some(v) = result_elem.structured_output.clone() {
400 v
401 } else if let Some(text) = &result_elem.result {
402 serde_json::from_str(text).map_err(|e| {
403 AppError::Validation(format!("failed to parse claude result field as JSON: {e}"))
404 })?
405 } else {
406 return Err(AppError::Validation(
407 "claude result missing structured_output and result field".into(),
408 ));
409 };
410
411 let cost = result_elem.total_cost_usd.unwrap_or(0.0);
412 Ok(ClaudeResult {
413 value,
414 cost_usd: cost,
415 is_oauth,
416 })
417}
418
419pub fn run_claude(
425 binary: &Path,
426 prompt: &str,
427 json_schema: &str,
428 input_text: &str,
429 model: Option<&str>,
430 timeout_secs: u64,
431 max_turns: u32,
432) -> Result<ClaudeResult, AppError> {
433 use wait_timeout::ChildExt;
434
435 let full_prompt = format!("{prompt}\n\n{input_text}");
436 let mut cmd = build_claude_command(binary, &full_prompt, json_schema, model, max_turns)?;
437
438 let mut child = spawn_with_memory_limit(&mut cmd).map_err(|e| {
439 AppError::Io(std::io::Error::new(
440 e.kind(),
441 format!("failed to spawn claude: {e}"),
442 ))
443 })?;
444
445 let start = std::time::Instant::now();
446 let timeout = std::time::Duration::from_secs(timeout_secs);
447 let status = child.wait_timeout(timeout).map_err(AppError::Io)?;
448
449 if status.is_none() {
450 #[cfg(unix)]
456 unsafe {
457 libc::kill(child.id() as i32, libc::SIGTERM);
458 }
459 let _ = child.kill();
460 let _ = child.wait();
461 }
462
463 match status {
464 Some(exit_status) => {
465 tracing::debug!(
466 target: "process",
467 exit_code = ?exit_status.code(),
468 elapsed_ms = start.elapsed().as_millis() as u64,
469 "external process completed"
470 );
471
472 let mut stdout_buf = Vec::new();
473 let mut stderr_buf = Vec::new();
474 if let Some(mut out) = child.stdout.take() {
475 std::io::Read::read_to_end(&mut out, &mut stdout_buf).map_err(AppError::Io)?;
476 }
477 if let Some(mut err) = child.stderr.take() {
478 std::io::Read::read_to_end(&mut err, &mut stderr_buf).map_err(AppError::Io)?;
479 }
480
481 let stdout_str = String::from_utf8(stdout_buf)
482 .map_err(|_| AppError::Validation("claude -p stdout is not valid UTF-8".into()))?;
483
484 if !exit_status.success() {
486 if let Ok(result) = parse_claude_output(&stdout_str) {
487 return Ok(result);
488 }
489 let stderr_str = String::from_utf8_lossy(&stderr_buf);
490 if stderr_str.contains("auth") || stderr_str.contains("login") {
491 tracing::warn!(
492 target: "claude_runner",
493 "Claude Code authentication may have failed. Re-authenticate with: claude"
494 );
495 }
496 return Err(AppError::Validation(format!(
497 "claude -p exited with code {:?}: {}",
498 exit_status.code(),
499 stderr_str.trim()
500 )));
501 }
502
503 parse_claude_output(&stdout_str)
504 }
505 None => {
506 tracing::warn!(target: "claude_runner", timeout_secs, "claude -p timed out, terminating");
507 terminate_gracefully(&mut child, 3);
508 Err(AppError::Validation(format!(
509 "claude -p timed out after {timeout_secs} seconds"
510 )))
511 }
512 }
513}
514
515#[cfg(unix)]
517pub fn terminate_gracefully(child: &mut std::process::Child, grace_secs: u64) {
518 use wait_timeout::ChildExt;
519 unsafe {
520 libc::kill(child.id() as i32, libc::SIGTERM);
521 }
522 match child.wait_timeout(std::time::Duration::from_secs(grace_secs)) {
523 Ok(Some(_)) => {}
524 _ => {
525 tracing::warn!(target: "process", pid = child.id(), "child ignored SIGTERM, sending SIGKILL");
526 let _ = child.kill();
527 let _ = child.wait();
528 }
529 }
530}
531
/// Stops `child` on non-Unix targets by killing it and then reaping it.
///
/// `_grace_secs` is accepted for signature parity with the Unix variant and is unused.
/// Failures of either step are ignored.
#[cfg(not(unix))]
pub fn terminate_gracefully(child: &mut std::process::Child, _grace_secs: u64) {
    drop(child.kill());
    drop(child.wait());
}
538
#[cfg(test)]
mod tests {
    use super::*;

    /// A result element whose `terminal_reason` is `max_turns` is an error even when it
    /// carries a structured payload.
    #[test]
    fn parse_output_detects_max_turns() {
        let stdout = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"terminal_reason":"max_turns","structured_output":{"name":"t"}}]"#;
        let err = parse_claude_output(stdout).unwrap_err();
        assert!(
            format!("{err}").contains("max_turns"),
            "must detect max_turns in output"
        );
    }

    /// The structured payload, the cost and the OAuth flag are all extracted.
    #[test]
    fn parse_output_extracts_structured_value() {
        let stdout = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"structured_output":{"key":"val"},"total_cost_usd":0.01}]"#;
        let result = parse_claude_output(stdout).unwrap();
        assert_eq!(result.value["key"], "val");
        assert!((result.cost_usd - 0.01).abs() < f64::EPSILON);
        assert!(result.is_oauth);
    }

    /// An error mentioning `rate_limit` maps to `AppError::RateLimited`.
    #[test]
    fn parse_output_detects_rate_limit() {
        let stdout = r#"[{"type":"result","is_error":true,"error":"rate_limit exceeded"}]"#;
        let err = parse_claude_output(stdout).unwrap_err();
        assert!(
            matches!(err, AppError::RateLimited { .. }),
            "expected AppError::RateLimited, got: {err}"
        );
    }

    /// Without `ANTHROPIC_API_KEY`, the built command carries every mandatory flag and
    /// never `--bare`.
    #[test]
    #[serial_test::serial(env)]
    fn build_command_oauth_only_mandatory_flags() {
        // SAFETY: the test is serialised with the other `env`-group tests, which are
        // the ones mutating these variables.
        unsafe {
            std::env::remove_var("ANTHROPIC_API_KEY");
            std::env::remove_var("CLAUDE_CONFIG_DIR");
        }
        let cmd = build_claude_command(
            std::path::Path::new("/usr/bin/false"),
            "test prompt",
            "{}",
            Some("sonnet"),
            4,
        )
        .expect("preflight gate accepts valid args");
        let args: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();

        let required: &[(&str, &str)] = &[
            ("-p", "must have -p"),
            (
                "--strict-mcp-config",
                "must have --strict-mcp-config (gaps.md:206)",
            ),
            ("--mcp-config", "must have --mcp-config (gaps.md:207)"),
            (
                "--dangerously-skip-permissions",
                "must have --dangerously-skip-permissions (gaps.md:208)",
            ),
            ("--settings", "must have --settings (gaps.md:209)"),
            (
                "--output-format",
                "must have --output-format json (gaps.md:213)",
            ),
            ("--json-schema", "must have --json-schema"),
            ("--max-turns", "must have --max-turns (gaps.md:212)"),
            (
                "--no-session-persistence",
                "must have --no-session-persistence",
            ),
            ("--model", "must have --model when model is Some"),
        ];
        for &(flag, why) in required {
            assert!(args.contains(&flag), "{why}");
        }
        assert!(
            !args.contains(&"--bare"),
            "--bare is PROHIBITED (gaps.md:49)"
        );
    }

    /// With `ANTHROPIC_API_KEY` set, the sentinel `false` command with the violation
    /// marker is returned instead of a real invocation.
    #[test]
    #[serial_test::serial(env)]
    fn build_command_aborts_when_anthropic_api_key_set() {
        // SAFETY: the test is serialised with the other `env`-group tests, which are
        // the ones mutating these variables.
        unsafe {
            std::env::set_var("ANTHROPIC_API_KEY", "sk-test-violation");
            std::env::remove_var("CLAUDE_CONFIG_DIR");
        }
        let built = build_claude_command(
            std::path::Path::new("/usr/bin/claude"),
            "test prompt",
            "{}",
            Some("sonnet"),
            4,
        );
        // Restore the environment before anything can panic, so a failing assertion
        // does not leak the key into tests that run afterwards.
        // SAFETY: same serialisation argument as above.
        unsafe {
            std::env::remove_var("ANTHROPIC_API_KEY");
        }

        let cmd = built.expect("preflight gate accepts valid args");
        let program = cmd.get_program().to_string_lossy().to_string();
        let args: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
        assert_eq!(
            program, "false",
            "when ANTHROPIC_API_KEY is set, build_claude_command must abort"
        );
        assert!(
            args.contains(&"--oauth-only-violation-anthropic-api-key-set"),
            "aborted command must carry violation marker"
        );
    }
}