sqlite_graphrag/commands/
claude_runner.rs1use crate::errors::AppError;
7use std::path::Path;
8use std::process::{Command, Stdio};
9
/// Lowest Claude Code version that `validate_claude_version` accepts.
const MIN_CLAUDE_VERSION: &str = "2.1.0";

/// Names of the parent-process environment variables that
/// `build_claude_command` copies into the child's otherwise cleared
/// environment. Variables not listed here are not forwarded.
const ENV_WHITELIST: &[&str] = &[
    // Basic shell / session identity.
    "PATH",
    "HOME",
    "USER",
    "SHELL",
    "TERM",
    "LANG",
    // XDG base directories.
    "XDG_CONFIG_HOME",
    "XDG_DATA_HOME",
    "XDG_RUNTIME_DIR",
    // Claude configuration location; `build_claude_command` may override it.
    "CLAUDE_CONFIG_DIR",
    // Temporary-directory hints.
    "TMPDIR",
    "TMP",
    "TEMP",
    // Dynamic-loader fallback search path (macOS dyld).
    "DYLD_FALLBACK_LIBRARY_PATH",
];

/// Extra variables forwarded by `build_claude_command` on Windows builds only.
#[cfg(windows)]
const ENV_WHITELIST_WINDOWS: &[&str] = &[
    "LOCALAPPDATA",
    "APPDATA",
    "USERPROFILE",
    "SystemRoot",
    "COMSPEC",
    "PATHEXT",
    "HOMEPATH",
    "HOMEDRIVE",
];

/// Address-space cap, in MiB, used by the Linux `spawn_with_memory_limit`
/// when `SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB` is unset or unparsable.
const DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB: u64 = 4096;
52
53#[cfg(target_os = "linux")]
78pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
79 use std::os::unix::process::CommandExt;
80 let max_mb: u64 = std::env::var("SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB")
81 .ok()
82 .and_then(|v| v.parse().ok())
83 .unwrap_or(DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB);
84 let max_bytes = max_mb * 1024 * 1024;
85 unsafe {
92 cmd.pre_exec(move || {
93 let sid = libc::setsid();
94 if sid == -1 {
95 let err = std::io::Error::last_os_error();
96 if err.raw_os_error() != Some(libc::EPERM) {
97 return Err(err);
98 }
99 }
100 let limit = libc::rlimit {
101 rlim_cur: max_bytes,
102 rlim_max: max_bytes,
103 };
104 if libc::setrlimit(libc::RLIMIT_AS, &limit) != 0 {
105 return Err(std::io::Error::last_os_error());
106 }
107 Ok(())
108 });
109 }
110 tracing::debug!(
111 target: "process",
112 program = ?cmd.get_program(),
113 args = ?cmd.get_args().collect::<Vec<_>>(),
114 "spawning external process"
115 );
116 cmd.spawn()
117}
118
/// Spawns `cmd` on non-Linux targets.
///
/// On Unix the child is moved into a new session via a `pre_exec` hook; no
/// memory limit is applied in this variant. On other platforms the command is
/// spawned unchanged. The program and its arguments are logged at debug level
/// before spawning.
///
/// # Errors
///
/// Returns the error from `Command::spawn`, including a `setsid` failure other
/// than `EPERM` reported by the hook.
#[cfg(not(target_os = "linux"))]
pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
    #[cfg(unix)]
    {
        use std::os::unix::process::CommandExt;

        // SAFETY: the hook runs in the forked child before `exec`; it only
        // calls `setsid` and reads `errno`, and captures no state.
        unsafe {
            cmd.pre_exec(|| {
                if libc::setsid() != -1 {
                    return Ok(());
                }
                let os_err = std::io::Error::last_os_error();
                match os_err.raw_os_error() {
                    // EPERM is tolerated; every other failure aborts the spawn.
                    Some(code) if code == libc::EPERM => Ok(()),
                    _ => Err(os_err),
                }
            });
        }
    }

    tracing::debug!(
        target: "process",
        program = ?cmd.get_program(),
        args = ?cmd.get_args().collect::<Vec<_>>(),
        "spawning external process"
    );
    cmd.spawn()
}
149
150#[derive(Debug, serde::Deserialize)]
152pub struct ClaudeOutputElement {
153 pub r#type: Option<String>,
154 pub subtype: Option<String>,
155 #[serde(default)]
156 pub is_error: bool,
157 pub structured_output: Option<serde_json::Value>,
158 pub result: Option<String>,
159 pub total_cost_usd: Option<f64>,
160 pub error: Option<String>,
161 pub terminal_reason: Option<String>,
162 #[serde(rename = "apiKeySource")]
163 pub api_key_source: Option<String>,
164}
165
166#[derive(Debug)]
168pub struct ClaudeResult {
169 pub value: serde_json::Value,
170 pub cost_usd: f64,
171 pub is_oauth: bool,
172}
173
174pub fn validate_claude_version(binary: &Path) -> Result<String, AppError> {
176 let resolved = which::which(binary).map_err(|_| {
177 AppError::Validation(format!(
178 "executable '{}' not found in PATH; ensure it is installed and accessible",
179 binary.display()
180 ))
181 })?;
182 let output = Command::new(&resolved)
183 .arg("--version")
184 .stdin(Stdio::null())
185 .stdout(Stdio::piped())
186 .stderr(Stdio::piped())
187 .output()
188 .map_err(AppError::Io)?;
189
190 if !output.status.success() {
191 return Err(AppError::Validation(
192 "failed to run 'claude --version'".to_string(),
193 ));
194 }
195
196 let version_str = String::from_utf8(output.stdout)
197 .map_err(|_| AppError::Validation("claude --version output is not UTF-8".to_string()))?;
198 let version = version_str.trim().to_string();
199 let numeric = version.split([' ', '(']).next().unwrap_or("").trim();
200
201 fn parse_semver(s: &str) -> Option<(u64, u64, u64)> {
202 let parts: Vec<&str> = s.splitn(3, '.').collect();
203 if parts.len() < 2 {
204 return None;
205 }
206 let major = parts[0].parse::<u64>().ok()?;
207 let minor = parts[1].parse::<u64>().ok()?;
208 let patch = parts
209 .get(2)
210 .and_then(|p| p.parse::<u64>().ok())
211 .unwrap_or(0);
212 Some((major, minor, patch))
213 }
214
215 if let (Some(actual), Some(min)) = (parse_semver(numeric), parse_semver(MIN_CLAUDE_VERSION)) {
216 if actual < min {
217 return Err(AppError::Validation(format!(
218 "Claude Code version {numeric} is below minimum required {MIN_CLAUDE_VERSION}"
219 )));
220 }
221 }
222
223 Ok(version)
224}
225
226pub fn build_claude_command(
263 binary: &Path,
264 prompt: &str,
265 json_schema: &str,
266 model: Option<&str>,
267 max_turns: u32,
268) -> Command {
269 if let Ok(_key) = std::env::var("ANTHROPIC_API_KEY") {
273 let mut cmd = Command::new("false");
277 cmd.env_clear();
278 cmd.env("PATH", "/nonexistent");
279 cmd.arg("--oauth-only-violation-anthropic-api-key-set");
280 return cmd;
281 }
282
283 let mut cmd = Command::new(binary);
284
285 cmd.env_clear();
286 for var in ENV_WHITELIST {
287 if let Ok(val) = std::env::var(var) {
288 cmd.env(var, val);
289 }
290 }
291
292 #[cfg(windows)]
293 for var in ENV_WHITELIST_WINDOWS {
294 if let Ok(val) = std::env::var(var) {
295 cmd.env(var, val);
296 }
297 }
298
299 if let Ok(empty_dir) = std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR") {
302 if std::path::Path::new(&empty_dir).is_dir() {
303 cmd.env("CLAUDE_CONFIG_DIR", &empty_dir);
304 tracing::debug!(
305 target: "claude_runner",
306 "isolating claude subprocess to CLAUDE_CONFIG_DIR={}",
307 empty_dir
308 );
309 } else {
310 tracing::warn!(
311 target: "claude_runner",
312 path = %empty_dir,
313 "SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR is set but path is not a directory; \
314 ignoring. MCP isolation will NOT be applied."
315 );
316 }
317 }
318
319 cmd.arg("-p")
322 .arg(prompt)
323 .arg("--strict-mcp-config")
324 .arg("--mcp-config")
325 .arg("{}")
326 .arg("--dangerously-skip-permissions")
327 .arg("--settings")
328 .arg(r#"{"hooks":{}}"#)
329 .arg("--output-format")
330 .arg("json")
331 .arg("--json-schema")
332 .arg(json_schema)
333 .arg("--max-turns")
334 .arg(max_turns.to_string())
335 .arg("--no-session-persistence");
336
337 if let Some(m) = model {
338 cmd.arg("--model").arg(m);
339 }
340
341 cmd.stdin(Stdio::null())
342 .stdout(Stdio::piped())
343 .stderr(Stdio::piped());
344
345 cmd
346}
347
348pub fn parse_claude_output(stdout: &str) -> Result<ClaudeResult, AppError> {
353 let elements: Vec<ClaudeOutputElement> = serde_json::from_str(stdout).map_err(|e| {
354 AppError::Validation(format!("failed to parse claude output as JSON array: {e}"))
355 })?;
356
357 let is_oauth = elements
358 .iter()
359 .find(|e| e.r#type.as_deref() == Some("system") && e.subtype.as_deref() == Some("init"))
360 .and_then(|e| e.api_key_source.as_deref())
361 .map(|s| s == "none")
362 .unwrap_or(false);
363
364 let result_elem = elements
365 .iter()
366 .find(|e| e.r#type.as_deref() == Some("result"))
367 .ok_or_else(|| {
368 AppError::Validation("claude output missing 'result' element".to_string())
369 })?;
370
371 if result_elem.terminal_reason.as_deref() == Some("max_turns") {
373 tracing::warn!(
374 target: "claude_runner",
375 "claude -p hit max_turns limit — hooks may have consumed turns"
376 );
377 return Err(AppError::Validation(
378 "claude -p hit max_turns: hooks may be consuming turns; increase --max-turns or disable hooks".to_string(),
379 ));
380 }
381
382 if result_elem.is_error {
383 let err_msg = result_elem
384 .error
385 .as_deref()
386 .or(result_elem.result.as_deref())
387 .unwrap_or("unknown error");
388 if err_msg.contains("rate_limit") || err_msg.contains("overloaded") {
389 return Err(AppError::RateLimited {
390 detail: err_msg.to_string(),
391 });
392 }
393 if err_msg.contains("Not logged in") || err_msg.contains("authentication") {
394 tracing::warn!(
395 target: "claude_runner",
396 "Claude Code authentication failed. Re-authenticate interactively with: claude"
397 );
398 }
399 return Err(AppError::Validation(format!(
400 "claude extraction failed: {err_msg}"
401 )));
402 }
403
404 let value = if let Some(v) = result_elem.structured_output.clone() {
405 v
406 } else if let Some(text) = &result_elem.result {
407 serde_json::from_str(text).map_err(|e| {
408 AppError::Validation(format!("failed to parse claude result field as JSON: {e}"))
409 })?
410 } else {
411 return Err(AppError::Validation(
412 "claude result missing structured_output and result field".into(),
413 ));
414 };
415
416 let cost = result_elem.total_cost_usd.unwrap_or(0.0);
417 Ok(ClaudeResult {
418 value,
419 cost_usd: cost,
420 is_oauth,
421 })
422}
423
424pub fn run_claude(
430 binary: &Path,
431 prompt: &str,
432 json_schema: &str,
433 input_text: &str,
434 model: Option<&str>,
435 timeout_secs: u64,
436 max_turns: u32,
437) -> Result<ClaudeResult, AppError> {
438 use wait_timeout::ChildExt;
439
440 let full_prompt = format!("{prompt}\n\n{input_text}");
441 let mut cmd = build_claude_command(binary, &full_prompt, json_schema, model, max_turns);
442
443 let mut child = spawn_with_memory_limit(&mut cmd).map_err(|e| {
444 AppError::Io(std::io::Error::new(
445 e.kind(),
446 format!("failed to spawn claude: {e}"),
447 ))
448 })?;
449
450 let start = std::time::Instant::now();
451 let timeout = std::time::Duration::from_secs(timeout_secs);
452 let status = child.wait_timeout(timeout).map_err(AppError::Io)?;
453
454 if status.is_none() {
455 #[cfg(unix)]
461 unsafe {
462 libc::kill(child.id() as i32, libc::SIGTERM);
463 }
464 let _ = child.kill();
465 let _ = child.wait();
466 }
467
468 match status {
469 Some(exit_status) => {
470 tracing::debug!(
471 target: "process",
472 exit_code = ?exit_status.code(),
473 elapsed_ms = start.elapsed().as_millis() as u64,
474 "external process completed"
475 );
476
477 let mut stdout_buf = Vec::new();
478 let mut stderr_buf = Vec::new();
479 if let Some(mut out) = child.stdout.take() {
480 std::io::Read::read_to_end(&mut out, &mut stdout_buf).map_err(AppError::Io)?;
481 }
482 if let Some(mut err) = child.stderr.take() {
483 std::io::Read::read_to_end(&mut err, &mut stderr_buf).map_err(AppError::Io)?;
484 }
485
486 let stdout_str = String::from_utf8(stdout_buf)
487 .map_err(|_| AppError::Validation("claude -p stdout is not valid UTF-8".into()))?;
488
489 if !exit_status.success() {
491 if let Ok(result) = parse_claude_output(&stdout_str) {
492 return Ok(result);
493 }
494 let stderr_str = String::from_utf8_lossy(&stderr_buf);
495 if stderr_str.contains("auth") || stderr_str.contains("login") {
496 tracing::warn!(
497 target: "claude_runner",
498 "Claude Code authentication may have failed. Re-authenticate with: claude"
499 );
500 }
501 return Err(AppError::Validation(format!(
502 "claude -p exited with code {:?}: {}",
503 exit_status.code(),
504 stderr_str.trim()
505 )));
506 }
507
508 parse_claude_output(&stdout_str)
509 }
510 None => {
511 tracing::warn!(target: "claude_runner", timeout_secs, "claude -p timed out, terminating");
512 terminate_gracefully(&mut child, 3);
513 Err(AppError::Validation(format!(
514 "claude -p timed out after {timeout_secs} seconds"
515 )))
516 }
517 }
518}
519
520#[cfg(unix)]
522pub fn terminate_gracefully(child: &mut std::process::Child, grace_secs: u64) {
523 use wait_timeout::ChildExt;
524 unsafe {
525 libc::kill(child.id() as i32, libc::SIGTERM);
526 }
527 match child.wait_timeout(std::time::Duration::from_secs(grace_secs)) {
528 Ok(Some(_)) => {}
529 _ => {
530 tracing::warn!(target: "process", pid = child.id(), "child ignored SIGTERM, sending SIGKILL");
531 let _ = child.kill();
532 let _ = child.wait();
533 }
534 }
535}
536
/// Stops `child` on non-Unix targets: kills it immediately and then reaps it.
///
/// The grace period is not used in this variant. Errors from `kill` and
/// `wait` are ignored (best effort).
#[cfg(not(unix))]
pub fn terminate_gracefully(child: &mut std::process::Child, _grace_secs: u64) {
    child.kill().ok();
    child.wait().ok();
}
543
#[cfg(test)]
mod tests {
    use super::*;

    /// A result whose `terminal_reason` is `max_turns` is rejected even when
    /// it carries structured output.
    #[test]
    fn parse_output_detects_max_turns() {
        let stdout = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"terminal_reason":"max_turns","structured_output":{"name":"t"}}]"#;
        let err = parse_claude_output(stdout).unwrap_err();
        assert!(
            format!("{err}").contains("max_turns"),
            "must detect max_turns in output"
        );
    }

    /// Structured output, cost and the OAuth flag are all extracted.
    #[test]
    fn parse_output_extracts_structured_value() {
        let stdout = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"structured_output":{"key":"val"},"total_cost_usd":0.01}]"#;
        let result = parse_claude_output(stdout).unwrap();
        assert_eq!(result.value["key"], "val");
        assert!((result.cost_usd - 0.01).abs() < f64::EPSILON);
        assert!(result.is_oauth);
    }

    /// An error mentioning `rate_limit` maps to `AppError::RateLimited`.
    #[test]
    fn parse_output_detects_rate_limit() {
        let stdout = r#"[{"type":"result","is_error":true,"error":"rate_limit exceeded"}]"#;
        let err = parse_claude_output(stdout).unwrap_err();
        assert!(
            matches!(err, AppError::RateLimited { .. }),
            "expected AppError::RateLimited, got: {err}"
        );
    }

    /// Without `ANTHROPIC_API_KEY`, the command carries every mandatory flag.
    #[test]
    #[serial_test::serial(env)]
    fn build_command_oauth_only_mandatory_flags() {
        // SAFETY: tests that touch the environment are serialized through
        // `serial_test::serial(env)`.
        unsafe {
            std::env::remove_var("ANTHROPIC_API_KEY");
        }
        let cmd = build_claude_command(
            std::path::Path::new("/usr/bin/false"),
            "test prompt",
            "{}",
            Some("sonnet"),
            4,
        );
        let args: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
        assert!(args.contains(&"-p"), "must have -p");
        assert!(
            args.contains(&"--strict-mcp-config"),
            "must have --strict-mcp-config (gaps.md:206)"
        );
        assert!(
            args.contains(&"--mcp-config"),
            "must have --mcp-config (gaps.md:207)"
        );
        assert!(
            args.contains(&"--dangerously-skip-permissions"),
            "must have --dangerously-skip-permissions (gaps.md:208)"
        );
        assert!(
            args.contains(&"--settings"),
            "must have --settings (gaps.md:209)"
        );
        // Check the flag together with its value, as the message promises.
        assert!(
            args.windows(2)
                .any(|pair| pair[0] == "--output-format" && pair[1] == "json"),
            "must have --output-format json (gaps.md:213)"
        );
        assert!(args.contains(&"--json-schema"), "must have --json-schema");
        assert!(
            args.contains(&"--max-turns"),
            "must have --max-turns (gaps.md:212)"
        );
        assert!(
            args.contains(&"--no-session-persistence"),
            "must have --no-session-persistence"
        );
        assert!(
            args.contains(&"--model"),
            "must have --model when model is Some"
        );
        assert!(
            !args.contains(&"--bare"),
            "--bare is PROHIBITED (gaps.md:49)"
        );
    }

    /// With `ANTHROPIC_API_KEY` set, the sentinel command is returned instead
    /// of the Claude binary.
    #[test]
    #[serial_test::serial(env)]
    fn build_command_aborts_when_anthropic_api_key_set() {
        // SAFETY: tests that touch the environment are serialized through
        // `serial_test::serial(env)`.
        unsafe {
            std::env::set_var("ANTHROPIC_API_KEY", "sk-test-violation");
        }
        let cmd = build_claude_command(
            std::path::Path::new("/usr/bin/claude"),
            "test prompt",
            "{}",
            Some("sonnet"),
            4,
        );
        // Clean up before asserting so a failed assertion cannot leak the
        // variable into tests that run afterwards.
        // SAFETY: see above.
        unsafe {
            std::env::remove_var("ANTHROPIC_API_KEY");
        }
        let program = cmd.get_program().to_string_lossy().to_string();
        let args: Vec<&str> = cmd.get_args().filter_map(|a| a.to_str()).collect();
        assert_eq!(
            program, "false",
            "when ANTHROPIC_API_KEY is set, build_claude_command must abort"
        );
        assert!(
            args.contains(&"--oauth-only-violation-anthropic-api-key-set"),
            "aborted command must carry violation marker"
        );
    }
}