sqlite_graphrag/commands/
claude_runner.rs1use crate::errors::AppError;
7use std::path::Path;
8use std::process::{Command, Stdio};
9
/// Oldest Claude Code release accepted by [`validate_claude_version`],
/// written as `major.minor.patch`.
const MIN_CLAUDE_VERSION: &str = "2.1.0";
12
/// Environment variables that [`build_claude_command`] copies from the parent
/// process into the subprocess after clearing its environment.
const ENV_WHITELIST: &[&str] = &[
    // General process / shell context.
    "PATH",
    "HOME",
    "USER",
    "SHELL",
    "TERM",
    "LANG",
    // XDG directories.
    "XDG_CONFIG_HOME",
    "XDG_DATA_HOME",
    "XDG_RUNTIME_DIR",
    // Claude-specific settings.
    "ANTHROPIC_API_KEY",
    "CLAUDE_CONFIG_DIR",
    // Temporary-directory locations.
    "TMPDIR",
    "TMP",
    "TEMP",
    "DYLD_FALLBACK_LIBRARY_PATH",
];
31
/// Additional variables forwarded to the subprocess on Windows builds, on top
/// of [`ENV_WHITELIST`].
#[cfg(windows)]
const ENV_WHITELIST_WINDOWS: &[&str] = &[
    "LOCALAPPDATA",
    "APPDATA",
    "USERPROFILE",
    "SystemRoot",
    "COMSPEC",
    "PATHEXT",
    "HOMEPATH",
    "HOMEDRIVE",
];
44
/// Address-space cap, in MiB, applied to spawned subprocesses on Linux when
/// `SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB` is unset or unparsable.
const DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB: u64 = 4096;
47
48#[cfg(target_os = "linux")]
55pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
56 use std::os::unix::process::CommandExt;
57 let max_mb: u64 = std::env::var("SQLITE_GRAPHRAG_SUBPROCESS_MEMORY_LIMIT_MB")
58 .ok()
59 .and_then(|v| v.parse().ok())
60 .unwrap_or(DEFAULT_SUBPROCESS_MEMORY_LIMIT_MB);
61 let max_bytes = max_mb * 1024 * 1024;
62 unsafe {
69 cmd.pre_exec(move || {
70 let sid = libc::setsid();
71 if sid == -1 {
72 let err = std::io::Error::last_os_error();
73 if err.raw_os_error() != Some(libc::EPERM) {
74 return Err(err);
75 }
76 }
77 let limit = libc::rlimit {
78 rlim_cur: max_bytes,
79 rlim_max: max_bytes,
80 };
81 if libc::setrlimit(libc::RLIMIT_AS, &limit) != 0 {
82 return Err(std::io::Error::last_os_error());
83 }
84 Ok(())
85 });
86 }
87 tracing::debug!(
88 target: "process",
89 program = ?cmd.get_program(),
90 args = ?cmd.get_args().collect::<Vec<_>>(),
91 "spawning external process"
92 );
93 cmd.spawn()
94}
95
/// Spawns `cmd` on non-Linux targets.
///
/// No memory limit is applied here. On Unix targets the child still calls
/// `setsid` before `exec` (an `EPERM` failure is ignored); on other targets
/// the command is spawned as-is.
///
/// # Errors
///
/// Returns the `io::Error` produced by `Command::spawn`, including a `setsid`
/// failure other than `EPERM` reported by the pre-exec hook.
#[cfg(not(target_os = "linux"))]
pub fn spawn_with_memory_limit(cmd: &mut Command) -> std::io::Result<std::process::Child> {
    #[cfg(unix)]
    {
        use std::os::unix::process::CommandExt;

        let detach_session = || -> std::io::Result<()> {
            // SAFETY: `setsid` takes no arguments and touches no Rust-managed state.
            if unsafe { libc::setsid() } != -1 {
                return Ok(());
            }
            let os_err = std::io::Error::last_os_error();
            match os_err.raw_os_error() {
                // EPERM is tolerated; the spawn proceeds without a new session.
                Some(code) if code == libc::EPERM => Ok(()),
                _ => Err(os_err),
            }
        };

        // SAFETY: the hook runs in the child between fork and exec, captures
        // nothing, and only calls `setsid` and reads errno.
        unsafe {
            cmd.pre_exec(detach_session);
        }
    }

    tracing::debug!(
        target: "process",
        program = ?cmd.get_program(),
        args = ?cmd.get_args().collect::<Vec<_>>(),
        "spawning external process"
    );
    cmd.spawn()
}
126
127#[derive(Debug, serde::Deserialize)]
129pub struct ClaudeOutputElement {
130 pub r#type: Option<String>,
131 pub subtype: Option<String>,
132 #[serde(default)]
133 pub is_error: bool,
134 pub structured_output: Option<serde_json::Value>,
135 pub result: Option<String>,
136 pub total_cost_usd: Option<f64>,
137 pub error: Option<String>,
138 pub terminal_reason: Option<String>,
139 #[serde(rename = "apiKeySource")]
140 pub api_key_source: Option<String>,
141}
142
143#[derive(Debug)]
145pub struct ClaudeResult {
146 pub value: serde_json::Value,
147 pub cost_usd: f64,
148 pub is_oauth: bool,
149}
150
151pub fn validate_claude_version(binary: &Path) -> Result<String, AppError> {
153 let resolved = which::which(binary).map_err(|_| {
154 AppError::Validation(format!(
155 "executable '{}' not found in PATH; ensure it is installed and accessible",
156 binary.display()
157 ))
158 })?;
159 let output = Command::new(&resolved)
160 .arg("--version")
161 .stdin(Stdio::null())
162 .stdout(Stdio::piped())
163 .stderr(Stdio::piped())
164 .output()
165 .map_err(AppError::Io)?;
166
167 if !output.status.success() {
168 return Err(AppError::Validation(
169 "failed to run 'claude --version'".to_string(),
170 ));
171 }
172
173 let version_str = String::from_utf8(output.stdout)
174 .map_err(|_| AppError::Validation("claude --version output is not UTF-8".to_string()))?;
175 let version = version_str.trim().to_string();
176 let numeric = version.split([' ', '(']).next().unwrap_or("").trim();
177
178 fn parse_semver(s: &str) -> Option<(u64, u64, u64)> {
179 let parts: Vec<&str> = s.splitn(3, '.').collect();
180 if parts.len() < 2 {
181 return None;
182 }
183 let major = parts[0].parse::<u64>().ok()?;
184 let minor = parts[1].parse::<u64>().ok()?;
185 let patch = parts
186 .get(2)
187 .and_then(|p| p.parse::<u64>().ok())
188 .unwrap_or(0);
189 Some((major, minor, patch))
190 }
191
192 if let (Some(actual), Some(min)) = (parse_semver(numeric), parse_semver(MIN_CLAUDE_VERSION)) {
193 if actual < min {
194 return Err(AppError::Validation(format!(
195 "Claude Code version {numeric} is below minimum required {MIN_CLAUDE_VERSION}"
196 )));
197 }
198 }
199
200 Ok(version)
201}
202
203pub fn build_claude_command(
219 binary: &Path,
220 prompt: &str,
221 json_schema: &str,
222 model: Option<&str>,
223 max_turns: u32,
224) -> Command {
225 let mut cmd = Command::new(binary);
226
227 cmd.env_clear();
228 for var in ENV_WHITELIST {
229 if let Ok(val) = std::env::var(var) {
230 cmd.env(var, val);
231 }
232 }
233
234 #[cfg(windows)]
235 for var in ENV_WHITELIST_WINDOWS {
236 if let Ok(val) = std::env::var(var) {
237 cmd.env(var, val);
238 }
239 }
240
241 if let Ok(empty_dir) = std::env::var("SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR") {
244 if std::path::Path::new(&empty_dir).is_dir() {
245 cmd.env("CLAUDE_CONFIG_DIR", &empty_dir);
246 tracing::debug!(
247 target: "claude_runner",
248 "isolating claude subprocess to CLAUDE_CONFIG_DIR={}",
249 empty_dir
250 );
251 } else {
252 tracing::warn!(
253 target: "claude_runner",
254 path = %empty_dir,
255 "SQLITE_GRAPHRAG_CLAUDE_EMPTY_CONFIG_DIR is set but path is not a directory; \
256 ignoring. MCP isolation will NOT be applied."
257 );
258 }
259 }
260
261 cmd.arg("-p")
262 .arg(prompt)
263 .arg("--output-format")
264 .arg("json")
265 .arg("--json-schema")
266 .arg(json_schema)
267 .arg("--max-turns")
268 .arg(max_turns.to_string())
269 .arg("--no-session-persistence");
270
271 if std::env::var("ANTHROPIC_API_KEY").is_ok() {
272 cmd.arg("--bare");
273 } else {
274 cmd.arg("--dangerously-skip-permissions")
275 .arg("--settings")
276 .arg(r#"{"hooks":{}}"#);
277 }
278
279 if let Some(m) = model {
280 cmd.arg("--model").arg(m);
281 }
282
283 cmd.stdin(Stdio::null())
284 .stdout(Stdio::piped())
285 .stderr(Stdio::piped());
286
287 cmd
288}
289
290pub fn parse_claude_output(stdout: &str) -> Result<ClaudeResult, AppError> {
295 let elements: Vec<ClaudeOutputElement> = serde_json::from_str(stdout).map_err(|e| {
296 AppError::Validation(format!("failed to parse claude output as JSON array: {e}"))
297 })?;
298
299 let is_oauth = elements
300 .iter()
301 .find(|e| e.r#type.as_deref() == Some("system") && e.subtype.as_deref() == Some("init"))
302 .and_then(|e| e.api_key_source.as_deref())
303 .map(|s| s == "none")
304 .unwrap_or(false);
305
306 let result_elem = elements
307 .iter()
308 .find(|e| e.r#type.as_deref() == Some("result"))
309 .ok_or_else(|| {
310 AppError::Validation("claude output missing 'result' element".to_string())
311 })?;
312
313 if result_elem.terminal_reason.as_deref() == Some("max_turns") {
315 tracing::warn!(
316 target: "claude_runner",
317 "claude -p hit max_turns limit — hooks may have consumed turns"
318 );
319 return Err(AppError::Validation(
320 "claude -p hit max_turns: hooks may be consuming turns; increase --max-turns or disable hooks".to_string(),
321 ));
322 }
323
324 if result_elem.is_error {
325 let err_msg = result_elem
326 .error
327 .as_deref()
328 .or(result_elem.result.as_deref())
329 .unwrap_or("unknown error");
330 if err_msg.contains("rate_limit") || err_msg.contains("overloaded") {
331 return Err(AppError::RateLimited {
332 detail: err_msg.to_string(),
333 });
334 }
335 if err_msg.contains("Not logged in") || err_msg.contains("authentication") {
336 tracing::warn!(
337 target: "claude_runner",
338 "Claude Code authentication failed. Re-authenticate interactively with: claude"
339 );
340 }
341 return Err(AppError::Validation(format!(
342 "claude extraction failed: {err_msg}"
343 )));
344 }
345
346 let value = if let Some(v) = result_elem.structured_output.clone() {
347 v
348 } else if let Some(text) = &result_elem.result {
349 serde_json::from_str(text).map_err(|e| {
350 AppError::Validation(format!("failed to parse claude result field as JSON: {e}"))
351 })?
352 } else {
353 return Err(AppError::Validation(
354 "claude result missing structured_output and result field".into(),
355 ));
356 };
357
358 let cost = result_elem.total_cost_usd.unwrap_or(0.0);
359 Ok(ClaudeResult {
360 value,
361 cost_usd: cost,
362 is_oauth,
363 })
364}
365
366pub fn run_claude(
370 binary: &Path,
371 prompt: &str,
372 json_schema: &str,
373 input_text: &str,
374 model: Option<&str>,
375 timeout_secs: u64,
376 max_turns: u32,
377) -> Result<ClaudeResult, AppError> {
378 use wait_timeout::ChildExt;
379
380 let full_prompt = format!("{prompt}\n\n{input_text}");
381 let mut cmd = build_claude_command(binary, &full_prompt, json_schema, model, max_turns);
382
383 let mut child = spawn_with_memory_limit(&mut cmd).map_err(|e| {
384 AppError::Io(std::io::Error::new(
385 e.kind(),
386 format!("failed to spawn claude: {e}"),
387 ))
388 })?;
389
390 let start = std::time::Instant::now();
391 let timeout = std::time::Duration::from_secs(timeout_secs);
392 let status = child.wait_timeout(timeout).map_err(AppError::Io)?;
393
394 match status {
395 Some(exit_status) => {
396 tracing::debug!(
397 target: "process",
398 exit_code = ?exit_status.code(),
399 elapsed_ms = start.elapsed().as_millis() as u64,
400 "external process completed"
401 );
402
403 let mut stdout_buf = Vec::new();
404 let mut stderr_buf = Vec::new();
405 if let Some(mut out) = child.stdout.take() {
406 std::io::Read::read_to_end(&mut out, &mut stdout_buf).map_err(AppError::Io)?;
407 }
408 if let Some(mut err) = child.stderr.take() {
409 std::io::Read::read_to_end(&mut err, &mut stderr_buf).map_err(AppError::Io)?;
410 }
411
412 let stdout_str = String::from_utf8(stdout_buf)
413 .map_err(|_| AppError::Validation("claude -p stdout is not valid UTF-8".into()))?;
414
415 if !exit_status.success() {
417 if let Ok(result) = parse_claude_output(&stdout_str) {
418 return Ok(result);
419 }
420 let stderr_str = String::from_utf8_lossy(&stderr_buf);
421 if stderr_str.contains("auth") || stderr_str.contains("login") {
422 tracing::warn!(
423 target: "claude_runner",
424 "Claude Code authentication may have failed. Re-authenticate with: claude"
425 );
426 }
427 return Err(AppError::Validation(format!(
428 "claude -p exited with code {:?}: {}",
429 exit_status.code(),
430 stderr_str.trim()
431 )));
432 }
433
434 parse_claude_output(&stdout_str)
435 }
436 None => {
437 tracing::warn!(target: "claude_runner", timeout_secs, "claude -p timed out, terminating");
438 terminate_gracefully(&mut child, 3);
439 Err(AppError::Validation(format!(
440 "claude -p timed out after {timeout_secs} seconds"
441 )))
442 }
443 }
444}
445
446#[cfg(unix)]
448pub fn terminate_gracefully(child: &mut std::process::Child, grace_secs: u64) {
449 use wait_timeout::ChildExt;
450 unsafe {
451 libc::kill(child.id() as i32, libc::SIGTERM);
452 }
453 match child.wait_timeout(std::time::Duration::from_secs(grace_secs)) {
454 Ok(Some(_)) => {}
455 _ => {
456 tracing::warn!(target: "process", pid = child.id(), "child ignored SIGTERM, sending SIGKILL");
457 let _ = child.kill();
458 let _ = child.wait();
459 }
460 }
461}
462
/// Stops `child` on non-Unix targets by killing it and then reaping it.
///
/// The grace period is unused here, and failures of either call are ignored.
#[cfg(not(unix))]
pub fn terminate_gracefully(child: &mut std::process::Child, _grace_secs: u64) {
    drop(child.kill());
    drop(child.wait());
}
469
#[cfg(test)]
mod tests {
    use super::*;

    /// A `result` element ending with `max_turns` is rejected even though it
    /// carries structured output.
    #[test]
    fn parse_output_detects_max_turns() {
        let raw = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"terminal_reason":"max_turns","structured_output":{"name":"t"}}]"#;
        let failure = parse_claude_output(raw).unwrap_err();
        let rendered = failure.to_string();
        assert!(
            rendered.contains("max_turns"),
            "must detect max_turns in output"
        );
    }

    /// Structured output, cost, and the OAuth flag are all extracted.
    #[test]
    fn parse_output_extracts_structured_value() {
        let raw = r#"[{"type":"system","subtype":"init","apiKeySource":"none"},{"type":"result","is_error":false,"structured_output":{"key":"val"},"total_cost_usd":0.01}]"#;
        let parsed = parse_claude_output(raw).unwrap();
        assert_eq!(parsed.value["key"], "val");
        let cost_delta = (parsed.cost_usd - 0.01).abs();
        assert!(cost_delta < f64::EPSILON);
        assert!(parsed.is_oauth);
    }

    /// An error mentioning `rate_limit` maps to `AppError::RateLimited`.
    #[test]
    fn parse_output_detects_rate_limit() {
        let raw = r#"[{"type":"result","is_error":true,"error":"rate_limit exceeded"}]"#;
        let failure = parse_claude_output(raw).unwrap_err();
        assert!(
            matches!(failure, AppError::RateLimited { .. }),
            "expected AppError::RateLimited, got: {failure}"
        );
    }
}