ai_agent/utils/query_helpers.rs
//! Query helper utilities.
//!
//! Ported from ~/claudecode/openclaudecode/src/utils/queryHelpers.ts
//! Provides utilities for running ripgrep searches, building a file state cache
//! from message history, and extracting the CLI tools used in Bash tool calls.
6
7use std::collections::{HashMap, HashSet};
8use std::fs;
9use std::path::{Path, PathBuf};
10use std::process::Command;
11
12// ---------------------------------------------------------------------------
13// Ripgrep helpers
14// ---------------------------------------------------------------------------
15
/// Parse ripgrep output into the list of matched file paths.
///
/// With the `--files-with-matches` flag ripgrep prints one file path per line.
/// Every line is trimmed of surrounding whitespace first, and lines that are
/// empty after trimming are dropped, so blank or whitespace-only lines never
/// produce an empty path.
pub fn parse_rg_output(output: &str) -> Vec<String> {
    output
        .lines()
        // Trim before filtering: a line such as "   " must not survive as "".
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(String::from)
        .collect()
}
26
/// Search with ripgrep for a pattern in the given path.
///
/// Runs `rg --files-with-matches` and returns its raw standard output (one
/// matching file path per line); feed it to `parse_rg_output` to get a list.
///
/// # Errors
///
/// Returns an error message when the `rg` process cannot be started, or when
/// it exits unsuccessfully with a code other than `1`. The message is
/// ripgrep's trimmed stderr, or a generic "exited with code" text when stderr
/// is blank. Exit code `1` means "no matches" and yields `Ok` with an empty
/// string.
pub fn search_with_rg(pattern: &str, path: &str) -> Result<String, String> {
    let output = Command::new("rg")
        .arg("--files-with-matches")
        .arg("--no-heading")
        .arg("--line-number")
        // `-e` marks the next argument as the pattern and `--` ends option
        // parsing, so a pattern or path starting with `-` is never read as a flag.
        .arg("-e")
        .arg(pattern)
        .arg("--")
        .arg(path)
        .output()
        .map_err(|e| format!("Failed to execute ripgrep: {}", e))?;

    if output.status.success() {
        return Ok(String::from_utf8_lossy(&output.stdout).into_owned());
    }

    match output.status.code() {
        // Exit code 1 from ripgrep means no matches found (not an error)
        Some(1) => Ok(String::new()),
        code => {
            let stderr = String::from_utf8_lossy(&output.stderr);
            let stderr = stderr.trim();
            Err(if stderr.is_empty() {
                // `code` is `None` when the process was terminated by a signal.
                format!("ripgrep exited with code {}", code.unwrap_or(-1))
            } else {
                stderr.to_string()
            })
        }
    }
}
59
60// ---------------------------------------------------------------------------
61// File state cache (from extractReadFilesFromMessages)
62// ---------------------------------------------------------------------------
63
/// Snapshot of a single file's content as recorded at one moment.
#[derive(Clone, Debug)]
pub struct FileStateEntry {
    /// Text content of the file.
    pub content: String,
    /// When `content` was captured, in milliseconds since the Unix epoch.
    pub timestamp: u64,
    /// Start offset if the entry comes from a ranged read, otherwise `None`.
    pub offset: Option<u64>,
    /// Limit if the entry comes from a ranged read, otherwise `None`.
    pub limit: Option<u64>,
}
76
77/// Cache of file states extracted from message history.
78#[derive(Debug, Clone)]
79pub struct FileStateCache {
80 entries: lru::LruCache<String, FileStateEntry>,
81}
82
83impl FileStateCache {
84 /// Create a new file state cache with the given maximum size.
85 pub fn new(max_size: usize) -> Self {
86 Self {
87 entries: lru::LruCache::new(std::num::NonZero::new(max_size).unwrap()),
88 }
89 }
90
91 /// Insert a file state entry into the cache.
92 pub fn set(&mut self, path: impl Into<String>, entry: FileStateEntry) {
93 self.entries.put(path.into(), entry);
94 }
95
96 /// Get a file state entry from the cache.
97 pub fn get(&self, path: &str) -> Option<&FileStateEntry> {
98 self.entries.peek(path)
99 }
100
101 /// Check if the cache contains an entry for the given path.
102 pub fn contains(&self, path: &str) -> bool {
103 self.entries.contains(path)
104 }
105
106 /// Get the number of entries in the cache.
107 pub fn len(&self) -> usize {
108 self.entries.len()
109 }
110
111 /// Check if the cache is empty.
112 pub fn is_empty(&self) -> bool {
113 self.entries.is_empty()
114 }
115}
116
117/// FileReadTool input schema.
118#[derive(Debug, Clone)]
119struct FileReadInput {
120 file_path: Option<String>,
121 offset: Option<u64>,
122 limit: Option<u64>,
123}
124
125impl FileReadInput {
126 fn from_value(v: &serde_json::Value) -> Option<Self> {
127 Some(FileReadInput {
128 file_path: v
129 .get("file_path")
130 .and_then(|v| v.as_str())
131 .map(String::from),
132 offset: v.get("offset").and_then(|v| v.as_u64()),
133 limit: v.get("limit").and_then(|v| v.as_u64()),
134 })
135 }
136}
137
138/// FileWriteTool input schema.
139#[derive(Debug, Clone)]
140struct FileWriteInput {
141 file_path: Option<String>,
142 content: Option<String>,
143}
144
145impl FileWriteInput {
146 fn from_value(v: &serde_json::Value) -> Option<Self> {
147 Some(FileWriteInput {
148 file_path: v
149 .get("file_path")
150 .and_then(|v| v.as_str())
151 .map(String::from),
152 content: v.get("content").and_then(|v| v.as_str()).map(String::from),
153 })
154 }
155}
156
157/// FileEditTool input schema.
158#[derive(Debug, Clone)]
159struct FileEditInput {
160 file_path: Option<String>,
161}
162
163impl FileEditInput {
164 fn from_value(v: &serde_json::Value) -> Option<Self> {
165 Some(FileEditInput {
166 file_path: v
167 .get("file_path")
168 .and_then(|v| v.as_str())
169 .map(String::from),
170 })
171 }
172}
173
/// Text a read tool result starts with when it is a dedup stub for an
/// unchanged file and therefore carries no file content.
const FILE_UNCHANGED_STUB: &str = "(file unchanged)";
176
177/// Expand a path to an absolute path, resolving `~` and relative paths.
178fn expand_path(path: &str, cwd: &str) -> String {
179 let p = if path.starts_with("~") {
180 if let Some(home) = dirs::home_dir() {
181 let rest = path.trim_start_matches("~");
182 let rest = rest.trim_start_matches('/');
183 home.join(rest)
184 } else {
185 PathBuf::from(path)
186 }
187 } else if Path::new(path).is_relative() {
188 PathBuf::from(cwd).join(path)
189 } else {
190 PathBuf::from(path)
191 };
192
193 p.to_string_lossy().to_string()
194}
195
/// Strip a leading `<digits>:` line-number prefix from a line
/// (e.g., "123:content" -> "content").
///
/// The prefix is removed only when at least one character precedes the first
/// `:` and all of those characters are ASCII digits. Any other line,
/// including one that starts with `:`, is returned unchanged.
///
/// NOTE(review): this is applied to Read tool results; confirm that the tool
/// really emits `N:` prefixes and not another separator.
fn strip_line_number_prefix(line: &str) -> &str {
    match line.split_once(':') {
        // An empty prefix is not a line number, so ":foo" must stay intact.
        Some((prefix, rest))
            if !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()) =>
        {
            rest
        }
        _ => line,
    }
}
205
206/// Extract read files from messages and build a file state cache.
207///
208/// First pass: find all FileReadTool/FileWriteTool/FileEditTool uses in assistant messages.
209/// Second pass: find corresponding tool results and extract content.
210///
211/// # Arguments
212/// * `messages` - Message history to extract from
213/// * `cwd` - Current working directory for path resolution
214/// * `max_size` - Maximum number of entries in the cache
215pub fn extract_read_files_from_messages(
216 messages: &[serde_json::Value],
217 cwd: &str,
218 max_size: usize,
219) -> FileStateCache {
220 let mut cache = FileStateCache::new(max_size);
221
222 // Tool name constants matching the TS source
223 const FILE_READ_TOOL_NAME: &str = "Read";
224 const FILE_WRITE_TOOL_NAME: &str = "Write";
225 const FILE_EDIT_TOOL_NAME: &str = "Edit";
226
227 // First pass: find all FileReadTool/FileWriteTool/FileEditTool uses in assistant messages
228 let mut file_read_tool_use_ids: HashMap<String, String> = HashMap::new(); // toolUseId -> filePath
229 let mut file_write_tool_use_ids: HashMap<String, (String, String)> = HashMap::new(); // toolUseId -> (filePath, content)
230 let mut file_edit_tool_use_ids: HashMap<String, String> = HashMap::new(); // toolUseId -> filePath
231
232 for message in messages {
233 if let Some(msg_type) = message.get("type").and_then(|v| v.as_str()) {
234 if msg_type == "assistant" {
235 if let Some(content) = message.get("message").and_then(|v| v.get("content")) {
236 if let Some(blocks) = content.as_array() {
237 for block in blocks {
238 if let Some(block_type) = block.get("type").and_then(|v| v.as_str()) {
239 if block_type == "tool_use" {
240 let tool_name =
241 block.get("name").and_then(|v| v.as_str()).unwrap_or("");
242 let tool_id =
243 block.get("id").and_then(|v| v.as_str()).unwrap_or("");
244 let input = block.get("input");
245
246 if let Some(input) = input {
247 match tool_name {
248 FILE_READ_TOOL_NAME => {
249 if let Some(read_input) =
250 FileReadInput::from_value(input)
251 {
252 // Ranged reads are not added to the cache
253 if let Some(fp) = read_input.file_path {
254 if read_input.offset.is_none()
255 && read_input.limit.is_none()
256 {
257 let abs_path = expand_path(&fp, cwd);
258 file_read_tool_use_ids.insert(
259 tool_id.to_string(),
260 abs_path,
261 );
262 }
263 }
264 }
265 }
266 FILE_WRITE_TOOL_NAME => {
267 if let Some(write_input) =
268 FileWriteInput::from_value(input)
269 {
270 if let (Some(fp), Some(content)) =
271 (write_input.file_path, write_input.content)
272 {
273 let abs_path = expand_path(&fp, cwd);
274 file_write_tool_use_ids.insert(
275 tool_id.to_string(),
276 (abs_path, content),
277 );
278 }
279 }
280 }
281 FILE_EDIT_TOOL_NAME => {
282 if let Some(edit_input) =
283 FileEditInput::from_value(input)
284 {
285 if let Some(fp) = edit_input.file_path {
286 let abs_path = expand_path(&fp, cwd);
287 file_edit_tool_use_ids
288 .insert(tool_id.to_string(), abs_path);
289 }
290 }
291 }
292 _ => {}
293 }
294 }
295 }
296 }
297 }
298 }
299 }
300 }
301 }
302 }
303
304 // Second pass: find corresponding tool results and extract content
305 for message in messages {
306 if let Some(msg_type) = message.get("type").and_then(|v| v.as_str()) {
307 if msg_type == "user" {
308 if let Some(content) = message.get("message").and_then(|v| v.get("content")) {
309 if let Some(blocks) = content.as_array() {
310 for block in blocks {
311 if let Some(block_type) = block.get("type").and_then(|v| v.as_str()) {
312 if block_type == "tool_result" {
313 let tool_use_id =
314 block.get("tool_use_id").and_then(|v| v.as_str());
315
316 if let Some(tool_use_id) = tool_use_id {
317 // Handle Read tool results
318 if let Some(read_file_path) =
319 file_read_tool_use_ids.get(tool_use_id)
320 {
321 if let Some(result_content) =
322 block.get("content").and_then(|v| v.as_str())
323 {
324 // Dedup stubs contain no file content
325 if !result_content.starts_with(FILE_UNCHANGED_STUB)
326 {
327 // Remove system-reminder blocks using regex
328 let re = regex::Regex::new(
329 r"<system-reminder>[\s\S]*?</system-reminder>",
330 ).ok();
331 let processed = if let Some(ref re) = re {
332 re.replace_all(result_content, "")
333 .to_string()
334 } else {
335 result_content.to_string()
336 };
337
338 // Strip line number prefixes
339 let file_content: String = processed
340 .lines()
341 .map(strip_line_number_prefix)
342 .collect::<Vec<_>>()
343 .join("\n")
344 .trim()
345 .to_string();
346
347 // Cache the file content
348 let timestamp = message
349 .get("timestamp")
350 .and_then(|v| v.as_str())
351 .and_then(|ts| {
352 chrono::DateTime::parse_from_rfc3339(ts)
353 .ok()
354 .map(|dt| {
355 dt.timestamp_millis() as u64
356 })
357 })
358 .unwrap_or(0);
359
360 cache.set(
361 read_file_path.clone(),
362 FileStateEntry {
363 content: file_content,
364 timestamp,
365 offset: None,
366 limit: None,
367 },
368 );
369 }
370 }
371 }
372
373 // Handle Write tool results
374 if let Some((file_path, content)) =
375 file_write_tool_use_ids.get(tool_use_id)
376 {
377 let timestamp = message
378 .get("timestamp")
379 .and_then(|v| v.as_str())
380 .and_then(|ts| {
381 chrono::DateTime::parse_from_rfc3339(ts)
382 .ok()
383 .map(|dt| dt.timestamp_millis() as u64)
384 })
385 .unwrap_or(0);
386
387 cache.set(
388 file_path.clone(),
389 FileStateEntry {
390 content: content.clone(),
391 timestamp,
392 offset: None,
393 limit: None,
394 },
395 );
396 }
397
398 // Handle Edit tool results
399 if let Some(edit_file_path) =
400 file_edit_tool_use_ids.get(tool_use_id)
401 {
402 let is_error = block
403 .get("is_error")
404 .and_then(|v| v.as_bool())
405 .unwrap_or(false);
406
407 if !is_error {
408 // Read current disk state for edit results
409 if let Ok(disk_content) =
410 fs::read_to_string(edit_file_path)
411 {
412 // Use file mtime as timestamp
413 let timestamp = fs::metadata(edit_file_path)
414 .ok()
415 .and_then(|m| m.modified().ok())
416 .and_then(|t| {
417 t.duration_since(std::time::UNIX_EPOCH)
418 .ok()
419 .map(|d| d.as_millis() as u64)
420 })
421 .unwrap_or(0);
422
423 cache.set(
424 edit_file_path.clone(),
425 FileStateEntry {
426 content: disk_content,
427 timestamp,
428 offset: None,
429 limit: None,
430 },
431 );
432 }
433 }
434 }
435 }
436 }
437 }
438 }
439 }
440 }
441 }
442 }
443 }
444
445 cache
446}
447
448// ---------------------------------------------------------------------------
449// Bash tool extraction (from extractBashToolsFromMessages)
450// ---------------------------------------------------------------------------
451
/// Command prefixes that are skipped over when looking for the CLI name
/// in a bash command.
const STRIPPED_COMMANDS: &[&str] = &["sudo"];
454
455/// Extract the top-level CLI tools used in BashTool calls from message history.
456///
457/// Returns a deduplicated set of command names (e.g. 'vercel', 'aws', 'git').
458/// Skips environment variable assignments and prefixes in STRIPPED_COMMANDS.
459pub fn extract_bash_tools_from_messages(messages: &[serde_json::Value]) -> HashSet<String> {
460 let mut tools = HashSet::new();
461
462 for message in messages {
463 if let Some(msg_type) = message.get("type").and_then(|v| v.as_str()) {
464 if msg_type == "assistant" {
465 if let Some(content) = message.get("message").and_then(|v| v.get("content")) {
466 if let Some(blocks) = content.as_array() {
467 for block in blocks {
468 if let Some(block_type) = block.get("type").and_then(|v| v.as_str()) {
469 if block_type == "tool_use" {
470 let tool_name =
471 block.get("name").and_then(|v| v.as_str()).unwrap_or("");
472 if tool_name == "Bash" {
473 if let Some(input) = block.get("input") {
474 if let Some(command) =
475 input.get("command").and_then(|v| v.as_str())
476 {
477 if let Some(cli_name) = extract_cli_name(command) {
478 tools.insert(cli_name);
479 }
480 }
481 }
482 }
483 }
484 }
485 }
486 }
487 }
488 }
489 }
490 }
491
492 tools
493}
494
495/// Extract the actual CLI name from a bash command string, skipping
496/// env var assignments (e.g. `FOO=bar vercel` -> `vercel`) and prefixes
497/// in STRIPPED_COMMANDS.
498fn extract_cli_name(command: &str) -> Option<String> {
499 let tokens: Vec<&str> = command.trim().split_whitespace().collect();
500 for token in tokens {
501 // Skip env var assignments
502 if token.contains('=')
503 && token
504 .chars()
505 .next()
506 .map(|c| c.is_ascii_alphabetic() || c == '_')
507 .unwrap_or(false)
508 {
509 continue;
510 }
511 // Skip stripped commands
512 if STRIPPED_COMMANDS.contains(&token) {
513 continue;
514 }
515 return Some(token.to_string());
516 }
517 None
518}
519
520/// Check if a result should be considered successful based on the last message.
521///
522/// Returns true if:
523/// - Last message is assistant with text/thinking content
524/// - Last message is user with only tool_result blocks
525/// - Last message is the user prompt but the API completed with end_turn
526pub fn is_result_successful(
527 message: Option<&serde_json::Value>,
528 stop_reason: Option<&str>,
529) -> bool {
530 let Some(msg) = message else {
531 return false;
532 };
533
534 if let Some(msg_type) = msg.get("type").and_then(|v| v.as_str()) {
535 if msg_type == "assistant" {
536 if let Some(content) = msg.get("message").and_then(|v| v.get("content")) {
537 if let Some(blocks) = content.as_array() {
538 if let Some(last_block) = blocks.last() {
539 if let Some(block_type) = last_block.get("type").and_then(|v| v.as_str()) {
540 return matches!(block_type, "text" | "thinking" | "redacted_thinking");
541 }
542 }
543 }
544 }
545 }
546
547 if msg_type == "user" {
548 if let Some(content) = msg.get("message").and_then(|v| v.get("content")) {
549 if let Some(blocks) = content.as_array() {
550 if !blocks.is_empty() {
551 return blocks.iter().all(|block| {
552 block
553 .get("type")
554 .and_then(|v| v.as_str())
555 .map(|t| t == "tool_result")
556 .unwrap_or(false)
557 });
558 }
559 }
560 }
561 }
562
563 // API completed with end_turn but yielded no assistant content
564 if stop_reason == Some("end_turn") {
565 return true;
566 }
567 }
568
569 false
570}
571
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a message value of the given `type` with the given content.
    fn message(kind: &str, content: serde_json::Value) -> serde_json::Value {
        serde_json::json!({ "type": kind, "message": { "content": content } })
    }

    #[test]
    fn test_parse_rg_output_empty() {
        for blank in ["", "\n\n"] {
            assert!(
                parse_rg_output(blank).is_empty(),
                "expected no paths for {blank:?}"
            );
        }
    }

    #[test]
    fn test_parse_rg_output_with_paths() {
        let parsed = parse_rg_output("src/file1.rs\nsrc/file2.rs\n\n");
        assert_eq!(parsed, vec!["src/file1.rs", "src/file2.rs"]);
    }

    #[test]
    fn test_extract_cli_name_basic() {
        for (command, expected) in [("git status", "git"), ("ls -la", "ls")] {
            assert_eq!(extract_cli_name(command).as_deref(), Some(expected));
        }
    }

    #[test]
    fn test_extract_cli_name_env_vars() {
        let name = extract_cli_name("FOO=bar vercel deploy");
        assert_eq!(name.as_deref(), Some("vercel"));
    }

    #[test]
    fn test_extract_cli_name_sudo() {
        let name = extract_cli_name("sudo rm -rf /tmp");
        assert_eq!(name.as_deref(), Some("rm"));
    }

    #[test]
    fn test_strip_line_number_prefix() {
        let cases = [
            ("123:hello world", "hello world"),
            ("hello", "hello"),
            ("abc:not a number prefix", "abc:not a number prefix"),
        ];
        for (line, expected) in cases {
            assert_eq!(strip_line_number_prefix(line), expected);
        }
    }

    #[test]
    fn test_expand_path_absolute() {
        assert_eq!(expand_path("/absolute/path", "/cwd"), "/absolute/path");
    }

    #[test]
    fn test_expand_path_relative() {
        assert_eq!(expand_path("relative/path", "/cwd"), "/cwd/relative/path");
    }

    #[test]
    fn test_file_state_cache() {
        let path = "/test/file.rs";
        let mut cache = FileStateCache::new(5);
        assert!(cache.is_empty());

        let entry = FileStateEntry {
            content: String::from("hello"),
            timestamp: 12345,
            offset: None,
            limit: None,
        };
        cache.set(path, entry);

        assert_eq!(cache.len(), 1);
        assert!(cache.contains(path));

        let stored = cache.get(path).unwrap();
        assert_eq!(stored.content, "hello");
    }

    #[test]
    fn test_is_result_successful_assistant() {
        let text = message(
            "assistant",
            serde_json::json!([{ "type": "text", "text": "Hello" }]),
        );
        assert!(is_result_successful(Some(&text), None));

        let thinking = message(
            "assistant",
            serde_json::json!([{ "type": "thinking", "text": "..." }]),
        );
        assert!(is_result_successful(Some(&thinking), None));
    }

    #[test]
    fn test_is_result_successful_user_tool_result() {
        let msg = message("user", serde_json::json!([{ "type": "tool_result" }]));
        assert!(is_result_successful(Some(&msg), None));
    }

    #[test]
    fn test_is_result_successful_end_turn() {
        let prompt = message("user", serde_json::json!("prompt"));
        assert!(is_result_successful(Some(&prompt), Some("end_turn")));
        assert!(!is_result_successful(Some(&prompt), None));
    }

    #[test]
    fn test_is_result_successful_none() {
        assert!(!is_result_successful(None, None));
    }
}