Skip to main content

ai_agent/utils/
query_helpers.rs

1//! Query helpers utilities
2//!
3//! Ported from ~/claudecode/openclaudecode/src/utils/queryHelpers.ts
4//! Provides utilities for ripgrep search, file state caching from message history,
5//! and bash tool extraction.
6
7use std::collections::{HashMap, HashSet};
8use std::fs;
9use std::path::{Path, PathBuf};
10use std::process::Command;
11
12// ---------------------------------------------------------------------------
13// Ripgrep helpers
14// ---------------------------------------------------------------------------
15
/// Parse ripgrep output into the list of matched file paths.
///
/// With the `--files-with-matches` flag ripgrep prints one file path per line.
/// Every line is trimmed of surrounding whitespace first, and lines that are
/// empty *after* trimming (blank or whitespace-only) are dropped, so the
/// result never contains empty path strings.
pub fn parse_rg_output(output: &str) -> Vec<String> {
    output
        .lines()
        // Trim before filtering so whitespace-only lines are discarded too.
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(str::to_owned)
        .collect()
}
26
/// Search with ripgrep for a pattern in the given path.
///
/// Runs `rg --files-with-matches`, so the successful output is one matching
/// file path per line.
///
/// The pattern is passed through `--regexp` and the path after `--`, so
/// neither can be misread as a command-line flag when it starts with `-`.
///
/// # Errors
/// Returns an error message when the `rg` process cannot be started, or when
/// it exits with a status other than success or 1. The message is ripgrep's
/// trimmed stderr when that is non-empty, otherwise the exit code.
///
/// Exit code 1 means "no matches" and is reported as `Ok` with an empty string.
pub fn search_with_rg(pattern: &str, path: &str) -> Result<String, String> {
    let output = Command::new("rg")
        .arg("--files-with-matches")
        .arg("--regexp")
        .arg(pattern)
        .arg("--")
        .arg(path)
        .output()
        .map_err(|e| format!("Failed to execute ripgrep: {}", e))?;

    if output.status.success() {
        return Ok(String::from_utf8_lossy(&output.stdout).into_owned());
    }

    match output.status.code() {
        // Exit code 1 from ripgrep means no matches found (not an error)
        Some(1) => Ok(String::new()),
        code => {
            let stderr = String::from_utf8_lossy(&output.stderr);
            let stderr = stderr.trim();
            if stderr.is_empty() {
                // `code` is `None` when the process was killed by a signal.
                Err(format!(
                    "ripgrep exited with code {}",
                    code.unwrap_or(-1)
                ))
            } else {
                Err(stderr.to_string())
            }
        }
    }
}
59
60// ---------------------------------------------------------------------------
61// File state cache (from extractReadFilesFromMessages)
62// ---------------------------------------------------------------------------
63
/// Cached state of a file at a point in time.
///
/// Entries compare by value (`PartialEq`/`Eq`), which makes it possible to
/// assert on cache contents directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileStateEntry {
    /// Content of the file
    pub content: String,
    /// Timestamp when the content was captured (epoch millis)
    pub timestamp: u64,
    /// Optional offset if this is a ranged read (`None` for a whole-file entry)
    pub offset: Option<u64>,
    /// Optional limit if this is a ranged read (`None` for a whole-file entry)
    pub limit: Option<u64>,
}
76
77/// Cache of file states extracted from message history.
78#[derive(Debug, Clone)]
79pub struct FileStateCache {
80    entries: lru::LruCache<String, FileStateEntry>,
81}
82
83impl FileStateCache {
84    /// Create a new file state cache with the given maximum size.
85    pub fn new(max_size: usize) -> Self {
86        Self {
87            entries: lru::LruCache::new(std::num::NonZero::new(max_size).unwrap()),
88        }
89    }
90
91    /// Insert a file state entry into the cache.
92    pub fn set(&mut self, path: impl Into<String>, entry: FileStateEntry) {
93        self.entries.put(path.into(), entry);
94    }
95
96    /// Get a file state entry from the cache.
97    pub fn get(&self, path: &str) -> Option<&FileStateEntry> {
98        self.entries.peek(path)
99    }
100
101    /// Check if the cache contains an entry for the given path.
102    pub fn contains(&self, path: &str) -> bool {
103        self.entries.contains(path)
104    }
105
106    /// Get the number of entries in the cache.
107    pub fn len(&self) -> usize {
108        self.entries.len()
109    }
110
111    /// Check if the cache is empty.
112    pub fn is_empty(&self) -> bool {
113        self.entries.is_empty()
114    }
115}
116
117/// FileReadTool input schema.
118#[derive(Debug, Clone)]
119struct FileReadInput {
120    file_path: Option<String>,
121    offset: Option<u64>,
122    limit: Option<u64>,
123}
124
125impl FileReadInput {
126    fn from_value(v: &serde_json::Value) -> Option<Self> {
127        Some(FileReadInput {
128            file_path: v
129                .get("file_path")
130                .and_then(|v| v.as_str())
131                .map(String::from),
132            offset: v.get("offset").and_then(|v| v.as_u64()),
133            limit: v.get("limit").and_then(|v| v.as_u64()),
134        })
135    }
136}
137
138/// FileWriteTool input schema.
139#[derive(Debug, Clone)]
140struct FileWriteInput {
141    file_path: Option<String>,
142    content: Option<String>,
143}
144
145impl FileWriteInput {
146    fn from_value(v: &serde_json::Value) -> Option<Self> {
147        Some(FileWriteInput {
148            file_path: v
149                .get("file_path")
150                .and_then(|v| v.as_str())
151                .map(String::from),
152            content: v.get("content").and_then(|v| v.as_str()).map(String::from),
153        })
154    }
155}
156
157/// FileEditTool input schema.
158#[derive(Debug, Clone)]
159struct FileEditInput {
160    file_path: Option<String>,
161}
162
163impl FileEditInput {
164    fn from_value(v: &serde_json::Value) -> Option<Self> {
165        Some(FileEditInput {
166            file_path: v
167                .get("file_path")
168                .and_then(|v| v.as_str())
169                .map(String::from),
170        })
171    }
172}
173
/// Prefix that marks a read tool result as a "file unchanged" stub.
///
/// Results starting with this text carry no file content and are skipped when
/// building the file state cache.
const FILE_UNCHANGED_STUB: &str = "(file unchanged)";
176
177/// Expand a path to an absolute path, resolving `~` and relative paths.
178fn expand_path(path: &str, cwd: &str) -> String {
179    let p = if path.starts_with("~") {
180        if let Some(home) = dirs::home_dir() {
181            let rest = path.trim_start_matches("~");
182            let rest = rest.trim_start_matches('/');
183            home.join(rest)
184        } else {
185            PathBuf::from(path)
186        }
187    } else if Path::new(path).is_relative() {
188        PathBuf::from(cwd).join(path)
189    } else {
190        PathBuf::from(path)
191    };
192
193    p.to_string_lossy().to_string()
194}
195
/// Strip a leading line-number prefix from a line (e.g., "123:content" -> "content").
///
/// The prefix must be one or more ASCII digits immediately followed by `:`.
/// Lines without such a prefix are returned unchanged. That includes lines
/// that start with a bare `:`, which has no digits in front of it and is
/// therefore content rather than a line-number separator.
fn strip_line_number_prefix(line: &str) -> &str {
    match line.split_once(':') {
        Some((digits, rest))
            if !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()) =>
        {
            rest
        }
        _ => line,
    }
}
205
206/// Extract read files from messages and build a file state cache.
207///
208/// First pass: find all FileReadTool/FileWriteTool/FileEditTool uses in assistant messages.
209/// Second pass: find corresponding tool results and extract content.
210///
211/// # Arguments
212/// * `messages` - Message history to extract from
213/// * `cwd` - Current working directory for path resolution
214/// * `max_size` - Maximum number of entries in the cache
215pub fn extract_read_files_from_messages(
216    messages: &[serde_json::Value],
217    cwd: &str,
218    max_size: usize,
219) -> FileStateCache {
220    let mut cache = FileStateCache::new(max_size);
221
222    // Tool name constants matching the TS source
223    const FILE_READ_TOOL_NAME: &str = "Read";
224    const FILE_WRITE_TOOL_NAME: &str = "Write";
225    const FILE_EDIT_TOOL_NAME: &str = "Edit";
226
227    // First pass: find all FileReadTool/FileWriteTool/FileEditTool uses in assistant messages
228    let mut file_read_tool_use_ids: HashMap<String, String> = HashMap::new(); // toolUseId -> filePath
229    let mut file_write_tool_use_ids: HashMap<String, (String, String)> = HashMap::new(); // toolUseId -> (filePath, content)
230    let mut file_edit_tool_use_ids: HashMap<String, String> = HashMap::new(); // toolUseId -> filePath
231
232    for message in messages {
233        if let Some(msg_type) = message.get("type").and_then(|v| v.as_str()) {
234            if msg_type == "assistant" {
235                if let Some(content) = message.get("message").and_then(|v| v.get("content")) {
236                    if let Some(blocks) = content.as_array() {
237                        for block in blocks {
238                            if let Some(block_type) = block.get("type").and_then(|v| v.as_str()) {
239                                if block_type == "tool_use" {
240                                    let tool_name =
241                                        block.get("name").and_then(|v| v.as_str()).unwrap_or("");
242                                    let tool_id =
243                                        block.get("id").and_then(|v| v.as_str()).unwrap_or("");
244                                    let input = block.get("input");
245
246                                    if let Some(input) = input {
247                                        match tool_name {
248                                            FILE_READ_TOOL_NAME => {
249                                                if let Some(read_input) =
250                                                    FileReadInput::from_value(input)
251                                                {
252                                                    // Ranged reads are not added to the cache
253                                                    if let Some(fp) = read_input.file_path {
254                                                        if read_input.offset.is_none()
255                                                            && read_input.limit.is_none()
256                                                        {
257                                                            let abs_path = expand_path(&fp, cwd);
258                                                            file_read_tool_use_ids.insert(
259                                                                tool_id.to_string(),
260                                                                abs_path,
261                                                            );
262                                                        }
263                                                    }
264                                                }
265                                            }
266                                            FILE_WRITE_TOOL_NAME => {
267                                                if let Some(write_input) =
268                                                    FileWriteInput::from_value(input)
269                                                {
270                                                    if let (Some(fp), Some(content)) =
271                                                        (write_input.file_path, write_input.content)
272                                                    {
273                                                        let abs_path = expand_path(&fp, cwd);
274                                                        file_write_tool_use_ids.insert(
275                                                            tool_id.to_string(),
276                                                            (abs_path, content),
277                                                        );
278                                                    }
279                                                }
280                                            }
281                                            FILE_EDIT_TOOL_NAME => {
282                                                if let Some(edit_input) =
283                                                    FileEditInput::from_value(input)
284                                                {
285                                                    if let Some(fp) = edit_input.file_path {
286                                                        let abs_path = expand_path(&fp, cwd);
287                                                        file_edit_tool_use_ids
288                                                            .insert(tool_id.to_string(), abs_path);
289                                                    }
290                                                }
291                                            }
292                                            _ => {}
293                                        }
294                                    }
295                                }
296                            }
297                        }
298                    }
299                }
300            }
301        }
302    }
303
304    // Second pass: find corresponding tool results and extract content
305    for message in messages {
306        if let Some(msg_type) = message.get("type").and_then(|v| v.as_str()) {
307            if msg_type == "user" {
308                if let Some(content) = message.get("message").and_then(|v| v.get("content")) {
309                    if let Some(blocks) = content.as_array() {
310                        for block in blocks {
311                            if let Some(block_type) = block.get("type").and_then(|v| v.as_str()) {
312                                if block_type == "tool_result" {
313                                    let tool_use_id =
314                                        block.get("tool_use_id").and_then(|v| v.as_str());
315
316                                    if let Some(tool_use_id) = tool_use_id {
317                                        // Handle Read tool results
318                                        if let Some(read_file_path) =
319                                            file_read_tool_use_ids.get(tool_use_id)
320                                        {
321                                            if let Some(result_content) =
322                                                block.get("content").and_then(|v| v.as_str())
323                                            {
324                                                // Dedup stubs contain no file content
325                                                if !result_content.starts_with(FILE_UNCHANGED_STUB)
326                                                {
327                                                    // Remove system-reminder blocks using regex
328                                                    let re = regex::Regex::new(
329                                                        r"<system-reminder>[\s\S]*?</system-reminder>",
330                                                    ).ok();
331                                                    let processed = if let Some(ref re) = re {
332                                                        re.replace_all(result_content, "")
333                                                            .to_string()
334                                                    } else {
335                                                        result_content.to_string()
336                                                    };
337
338                                                    // Strip line number prefixes
339                                                    let file_content: String = processed
340                                                        .lines()
341                                                        .map(strip_line_number_prefix)
342                                                        .collect::<Vec<_>>()
343                                                        .join("\n")
344                                                        .trim()
345                                                        .to_string();
346
347                                                    // Cache the file content
348                                                    let timestamp = message
349                                                        .get("timestamp")
350                                                        .and_then(|v| v.as_str())
351                                                        .and_then(|ts| {
352                                                            chrono::DateTime::parse_from_rfc3339(ts)
353                                                                .ok()
354                                                                .map(|dt| {
355                                                                    dt.timestamp_millis() as u64
356                                                                })
357                                                        })
358                                                        .unwrap_or(0);
359
360                                                    cache.set(
361                                                        read_file_path.clone(),
362                                                        FileStateEntry {
363                                                            content: file_content,
364                                                            timestamp,
365                                                            offset: None,
366                                                            limit: None,
367                                                        },
368                                                    );
369                                                }
370                                            }
371                                        }
372
373                                        // Handle Write tool results
374                                        if let Some((file_path, content)) =
375                                            file_write_tool_use_ids.get(tool_use_id)
376                                        {
377                                            let timestamp = message
378                                                .get("timestamp")
379                                                .and_then(|v| v.as_str())
380                                                .and_then(|ts| {
381                                                    chrono::DateTime::parse_from_rfc3339(ts)
382                                                        .ok()
383                                                        .map(|dt| dt.timestamp_millis() as u64)
384                                                })
385                                                .unwrap_or(0);
386
387                                            cache.set(
388                                                file_path.clone(),
389                                                FileStateEntry {
390                                                    content: content.clone(),
391                                                    timestamp,
392                                                    offset: None,
393                                                    limit: None,
394                                                },
395                                            );
396                                        }
397
398                                        // Handle Edit tool results
399                                        if let Some(edit_file_path) =
400                                            file_edit_tool_use_ids.get(tool_use_id)
401                                        {
402                                            let is_error = block
403                                                .get("is_error")
404                                                .and_then(|v| v.as_bool())
405                                                .unwrap_or(false);
406
407                                            if !is_error {
408                                                // Read current disk state for edit results
409                                                if let Ok(disk_content) =
410                                                    fs::read_to_string(edit_file_path)
411                                                {
412                                                    // Use file mtime as timestamp
413                                                    let timestamp = fs::metadata(edit_file_path)
414                                                        .ok()
415                                                        .and_then(|m| m.modified().ok())
416                                                        .and_then(|t| {
417                                                            t.duration_since(std::time::UNIX_EPOCH)
418                                                                .ok()
419                                                                .map(|d| d.as_millis() as u64)
420                                                        })
421                                                        .unwrap_or(0);
422
423                                                    cache.set(
424                                                        edit_file_path.clone(),
425                                                        FileStateEntry {
426                                                            content: disk_content,
427                                                            timestamp,
428                                                            offset: None,
429                                                            limit: None,
430                                                        },
431                                                    );
432                                                }
433                                            }
434                                        }
435                                    }
436                                }
437                            }
438                        }
439                    }
440                }
441            }
442        }
443    }
444
445    cache
446}
447
448// ---------------------------------------------------------------------------
449// Bash tool extraction (from extractBashToolsFromMessages)
450// ---------------------------------------------------------------------------
451
/// Command prefixes that are skipped over when looking for the CLI name in a
/// bash command (e.g. `sudo rm` is reported as `rm`).
const STRIPPED_COMMANDS: &[&str] = &["sudo"];
454
455/// Extract the top-level CLI tools used in BashTool calls from message history.
456///
457/// Returns a deduplicated set of command names (e.g. 'vercel', 'aws', 'git').
458/// Skips environment variable assignments and prefixes in STRIPPED_COMMANDS.
459pub fn extract_bash_tools_from_messages(messages: &[serde_json::Value]) -> HashSet<String> {
460    let mut tools = HashSet::new();
461
462    for message in messages {
463        if let Some(msg_type) = message.get("type").and_then(|v| v.as_str()) {
464            if msg_type == "assistant" {
465                if let Some(content) = message.get("message").and_then(|v| v.get("content")) {
466                    if let Some(blocks) = content.as_array() {
467                        for block in blocks {
468                            if let Some(block_type) = block.get("type").and_then(|v| v.as_str()) {
469                                if block_type == "tool_use" {
470                                    let tool_name =
471                                        block.get("name").and_then(|v| v.as_str()).unwrap_or("");
472                                    if tool_name == "Bash" {
473                                        if let Some(input) = block.get("input") {
474                                            if let Some(command) =
475                                                input.get("command").and_then(|v| v.as_str())
476                                            {
477                                                if let Some(cli_name) = extract_cli_name(command) {
478                                                    tools.insert(cli_name);
479                                                }
480                                            }
481                                        }
482                                    }
483                                }
484                            }
485                        }
486                    }
487                }
488            }
489        }
490    }
491
492    tools
493}
494
495/// Extract the actual CLI name from a bash command string, skipping
496/// env var assignments (e.g. `FOO=bar vercel` -> `vercel`) and prefixes
497/// in STRIPPED_COMMANDS.
498fn extract_cli_name(command: &str) -> Option<String> {
499    let tokens: Vec<&str> = command.trim().split_whitespace().collect();
500    for token in tokens {
501        // Skip env var assignments
502        if token.contains('=')
503            && token
504                .chars()
505                .next()
506                .map(|c| c.is_ascii_alphabetic() || c == '_')
507                .unwrap_or(false)
508        {
509            continue;
510        }
511        // Skip stripped commands
512        if STRIPPED_COMMANDS.contains(&token) {
513            continue;
514        }
515        return Some(token.to_string());
516    }
517    None
518}
519
520/// Check if a result should be considered successful based on the last message.
521///
522/// Returns true if:
523/// - Last message is assistant with text/thinking content
524/// - Last message is user with only tool_result blocks
525/// - Last message is the user prompt but the API completed with end_turn
526pub fn is_result_successful(
527    message: Option<&serde_json::Value>,
528    stop_reason: Option<&str>,
529) -> bool {
530    let Some(msg) = message else {
531        return false;
532    };
533
534    if let Some(msg_type) = msg.get("type").and_then(|v| v.as_str()) {
535        if msg_type == "assistant" {
536            if let Some(content) = msg.get("message").and_then(|v| v.get("content")) {
537                if let Some(blocks) = content.as_array() {
538                    if let Some(last_block) = blocks.last() {
539                        if let Some(block_type) = last_block.get("type").and_then(|v| v.as_str()) {
540                            return matches!(block_type, "text" | "thinking" | "redacted_thinking");
541                        }
542                    }
543                }
544            }
545        }
546
547        if msg_type == "user" {
548            if let Some(content) = msg.get("message").and_then(|v| v.get("content")) {
549                if let Some(blocks) = content.as_array() {
550                    if !blocks.is_empty() {
551                        return blocks.iter().all(|block| {
552                            block
553                                .get("type")
554                                .and_then(|v| v.as_str())
555                                .map(|t| t == "tool_result")
556                                .unwrap_or(false)
557                        });
558                    }
559                }
560            }
561        }
562
563        // API completed with end_turn but yielded no assistant content
564        if stop_reason == Some("end_turn") {
565            return true;
566        }
567    }
568
569    false
570}
571
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Shorthand for the expected `Some(String)` result of `extract_cli_name`.
    fn cli(name: &str) -> Option<String> {
        Some(name.to_string())
    }

    #[test]
    fn test_parse_rg_output_empty() {
        for input in ["", "\n\n"] {
            assert!(parse_rg_output(input).is_empty());
        }
    }

    #[test]
    fn test_parse_rg_output_with_paths() {
        let parsed = parse_rg_output("src/file1.rs\nsrc/file2.rs\n\n");
        assert_eq!(parsed, vec!["src/file1.rs", "src/file2.rs"]);
    }

    #[test]
    fn test_extract_cli_name_basic() {
        let cases = [("git status", "git"), ("ls -la", "ls")];
        for (command, expected) in cases {
            assert_eq!(extract_cli_name(command), cli(expected));
        }
    }

    #[test]
    fn test_extract_cli_name_env_vars() {
        assert_eq!(extract_cli_name("FOO=bar vercel deploy"), cli("vercel"));
    }

    #[test]
    fn test_extract_cli_name_sudo() {
        assert_eq!(extract_cli_name("sudo rm -rf /tmp"), cli("rm"));
    }

    #[test]
    fn test_strip_line_number_prefix() {
        let cases = [
            ("123:hello world", "hello world"),
            ("hello", "hello"),
            ("abc:not a number prefix", "abc:not a number prefix"),
        ];
        for (line, expected) in cases {
            assert_eq!(strip_line_number_prefix(line), expected);
        }
    }

    #[test]
    fn test_expand_path_absolute() {
        assert_eq!(expand_path("/absolute/path", "/cwd"), "/absolute/path");
    }

    #[test]
    fn test_expand_path_relative() {
        assert_eq!(expand_path("relative/path", "/cwd"), "/cwd/relative/path");
    }

    #[test]
    fn test_file_state_cache() {
        let path = "/test/file.rs";
        let mut cache = FileStateCache::new(5);
        assert!(cache.is_empty());

        let stored = FileStateEntry {
            content: "hello".to_string(),
            timestamp: 12345,
            offset: None,
            limit: None,
        };
        cache.set(path, stored);

        assert_eq!(cache.len(), 1);
        assert!(cache.contains(path));

        let fetched = cache.get(path).unwrap();
        assert_eq!(fetched.content, "hello");
    }

    #[test]
    fn test_is_result_successful_assistant() {
        let with_text = json!({
            "type": "assistant",
            "message": { "content": [{ "type": "text", "text": "Hello" }] }
        });
        assert!(is_result_successful(Some(&with_text), None));

        let with_thinking = json!({
            "type": "assistant",
            "message": { "content": [{ "type": "thinking", "text": "..." }] }
        });
        assert!(is_result_successful(Some(&with_thinking), None));
    }

    #[test]
    fn test_is_result_successful_user_tool_result() {
        let tool_result_only = json!({
            "type": "user",
            "message": { "content": [{ "type": "tool_result" }] }
        });
        assert!(is_result_successful(Some(&tool_result_only), None));
    }

    #[test]
    fn test_is_result_successful_end_turn() {
        let prompt = json!({
            "type": "user",
            "message": { "content": "prompt" }
        });
        assert!(is_result_successful(Some(&prompt), Some("end_turn")));
        assert!(!is_result_successful(Some(&prompt), None));
    }

    #[test]
    fn test_is_result_successful_none() {
        assert!(!is_result_successful(None, None));
    }
}