llm_coding_tools_core/operations/
grep.rs

1//! Grep content search operation.
2
3use crate::error::{ToolError, ToolResult};
4use crate::path::PathResolver;
5use globset::Glob;
6use grep_regex::RegexMatcher;
7use grep_searcher::sinks::UTF8;
8use grep_searcher::{BinaryDetection, Searcher, SearcherBuilder};
9use ignore::WalkBuilder;
10use serde::Serialize;
11use std::fmt::Write;
12use std::path::Path;
13use std::time::SystemTime;
14
15/// Default maximum line length (in bytes) for formatted grep output.
16pub const DEFAULT_MAX_LINE_LENGTH: usize = 2000;
17
/// Estimated bytes per formatted output line (chosen to exceed a typical file path length); used to pre-size the output buffer.
19const ESTIMATED_CHARS_PER_LINE: usize = 128;
20
21/// A single line match within a file.
22#[derive(Debug, Clone, Serialize)]
23pub struct GrepLineMatch {
24    /// 1-indexed line number.
25    pub line_num: u64,
26    /// Content of the matched line.
27    pub line_text: String,
28}
29
30/// All matches within a single file.
31#[derive(Debug, Clone, Serialize)]
32pub struct GrepFileMatches {
33    /// File path.
34    pub path: String,
35    /// Matches in this file, in line order.
36    pub matches: Vec<GrepLineMatch>,
37    #[serde(skip)]
38    pub(crate) mtime: SystemTime,
39}
40
41/// Output from grep search.
42#[derive(Debug, Serialize)]
43pub struct GrepOutput {
44    /// Files with matches, sorted by modification time (newest first).
45    pub files: Vec<GrepFileMatches>,
46    /// Total match count across all files.
47    pub match_count: usize,
48    /// Whether results were truncated due to limit.
49    pub truncated: bool,
50}
51
52impl GrepOutput {
53    /// Formats grep results as human-readable text.
54    ///
55    /// # Type Parameters
56    ///
57    /// * `LINE_NUMBERS` - When `true`, prefixes each match with `L{num}: `
58    ///
59    /// # Arguments
60    ///
61    /// * `limit` - The original match limit (used in truncation message)
62    /// * `max_line_len` - Truncate lines exceeding this byte length at UTF-8 boundary
63    pub fn format<const LINE_NUMBERS: bool>(&self, limit: usize, max_line_len: usize) -> String {
64        let estimated_capacity = self.match_count * ESTIMATED_CHARS_PER_LINE;
65        let mut output = String::with_capacity(estimated_capacity);
66
67        let _ = writeln!(&mut output, "Found {} matches", self.match_count);
68
69        for file in &self.files {
70            let _ = writeln!(&mut output, "\n{}:", file.path);
71            for m in &file.matches {
72                let truncated_text = if m.line_text.len() > max_line_len {
73                    &m.line_text[..m.line_text.floor_char_boundary(max_line_len)]
74                } else {
75                    &m.line_text
76                };
77                if LINE_NUMBERS {
78                    let _ = writeln!(&mut output, "  L{}: {}", m.line_num, truncated_text);
79                } else {
80                    let _ = writeln!(&mut output, "  {}", truncated_text);
81                }
82            }
83        }
84
85        if self.truncated {
86            let _ = write!(&mut output, "\n(Results truncated at {} matches)", limit);
87        }
88
89        output
90    }
91}
92
93/// Searches for content matching a regex pattern.
94///
95/// Results are sorted by modification time (newest first).
96/// Binary files are automatically skipped.
97pub fn grep_search<R: PathResolver>(
98    resolver: &R,
99    pattern: &str,
100    include: Option<&str>,
101    search_path: &str,
102    limit: usize,
103) -> ToolResult<GrepOutput> {
104    let path = resolver.resolve(search_path)?;
105
106    let matcher =
107        RegexMatcher::new(pattern).map_err(|e| ToolError::InvalidPattern(e.to_string()))?;
108
109    // Optional filename filter via glob.
110    let glob_matcher = include
111        .map(|pattern| Glob::new(pattern).map(|glob| glob.compile_matcher()))
112        .transpose()?;
113
114    let mut searcher = SearcherBuilder::new()
115        .binary_detection(BinaryDetection::quit(0))
116        .build();
117
118    let mut files: Vec<GrepFileMatches> = Vec::with_capacity(64);
119
120    let walker = WalkBuilder::new(&path)
121        .hidden(false)
122        .git_ignore(true)
123        .git_global(true)
124        .git_exclude(true)
125        .build();
126
127    for entry_result in walker {
128        let entry = match entry_result {
129            Ok(e) => e,
130            Err(_) => continue,
131        };
132
133        // Skip directories and non-regular files.
134        match entry.file_type() {
135            Some(ft) if ft.is_file() => {}
136            _ => continue,
137        }
138
139        let entry_path = entry.path();
140
141        // Apply include glob to basename when requested.
142        if let Some(ref matcher) = glob_matcher {
143            let file_name = match entry_path.file_name().and_then(|n| n.to_str()) {
144                Some(name) => name,
145                None => continue,
146            };
147            if !matcher.is_match(file_name) {
148                continue;
149            }
150        }
151
152        let matches = collect_file_matches(&matcher, &mut searcher, entry_path);
153        if matches.is_empty() {
154            continue;
155        }
156
157        let mtime = entry
158            .metadata()
159            .ok()
160            .and_then(|m| m.modified().ok())
161            .unwrap_or(SystemTime::UNIX_EPOCH);
162
163        files.push(GrepFileMatches {
164            path: entry_path.to_string_lossy().into_owned(),
165            matches,
166            mtime,
167        });
168    }
169
170    // Sort newest files first.
171    files.sort_by(|a, b| b.mtime.cmp(&a.mtime));
172
173    let mut match_count = 0;
174    let mut truncate_at = files.len();
175    let mut truncated = false;
176
177    // Enforce overall match limit across files.
178    for (x, file) in files.iter_mut().enumerate() {
179        let remaining = limit - match_count;
180        if file.matches.len() > remaining {
181            file.matches.truncate(remaining);
182            match_count += remaining;
183            truncate_at = x + 1;
184            truncated = true;
185            break;
186        }
187        match_count += file.matches.len();
188    }
189
190    files.truncate(truncate_at);
191
192    Ok(GrepOutput {
193        files,
194        match_count,
195        truncated,
196    })
197}
198
199#[inline]
200fn collect_file_matches(
201    matcher: &RegexMatcher,
202    searcher: &mut Searcher,
203    path: &Path,
204) -> Vec<GrepLineMatch> {
205    let mut matches = Vec::new();
206
207    let _ = searcher.search_path(
208        matcher,
209        path,
210        UTF8(|line_num, line| {
211            matches.push(GrepLineMatch {
212                line_num,
213                line_text: line.trim_end().to_string(),
214            });
215            Ok(true)
216        }),
217    );
218
219    matches
220}
221
#[cfg(test)]
mod tests {
    use super::*;
    use crate::path::AbsolutePathResolver;
    use tempfile::tempdir;

    /// Searches `root` for "hello" with a limit of 10 matches.
    fn search_hello(root: &std::path::Path, include: Option<&str>) -> GrepOutput {
        let resolver = AbsolutePathResolver;
        let root = root.to_str().unwrap();
        grep_search(&resolver, "hello", include, root, 10).unwrap()
    }

    #[test]
    fn grep_finds_matches() {
        let dir = tempdir().unwrap();
        std::fs::write(dir.path().join("match.txt"), "hello world").unwrap();

        let output = search_hello(dir.path(), None);

        assert_eq!(output.files.len(), 1);
        assert_eq!(output.match_count, 1);
    }

    #[test]
    fn grep_respects_glob_filter() {
        let dir = tempdir().unwrap();
        for name in ["match.rs", "match.txt"] {
            std::fs::write(dir.path().join(name), "hello").unwrap();
        }

        let output = search_hello(dir.path(), Some("*.rs"));

        // Only the Rust file passes the include glob.
        assert_eq!(output.files.len(), 1);
        assert!(output.files[0].path.ends_with(".rs"));
    }
}
260}