llm_coding_tools_core/operations/
grep.rs1use crate::error::{ToolError, ToolResult};
4use crate::path::PathResolver;
5use globset::Glob;
6use grep_regex::RegexMatcher;
7use grep_searcher::sinks::UTF8;
8use grep_searcher::{BinaryDetection, Searcher, SearcherBuilder};
9use ignore::WalkBuilder;
10use serde::Serialize;
11use std::fmt::Write;
12use std::path::Path;
13use std::time::SystemTime;
14
/// Default cap on how much of a matching line is rendered.
///
/// Not referenced elsewhere in this file; presumably intended as the
/// `max_line_len` argument of [`GrepOutput::format`] (confirm against callers).
pub const DEFAULT_MAX_LINE_LENGTH: usize = 2_000;

/// Rough per-match output size, used by [`GrepOutput::format`] to pre-size its
/// output buffer so it rarely has to reallocate.
const ESTIMATED_CHARS_PER_LINE: usize = 128;
20
21#[derive(Debug, Clone, Serialize)]
23pub struct GrepLineMatch {
24 pub line_num: u64,
26 pub line_text: String,
28}
29
30#[derive(Debug, Clone, Serialize)]
32pub struct GrepFileMatches {
33 pub path: String,
35 pub matches: Vec<GrepLineMatch>,
37 #[serde(skip)]
38 pub(crate) mtime: SystemTime,
39}
40
41#[derive(Debug, Serialize)]
43pub struct GrepOutput {
44 pub files: Vec<GrepFileMatches>,
46 pub match_count: usize,
48 pub truncated: bool,
50}
51
52impl GrepOutput {
53 pub fn format<const LINE_NUMBERS: bool>(&self, limit: usize, max_line_len: usize) -> String {
64 let estimated_capacity = self.match_count * ESTIMATED_CHARS_PER_LINE;
65 let mut output = String::with_capacity(estimated_capacity);
66
67 let _ = writeln!(&mut output, "Found {} matches", self.match_count);
68
69 for file in &self.files {
70 let _ = writeln!(&mut output, "\n{}:", file.path);
71 for m in &file.matches {
72 let truncated_text = if m.line_text.len() > max_line_len {
73 &m.line_text[..m.line_text.floor_char_boundary(max_line_len)]
74 } else {
75 &m.line_text
76 };
77 if LINE_NUMBERS {
78 let _ = writeln!(&mut output, " L{}: {}", m.line_num, truncated_text);
79 } else {
80 let _ = writeln!(&mut output, " {}", truncated_text);
81 }
82 }
83 }
84
85 if self.truncated {
86 let _ = write!(&mut output, "\n(Results truncated at {} matches)", limit);
87 }
88
89 output
90 }
91}
92
93pub fn grep_search<R: PathResolver>(
98 resolver: &R,
99 pattern: &str,
100 include: Option<&str>,
101 search_path: &str,
102 limit: usize,
103) -> ToolResult<GrepOutput> {
104 let path = resolver.resolve(search_path)?;
105
106 let matcher =
107 RegexMatcher::new(pattern).map_err(|e| ToolError::InvalidPattern(e.to_string()))?;
108
109 let glob_matcher = include
111 .map(|pattern| Glob::new(pattern).map(|glob| glob.compile_matcher()))
112 .transpose()?;
113
114 let mut searcher = SearcherBuilder::new()
115 .binary_detection(BinaryDetection::quit(0))
116 .build();
117
118 let mut files: Vec<GrepFileMatches> = Vec::with_capacity(64);
119
120 let walker = WalkBuilder::new(&path)
121 .hidden(false)
122 .git_ignore(true)
123 .git_global(true)
124 .git_exclude(true)
125 .build();
126
127 for entry_result in walker {
128 let entry = match entry_result {
129 Ok(e) => e,
130 Err(_) => continue,
131 };
132
133 match entry.file_type() {
135 Some(ft) if ft.is_file() => {}
136 _ => continue,
137 }
138
139 let entry_path = entry.path();
140
141 if let Some(ref matcher) = glob_matcher {
143 let file_name = match entry_path.file_name().and_then(|n| n.to_str()) {
144 Some(name) => name,
145 None => continue,
146 };
147 if !matcher.is_match(file_name) {
148 continue;
149 }
150 }
151
152 let matches = collect_file_matches(&matcher, &mut searcher, entry_path);
153 if matches.is_empty() {
154 continue;
155 }
156
157 let mtime = entry
158 .metadata()
159 .ok()
160 .and_then(|m| m.modified().ok())
161 .unwrap_or(SystemTime::UNIX_EPOCH);
162
163 files.push(GrepFileMatches {
164 path: entry_path.to_string_lossy().into_owned(),
165 matches,
166 mtime,
167 });
168 }
169
170 files.sort_by(|a, b| b.mtime.cmp(&a.mtime));
172
173 let mut match_count = 0;
174 let mut truncate_at = files.len();
175 let mut truncated = false;
176
177 for (x, file) in files.iter_mut().enumerate() {
179 let remaining = limit - match_count;
180 if file.matches.len() > remaining {
181 file.matches.truncate(remaining);
182 match_count += remaining;
183 truncate_at = x + 1;
184 truncated = true;
185 break;
186 }
187 match_count += file.matches.len();
188 }
189
190 files.truncate(truncate_at);
191
192 Ok(GrepOutput {
193 files,
194 match_count,
195 truncated,
196 })
197}
198
199#[inline]
200fn collect_file_matches(
201 matcher: &RegexMatcher,
202 searcher: &mut Searcher,
203 path: &Path,
204) -> Vec<GrepLineMatch> {
205 let mut matches = Vec::new();
206
207 let _ = searcher.search_path(
208 matcher,
209 path,
210 UTF8(|line_num, line| {
211 matches.push(GrepLineMatch {
212 line_num,
213 line_text: line.trim_end().to_string(),
214 });
215 Ok(true)
216 }),
217 );
218
219 matches
220}
221
#[cfg(test)]
mod tests {
    use super::*;
    use crate::path::AbsolutePathResolver;
    use tempfile::tempdir;

    /// Searches `dir` for the pattern "hello" with a match limit of 10.
    fn search_hello(dir: &std::path::Path, include: Option<&str>) -> GrepOutput {
        let resolver = AbsolutePathResolver;
        let root = dir.to_str().unwrap();
        grep_search(&resolver, "hello", include, root, 10).unwrap()
    }

    #[test]
    fn grep_finds_matches() {
        let dir = tempdir().unwrap();
        std::fs::write(dir.path().join("match.txt"), "hello world").unwrap();

        let found = search_hello(dir.path(), None);

        assert_eq!(found.files.len(), 1);
        assert_eq!(found.match_count, 1);
    }

    #[test]
    fn grep_respects_glob_filter() {
        let dir = tempdir().unwrap();
        for name in ["match.rs", "match.txt"] {
            std::fs::write(dir.path().join(name), "hello").unwrap();
        }

        let found = search_hello(dir.path(), Some("*.rs"));

        // Only the `.rs` file passes the include glob.
        assert_eq!(found.files.len(), 1);
        assert!(found.files[0].path.ends_with(".rs"));
    }
}