1use anyhow::{Context, Result};
7use glob::Pattern as GlobPattern;
8use ignore::WalkBuilder;
9use nucleo_matcher::pattern::{AtomKind, CaseMatching, Normalization, Pattern as FuzzyPattern};
10use nucleo_matcher::{Matcher, Utf32Str};
11use serde_json::{Value, json};
12use std::collections::HashSet;
13use std::fs::{self, Metadata};
14use std::path::{Path, PathBuf};
15
16#[derive(Debug, Clone)]
18pub struct FileSearchConfig {
19 pub max_results: usize,
21 pub follow_links: bool,
23 pub include_hidden: bool,
25 pub include_extensions: HashSet<String>,
27 pub exclude_extensions: HashSet<String>,
29 pub exclude_patterns: Vec<GlobPattern>,
31 pub max_file_size: u64,
33}
34
35impl Default for FileSearchConfig {
36 fn default() -> Self {
37 Self {
38 max_results: 1000,
39 follow_links: false,
40 include_hidden: false,
41 include_extensions: HashSet::new(),
42 exclude_extensions: HashSet::new(),
43 exclude_patterns: Vec::new(),
44 max_file_size: 0,
45 }
46 }
47}
48
49#[derive(Debug, Clone)]
51pub struct FileSearchResult {
52 pub path: PathBuf,
54 pub name: String,
56 pub extension: Option<String>,
58 pub size: u64,
60 pub is_dir: bool,
62 pub content_matches: Vec<ContentMatch>,
64}
65
/// A single occurrence of the content pattern inside a file.
#[derive(Clone, Debug)]
pub struct ContentMatch {
    /// 1-based number of the line containing the occurrence.
    pub line_number: usize,
    /// Full text of that line, in its original case.
    pub content: String,
    /// 0-based byte offset of the occurrence within the lower-cased line.
    pub column: usize,
}
76
77pub struct FileSearcher {
79 root: PathBuf,
80 config: FileSearchConfig,
81}
82
83impl FileSearcher {
84 pub fn new(root: PathBuf, config: FileSearchConfig) -> Self {
86 Self { root, config }
87 }
88
89 pub fn with_default_config(root: PathBuf) -> Self {
91 Self::new(root, FileSearchConfig::default())
92 }
93
94 fn build_walk_builder(&self) -> WalkBuilder {
95 let mut builder = WalkBuilder::new(&self.root);
96 builder.follow_links(self.config.follow_links);
97 builder.hidden(!self.config.include_hidden);
98 builder.require_git(false);
99 builder.git_ignore(true);
100 builder.git_global(true);
101 builder.git_exclude(true);
102 builder
103 }
104
105 fn relative_path_string(&self, path: &Path) -> String {
106 path.strip_prefix(&self.root)
107 .unwrap_or(path)
108 .to_string_lossy()
109 .into_owned()
110 }
111
112 pub fn search_files(&self, pattern: Option<&str>) -> Result<Vec<FileSearchResult>> {
114 let mut entries: Vec<(String, FileSearchResult)> = Vec::new();
115 let max_results = self.config.max_results;
116 let compiled_pattern = pattern.and_then(compile_fuzzy_pattern);
117
118 for entry_result in self.build_walk_builder().build() {
119 let entry = match entry_result {
120 Ok(entry) => entry,
121 Err(_) => continue,
122 };
123
124 if entry.depth() == 0 {
125 continue;
126 }
127
128 let file_type = match entry.file_type() {
129 Some(file_type) => file_type,
130 None => continue,
131 };
132
133 let metadata = match entry.metadata() {
134 Ok(meta) => meta,
135 Err(_) => continue,
136 };
137
138 if self.should_exclude_entry(entry.path(), Some(&file_type), &metadata)? {
139 continue;
140 }
141
142 let path = entry.path();
143 let result = FileSearchResult {
144 path: path.to_path_buf(),
145 name: path
146 .file_name()
147 .and_then(|n| n.to_str())
148 .unwrap_or("")
149 .to_string(),
150 extension: path
151 .extension()
152 .and_then(|ext| ext.to_str())
153 .map(|ext| ext.to_string()),
154 size: metadata.len(),
155 is_dir: file_type.is_dir(),
156 content_matches: Vec::new(),
157 };
158
159 let rel_path = self.relative_path_string(path);
160 entries.push((rel_path, result));
161 }
162
163 if let Some(pattern) = compiled_pattern {
164 let mut matcher = Matcher::new(nucleo_matcher::Config::DEFAULT);
165 let mut buffer = Vec::<char>::new();
166 let mut scored = Vec::new();
167
168 for (rel_path, result) in entries {
169 buffer.clear();
170 let haystack = Utf32Str::new(rel_path.as_str(), &mut buffer);
171 if let Some(score) = pattern.score(haystack, &mut matcher) {
172 scored.push((score, rel_path, result));
173 }
174 }
175
176 scored.sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.cmp(&b.1)));
177 Ok(scored
178 .into_iter()
179 .take(max_results)
180 .map(|(_, _, result)| result)
181 .collect())
182 } else {
183 entries.sort_by(|a, b| a.0.cmp(&b.0));
184 Ok(entries
185 .into_iter()
186 .take(max_results)
187 .map(|(_, result)| result)
188 .collect())
189 }
190 }
191
192 pub fn search_files_with_content(
194 &self,
195 content_pattern: &str,
196 file_pattern: Option<&str>,
197 ) -> Result<Vec<FileSearchResult>> {
198 let mut results = Vec::new();
199 let max_results = self.config.max_results;
200 for entry_result in self.build_walk_builder().build() {
201 if results.len() >= max_results {
202 break;
203 }
204
205 let entry = match entry_result {
206 Ok(entry) => entry,
207 Err(_) => continue,
208 };
209
210 if entry.depth() == 0 {
211 continue;
212 }
213
214 let path = entry.path();
215
216 let file_type = match entry.file_type() {
217 Some(file_type) if file_type.is_file() => file_type,
218 _ => continue,
219 };
220
221 let metadata = match entry.metadata() {
222 Ok(meta) => meta,
223 Err(_) => continue,
224 };
225
226 if self.should_exclude_entry(path, Some(&file_type), &metadata)? {
227 continue;
228 }
229
230 if let Some(pattern) = file_pattern {
231 if !self.path_matches_pattern(path, pattern)? {
232 continue;
233 }
234 }
235
236 match self.search_content_in_file(path, content_pattern) {
237 Ok(content_matches) => {
238 if content_matches.is_empty() {
239 continue;
240 }
241
242 let file_result = FileSearchResult {
243 path: path.to_path_buf(),
244 name: path
245 .file_name()
246 .and_then(|n| n.to_str())
247 .unwrap_or("")
248 .to_string(),
249 extension: path
250 .extension()
251 .and_then(|ext| ext.to_str())
252 .map(|ext| ext.to_string()),
253 size: metadata.len(),
254 is_dir: false,
255 content_matches,
256 };
257
258 results.push(file_result);
259 }
260 Err(_) => continue,
261 }
262 }
263
264 Ok(results)
265 }
266
267 pub fn find_file_by_name(&self, file_name: &str) -> Result<Option<PathBuf>> {
269 for entry_result in self.build_walk_builder().build() {
270 let entry = match entry_result {
271 Ok(entry) => entry,
272 Err(_) => continue,
273 };
274
275 if entry.depth() == 0 {
276 continue;
277 }
278
279 let path = entry.path();
280
281 let file_type = match entry.file_type() {
282 Some(file_type) => file_type,
283 None => continue,
284 };
285
286 let metadata = match entry.metadata() {
287 Ok(meta) => meta,
288 Err(_) => continue,
289 };
290
291 if self.should_exclude_entry(path, Some(&file_type), &metadata)? {
292 continue;
293 }
294
295 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
296 if name == file_name {
297 return Ok(Some(path.to_path_buf()));
298 }
299 }
300 }
301
302 Ok(None)
303 }
304
305 fn should_exclude_entry(
307 &self,
308 path: &Path,
309 file_type: Option<&std::fs::FileType>,
310 metadata: &Metadata,
311 ) -> Result<bool> {
312 let path_str = path.to_string_lossy();
313
314 let is_effective_file = metadata.is_file() || file_type.map_or(false, |ft| ft.is_file());
315
316 if let Some(extension) = path.extension().and_then(|ext| ext.to_str()) {
317 let extension_lower = extension.to_lowercase();
318
319 if self.config.exclude_extensions.contains(&extension_lower) {
320 return Ok(true);
321 }
322
323 if !self.config.include_extensions.is_empty()
324 && !self.config.include_extensions.contains(&extension_lower)
325 {
326 return Ok(true);
327 }
328 } else if !self.config.include_extensions.is_empty() && is_effective_file {
329 return Ok(true);
330 }
331
332 for pattern in &self.config.exclude_patterns {
333 if pattern.matches(path_str.as_ref()) {
334 return Ok(true);
335 }
336 }
337
338 if is_effective_file
339 && self.config.max_file_size > 0
340 && metadata.len() > self.config.max_file_size
341 {
342 return Ok(true);
343 }
344
345 Ok(false)
346 }
347
348 fn path_matches_pattern(&self, path: &Path, pattern: &str) -> Result<bool> {
350 if let Some(compiled) = compile_fuzzy_pattern(pattern) {
351 let mut matcher = Matcher::new(nucleo_matcher::Config::DEFAULT);
352 let mut buffer = Vec::<char>::new();
353 let relative = self.relative_path_string(path);
354 let haystack = Utf32Str::new(relative.as_str(), &mut buffer);
355 Ok(compiled.score(haystack, &mut matcher).is_some())
356 } else {
357 Ok(true)
358 }
359 }
360
361 fn search_content_in_file(&self, path: &Path, pattern: &str) -> Result<Vec<ContentMatch>> {
363 let content = fs::read_to_string(path)
364 .with_context(|| format!("Failed to read file: {}", path.display()))?;
365
366 let mut matches = Vec::new();
367 let pattern_lower = pattern.to_lowercase();
368
369 for (line_num, line) in content.lines().enumerate() {
370 let line_lower = line.to_lowercase();
371 if line_lower.contains(&pattern_lower) {
372 let mut start = 0;
374 while let Some(pos) = line_lower[start..].find(&pattern_lower) {
375 let actual_pos = start + pos;
376 matches.push(ContentMatch {
377 line_number: line_num + 1,
378 content: line.to_string(),
379 column: actual_pos,
380 });
381 start = actual_pos + pattern.len();
382 }
383 }
384 }
385
386 Ok(matches)
387 }
388
389 pub fn results_to_json(results: Vec<FileSearchResult>) -> Value {
391 let json_results: Vec<Value> = results
392 .into_iter()
393 .map(|result| {
394 json!({
395 "path": result.path.to_string_lossy(),
396 "name": result.name,
397 "extension": result.extension,
398 "size": result.size,
399 "is_dir": result.is_dir,
400 "content_matches": result.content_matches.iter().map(|m| json!({
401 "line_number": m.line_number,
402 "content": m.content,
403 "column": m.column,
404 })).collect::<Vec<Value>>()
405 })
406 })
407 .collect();
408
409 json!({
410 "success": true,
411 "results": json_results,
412 "count": json_results.len()
413 })
414 }
415}
416
417fn compile_fuzzy_pattern(pattern: &str) -> Option<FuzzyPattern> {
418 let trimmed = pattern.trim();
419 if trimmed.is_empty() {
420 None
421 } else {
422 Some(FuzzyPattern::new(
423 trimmed,
424 CaseMatching::Smart,
425 Normalization::Smart,
426 AtomKind::Fuzzy,
427 ))
428 }
429}
430
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;

    /// Strips `root` from every result path, dropping results outside of it.
    fn collect_relative_paths(results: &[FileSearchResult], root: &Path) -> Vec<PathBuf> {
        let mut relative = Vec::with_capacity(results.len());
        for found in results {
            if let Ok(stripped) = found.path.strip_prefix(root) {
                relative.push(stripped.to_path_buf());
            }
        }
        relative
    }

    /// The searcher keeps the root it was constructed with.
    #[test]
    fn test_file_searcher_creation() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        let searcher = FileSearcher::with_default_config(root.to_path_buf());
        assert_eq!(searcher.root, root);
    }

    /// An existing file is found by its exact name.
    #[test]
    fn test_find_file_by_name() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        let test_file = root.join("test.txt");
        fs::write(&test_file, "test content")?;

        let searcher = FileSearcher::with_default_config(root.to_path_buf());
        let found = searcher.find_file_by_name("test.txt")?;

        assert!(found.is_some());
        assert_eq!(found.unwrap(), test_file);

        Ok(())
    }

    /// Without a pattern, every entry is listed in relative-path order.
    #[test]
    fn test_search_files_without_pattern_returns_sorted_entries() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        fs::write(root.join("b_file.rs"), "content")?;
        fs::write(root.join("a_file.txt"), "content")?;
        fs::create_dir(root.join("subdir"))?;
        fs::write(root.join("subdir").join("nested.txt"), "content")?;

        let searcher = FileSearcher::with_default_config(root.to_path_buf());
        let results = searcher.search_files(None)?;

        let relative = collect_relative_paths(&results, root);
        let expected: Vec<PathBuf> = ["a_file.txt", "b_file.rs", "subdir", "subdir/nested.txt"]
            .iter()
            .map(PathBuf::from)
            .collect();

        assert_eq!(relative, expected);

        Ok(())
    }

    /// A fuzzy pattern keeps matching paths and drops unrelated ones.
    #[test]
    fn test_search_files_uses_fuzzy_matching() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        fs::create_dir(root.join("src"))?;
        fs::write(root.join("src").join("lib.rs"), "content")?;
        fs::write(root.join("README.md"), "docs")?;

        let searcher = FileSearcher::with_default_config(root.to_path_buf());
        let results = searcher.search_files(Some("srlb"))?;

        let files_only: Vec<FileSearchResult> = results
            .into_iter()
            .filter(|found| !found.is_dir)
            .collect();
        let file_paths = collect_relative_paths(&files_only, root);

        assert!(file_paths.contains(&PathBuf::from("src/lib.rs")));
        assert!(!file_paths.contains(&PathBuf::from("README.md")));

        Ok(())
    }

    /// Entries ignored by a `.gitignore` in the root are not reported.
    #[test]
    fn test_search_files_respects_gitignore() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();

        fs::write(root.join(".gitignore"), "ignored/\n")?;
        fs::create_dir(root.join("ignored"))?;
        fs::write(root.join("ignored").join("skip.txt"), "skip")?;
        fs::write(root.join("include.txt"), "include")?;

        let searcher = FileSearcher::with_default_config(root.to_path_buf());
        let results = searcher.search_files(None)?;

        let relative = collect_relative_paths(&results, root);

        assert!(relative.contains(&PathBuf::from("include.txt")));
        assert!(!relative.contains(&PathBuf::from("ignored/skip.txt")));

        Ok(())
    }
}