1pub mod classifier;
3pub mod fallback;
5pub mod fuzzy;
7pub mod matcher;
9pub mod ranking;
11pub mod simd;
13pub mod trigram;
15
16use anyhow::{Context, Result};
17use grep_regex::RegexMatcher;
18use grep_searcher::{BinaryDetection, Searcher as GrepSearcher, SearcherBuilder, Sink, SinkMatch};
19use ignore::{
20 WalkBuilder,
21 overrides::{Override, OverrideBuilder},
22};
23use std::io;
24use std::path::{Path, PathBuf};
25
/// Tail of the error produced when the text searcher is asked to run a mode it cannot
/// handle. `Searcher::pattern_for_mode` prefixes it with `SearchMode::<variant>`, so the
/// text deliberately starts mid-sentence.
const UNSUPPORTED_TEXT_SEARCHER_MODE: &str = "is not supported by the text searcher. Use FallbackSearchEngine for semantic search or CandidateGenerator for fuzzy search.";
30
/// How a search pattern should be interpreted.
///
/// The `Searcher` in this file only executes `Text` and `Regex`; asking it for
/// `Semantic` or `Fuzzy` yields an error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchMode {
    /// Literal search: the pattern is regex-escaped before it is compiled.
    Text,
    /// The pattern is handed to the regex matcher unchanged.
    Regex,
    /// Rejected by the text searcher; its error message names `FallbackSearchEngine`
    /// as the component to use for semantic search.
    Semantic,
    /// Rejected by the text searcher; its error message names `CandidateGenerator`
    /// as the component to use for fuzzy search.
    Fuzzy,
}
43
/// One matching line found in a file.
#[derive(Debug, Clone, serde::Serialize)]
pub struct Match {
    /// Path of the file the match was found in, as produced by the directory walk.
    pub path: PathBuf,
    /// Line number reported by the searcher, saturated at `u32::MAX`; `1` is used when
    /// the searcher supplies no line number.
    pub line: u32,
    /// Bytes of the match as handed over by the searcher, lossily decoded as UTF-8.
    /// NOTE(review): no trimming is applied here, so a trailing line terminator is
    /// presumably retained — confirm before relying on exact equality.
    pub line_text: String,
    /// Absolute byte offset reported by the searcher for this match, saturated at
    /// `usize::MAX`.
    pub byte_offset: usize,
}
56
/// Options controlling pattern interpretation and which files are visited.
#[derive(Debug, Clone)]
pub struct SearchConfig {
    /// How the pattern is interpreted (see [`SearchMode`]).
    pub mode: SearchMode,
    /// Forwarded to the regex matcher builder's `case_insensitive` option.
    pub case_insensitive: bool,
    /// When `false`, the walker's hidden-file filter is switched on.
    pub include_hidden: bool,
    /// Forwarded to the walker's `follow_links` option.
    pub follow_symlinks: bool,
    /// Optional depth limit forwarded to the walker; `None` leaves the walker's own
    /// setting untouched.
    pub max_depth: Option<usize>,
    /// File extensions to search, compared verbatim (case-sensitively, without a leading
    /// dot) against `Path::extension`. An empty list accepts every file.
    pub file_types: Vec<String>,
    /// Glob patterns to skip; each one is registered with the walker as a `!`-prefixed
    /// override.
    pub exclude_patterns: Vec<String>,
    /// Requested number of context lines before a match.
    /// NOTE(review): nothing visible in this file reads this field — confirm where it
    /// is consumed.
    pub before_context: usize,
    /// Requested number of context lines after a match.
    /// NOTE(review): nothing visible in this file reads this field — confirm where it
    /// is consumed.
    pub after_context: usize,
}
79
80impl Default for SearchConfig {
81 fn default() -> Self {
82 Self {
83 mode: SearchMode::Regex,
84 case_insensitive: false,
85 include_hidden: false,
86 follow_symlinks: false,
87 max_depth: None,
88 file_types: Vec::new(),
89 exclude_patterns: Vec::new(),
90 before_context: 2,
91 after_context: 2,
92 }
93 }
94}
95
/// Text/regex search over files and directory trees.
pub struct Searcher {
    /// Pre-configured `grep_searcher` instance; `search_file` clones it for every file
    /// because running a search needs mutable access.
    searcher: grep_searcher::Searcher,
}
100
/// Sink that collects the matches reported while searching a single file.
struct MatchSink<'a> {
    /// Path copied into every [`Match`] recorded by this sink.
    path: &'a Path,
    /// Matches recorded so far, in the order they were reported.
    matches: Vec<Match>,
}
106
107impl<'a> MatchSink<'a> {
108 fn new(path: &'a Path) -> Self {
109 Self {
110 path,
111 matches: Vec::new(),
112 }
113 }
114
115 fn into_matches(self) -> Vec<Match> {
116 self.matches
117 }
118}
119
120impl Sink for MatchSink<'_> {
121 type Error = io::Error;
122
123 fn matched(
124 &mut self,
125 _searcher: &GrepSearcher,
126 mat: &SinkMatch<'_>,
127 ) -> Result<bool, io::Error> {
128 let line_text = String::from_utf8_lossy(mat.bytes()).to_string();
129 let line_number = mat
131 .line_number()
132 .unwrap_or(1)
133 .min(u64::from(u32::MAX))
134 .try_into()
135 .unwrap_or(u32::MAX);
136 let byte_offset = mat.absolute_byte_offset().try_into().unwrap_or(usize::MAX);
138
139 self.matches.push(Match {
140 path: self.path.to_path_buf(),
141 line: line_number,
142 line_text,
143 byte_offset,
144 });
145
146 Ok(true)
147 }
148}
149
150impl Searcher {
151 pub fn new() -> Result<Self> {
157 let searcher = SearcherBuilder::new()
158 .binary_detection(BinaryDetection::quit(0))
159 .line_number(true)
160 .build();
161
162 Ok(Self { searcher })
163 }
164
165 pub fn search<P: AsRef<Path>>(
172 &self,
173 pattern: &str,
174 paths: &[P],
175 config: &SearchConfig,
176 ) -> Result<Vec<Match>> {
177 let mut all_matches = Vec::new();
178 let matcher = Self::build_matcher(pattern, config)?;
179
180 for path in paths {
181 let path_matches = self.search_path(&matcher, path.as_ref(), config)?;
182 all_matches.extend(path_matches);
183 }
184
185 Ok(all_matches)
186 }
187
188 fn build_matcher(pattern: &str, config: &SearchConfig) -> Result<RegexMatcher> {
189 let mut matcher_builder = grep_regex::RegexMatcherBuilder::new();
190 matcher_builder.case_insensitive(config.case_insensitive);
191
192 let pattern_to_use = Self::pattern_for_mode(pattern, config.mode)?;
193
194 matcher_builder.build(&pattern_to_use).map_err(Into::into)
195 }
196
197 fn pattern_for_mode(pattern: &str, mode: SearchMode) -> Result<String> {
198 match mode {
199 SearchMode::Text => Ok(regex::escape(pattern)),
200 SearchMode::Regex => Ok(pattern.to_string()),
201 SearchMode::Semantic | SearchMode::Fuzzy => Err(anyhow::anyhow!(
202 "SearchMode::{mode:?} {UNSUPPORTED_TEXT_SEARCHER_MODE}"
203 )),
204 }
205 }
206
207 fn search_path(
208 &self,
209 matcher: &RegexMatcher,
210 path: &Path,
211 config: &SearchConfig,
212 ) -> Result<Vec<Match>> {
213 let walker = Self::build_walker(path, config)?;
214 let mut match_results = Vec::new();
215
216 for entry in walker {
217 let entry = entry?;
218 if !Self::is_searchable_entry(&entry, config) {
219 continue;
220 }
221
222 let path = entry.path();
223 let file_matches = self
224 .search_file(matcher, path)
225 .with_context(|| format!("Failed to search file: {}", path.display()))?;
226 match_results.extend(file_matches);
227 }
228
229 Ok(match_results)
230 }
231
232 fn matches_file_type(path: &Path, config: &SearchConfig) -> bool {
233 if config.file_types.is_empty() {
234 return true;
235 }
236
237 let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
238 return false;
239 };
240
241 config.file_types.iter().any(|candidate| candidate == ext)
242 }
243
244 fn build_walker(path: &Path, config: &SearchConfig) -> Result<ignore::Walk> {
246 let mut builder = WalkBuilder::new(path);
247
248 Self::configure_walker(&mut builder, config);
249 Self::apply_exclude_overrides(&mut builder, path, &config.exclude_patterns)?;
250
251 Ok(builder.build())
252 }
253
254 fn configure_walker(builder: &mut WalkBuilder, config: &SearchConfig) {
255 builder
256 .hidden(!config.include_hidden)
257 .git_ignore(true)
258 .git_global(true)
259 .git_exclude(true)
260 .follow_links(config.follow_symlinks);
261
262 if let Some(max_depth) = config.max_depth {
263 builder.max_depth(Some(max_depth));
264 }
265 }
266
267 fn apply_exclude_overrides(
268 builder: &mut WalkBuilder,
269 path: &Path,
270 exclude_patterns: &[String],
271 ) -> Result<()> {
272 if exclude_patterns.is_empty() {
273 return Ok(());
274 }
275
276 let overrides = Self::build_exclude_overrides(path, exclude_patterns)?;
277 builder.overrides(overrides);
278 Ok(())
279 }
280
281 fn build_exclude_overrides(path: &Path, exclude_patterns: &[String]) -> Result<Override> {
282 let mut override_builder = OverrideBuilder::new(path);
294 for pattern in exclude_patterns {
295 override_builder
297 .add(&format!("!{pattern}"))
298 .with_context(|| format!("Invalid exclude pattern: {pattern}"))?;
299 }
300 override_builder
301 .build()
302 .context("Failed to build exclude overrides")
303 }
304
305 fn is_searchable_entry(entry: &ignore::DirEntry, config: &SearchConfig) -> bool {
306 let path = entry.path();
307 path.is_file() && Self::matches_file_type(path, config)
308 }
309
310 fn search_file(&self, matcher: &RegexMatcher, path: &Path) -> Result<Vec<Match>> {
312 let mut searcher = self.searcher.clone();
313 let mut sink = MatchSink::new(path);
314
315 searcher
316 .search_path(matcher, path, &mut sink)
317 .map_err(|e| anyhow::anyhow!("Search failed: {e}"))?;
318
319 Ok(sink.into_matches())
320 }
321}
322
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    /// Creates `name` inside `dir`, writing each entry of `lines` followed by a newline.
    fn write_lines(dir: &TempDir, name: &str, lines: &[&str]) {
        let mut file = std::fs::File::create(dir.path().join(name)).unwrap();
        for line in lines {
            writeln!(file, "{line}").unwrap();
        }
    }

    /// Writes `contents` verbatim to `relative` inside `dir`.
    fn write_raw(dir: &TempDir, relative: &str, contents: &str) {
        std::fs::write(dir.path().join(relative), contents).unwrap();
    }

    /// Runs a freshly built `Searcher` over the root of `dir`.
    fn search_dir(pattern: &str, dir: &TempDir, config: &SearchConfig) -> Result<Vec<Match>> {
        Searcher::new()
            .unwrap()
            .search(pattern, &[dir.path()], config)
    }

    /// Builds a `Match` for `test.rs` with the given line data.
    fn sample_match(line: u32, line_text: &str, byte_offset: usize) -> Match {
        Match {
            path: PathBuf::from("test.rs"),
            line,
            line_text: line_text.to_string(),
            byte_offset,
        }
    }

    /// Asserts that searching with `mode` fails and that the error names the variant.
    fn assert_mode_rejected(mode: SearchMode, variant_name: &str) {
        let tmp_dir = TempDir::new().unwrap();
        write_raw(&tmp_dir, "test.rs", "fn main() {}");

        let config = SearchConfig {
            mode,
            ..Default::default()
        };
        let result = search_dir("test", &tmp_dir, &config);

        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains(variant_name));
        assert!(err_msg.contains("not supported by the text searcher"));
    }

    // SearchMode

    #[test]
    fn test_search_mode_equality() {
        let same_pairs = [
            (SearchMode::Text, SearchMode::Text),
            (SearchMode::Regex, SearchMode::Regex),
            (SearchMode::Semantic, SearchMode::Semantic),
            (SearchMode::Fuzzy, SearchMode::Fuzzy),
        ];
        for (left, right) in same_pairs {
            assert_eq!(left, right);
        }
    }

    #[test]
    fn test_search_mode_inequality() {
        let different_pairs = [
            (SearchMode::Text, SearchMode::Regex),
            (SearchMode::Semantic, SearchMode::Fuzzy),
        ];
        for (left, right) in different_pairs {
            assert_ne!(left, right);
        }
    }

    #[test]
    fn test_search_mode_clone() {
        let original = SearchMode::Regex;
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    #[test]
    fn test_search_mode_debug() {
        let rendered = format!("{:?}", SearchMode::Text);
        assert!(rendered.contains("Text"));
    }

    // Match

    #[test]
    fn test_match_creation() {
        let found = sample_match(42, "fn test() {}", 100);

        assert_eq!(found.path, PathBuf::from("test.rs"));
        assert_eq!(found.line, 42);
        assert_eq!(found.line_text, "fn test() {}");
        assert_eq!(found.byte_offset, 100);
    }

    #[test]
    fn test_match_clone() {
        let found = sample_match(1, "test", 0);
        let duplicate = found.clone();

        assert_eq!(found.path, duplicate.path);
        assert_eq!(found.line, duplicate.line);
    }

    #[test]
    fn test_match_debug() {
        let rendered = format!("{:?}", sample_match(1, "test", 0));

        assert!(rendered.contains("Match"));
        assert!(rendered.contains("test.rs"));
    }

    #[test]
    fn test_match_serialize() {
        let json = serde_json::to_string(&sample_match(10, "hello", 50)).unwrap();

        for expected in ["test.rs", "hello", "10"] {
            assert!(json.contains(expected));
        }
    }

    // SearchConfig

    #[test]
    fn test_search_config_default() {
        let config = SearchConfig::default();

        assert_eq!(config.mode, SearchMode::Regex);
        assert!(!config.case_insensitive);
        assert!(!config.include_hidden);
        assert!(!config.follow_symlinks);
        assert!(config.max_depth.is_none());
        assert!(config.file_types.is_empty());
        assert!(config.exclude_patterns.is_empty());
        assert_eq!(config.before_context, 2);
        assert_eq!(config.after_context, 2);
    }

    #[test]
    fn test_search_config_custom() {
        let config = SearchConfig {
            mode: SearchMode::Text,
            case_insensitive: true,
            include_hidden: true,
            follow_symlinks: true,
            max_depth: Some(5),
            file_types: vec!["rs".to_string(), "js".to_string()],
            exclude_patterns: vec!["*.min.js".to_string()],
            before_context: 3,
            after_context: 3,
        };

        assert_eq!(config.mode, SearchMode::Text);
        assert!(config.case_insensitive);
        assert!(config.include_hidden);
        assert!(config.follow_symlinks);
        assert_eq!(config.max_depth, Some(5));
        assert_eq!(config.file_types.len(), 2);
        assert_eq!(config.exclude_patterns.len(), 1);
    }

    #[test]
    fn test_search_config_clone() {
        let config = SearchConfig {
            mode: SearchMode::Fuzzy,
            case_insensitive: true,
            ..Default::default()
        };
        let duplicate = config.clone();

        assert_eq!(config.mode, duplicate.mode);
        assert_eq!(config.case_insensitive, duplicate.case_insensitive);
    }

    #[test]
    fn test_search_config_debug() {
        let rendered = format!("{:?}", SearchConfig::default());

        assert!(rendered.contains("SearchConfig"));
        assert!(rendered.contains("mode"));
    }

    // Searcher

    #[test]
    fn test_searcher_new() {
        assert!(Searcher::new().is_ok());
    }

    #[test]
    fn test_searcher_text_search() {
        let tmp_dir = TempDir::new().unwrap();
        write_lines(
            &tmp_dir,
            "test.rs",
            &["fn main() {", " println!(\"hello world\");", "}"],
        );

        let config = SearchConfig {
            mode: SearchMode::Text,
            ..Default::default()
        };
        let matches = search_dir("hello", &tmp_dir, &config).unwrap();

        assert_eq!(matches.len(), 1);
        assert!(matches[0].line_text.contains("hello world"));
        assert_eq!(matches[0].line, 2);
    }

    #[test]
    fn test_searcher_regex_search() {
        let tmp_dir = TempDir::new().unwrap();
        write_lines(
            &tmp_dir,
            "test.rs",
            &["let x = 123;", "let y = 456;", "let z = abc;"],
        );

        let config = SearchConfig {
            mode: SearchMode::Regex,
            ..Default::default()
        };
        let matches = search_dir(r"\d+", &tmp_dir, &config).unwrap();

        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn test_searcher_case_insensitive() {
        let tmp_dir = TempDir::new().unwrap();
        write_lines(
            &tmp_dir,
            "test.txt",
            &["Hello World", "HELLO WORLD", "hello world"],
        );

        let config = SearchConfig {
            mode: SearchMode::Text,
            case_insensitive: true,
            ..Default::default()
        };
        let matches = search_dir("hello", &tmp_dir, &config).unwrap();

        assert_eq!(matches.len(), 3);
    }

    #[test]
    fn test_searcher_file_type_filter() {
        let tmp_dir = TempDir::new().unwrap();
        write_raw(&tmp_dir, "test.rs", "fn test() {}");
        write_raw(&tmp_dir, "test.js", "function test() {}");

        let config = SearchConfig {
            mode: SearchMode::Text,
            file_types: vec!["rs".to_string()],
            ..Default::default()
        };
        let matches = search_dir("test", &tmp_dir, &config).unwrap();

        assert_eq!(matches.len(), 1);
        assert!(matches[0].path.to_string_lossy().ends_with(".rs"));
    }

    #[test]
    fn test_searcher_no_matches() {
        let tmp_dir = TempDir::new().unwrap();
        write_raw(&tmp_dir, "test.rs", "fn main() {}");

        let matches =
            search_dir("nonexistent_pattern_xyz", &tmp_dir, &SearchConfig::default()).unwrap();

        assert!(matches.is_empty());
    }

    #[test]
    fn test_searcher_semantic_mode_unsupported() {
        assert_mode_rejected(SearchMode::Semantic, "Semantic");
    }

    #[test]
    fn test_searcher_fuzzy_mode_unsupported() {
        assert_mode_rejected(SearchMode::Fuzzy, "Fuzzy");
    }

    #[test]
    fn test_searcher_multiple_files() {
        let tmp_dir = TempDir::new().unwrap();
        let fixtures = [
            ("a.rs", "fn test_a() {}"),
            ("b.rs", "fn test_b() {}"),
            ("c.rs", "fn other() {}"),
        ];
        for (name, contents) in fixtures {
            write_raw(&tmp_dir, name, contents);
        }

        let matches = search_dir("test_", &tmp_dir, &SearchConfig::default()).unwrap();

        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn test_searcher_max_depth() {
        let tmp_dir = TempDir::new().unwrap();

        // One file per directory level; only the root-level one lies within depth 1.
        let nested = tmp_dir.path().join("level1").join("level2");
        std::fs::create_dir_all(&nested).unwrap();

        write_raw(&tmp_dir, "root.rs", "fn test() {}");
        write_raw(&tmp_dir, "level1/mid.rs", "fn test() {}");
        std::fs::write(nested.join("deep.rs"), "fn test() {}").unwrap();

        let config = SearchConfig {
            max_depth: Some(1),
            ..Default::default()
        };
        let matches = search_dir("test", &tmp_dir, &config).unwrap();

        assert_eq!(matches.len(), 1);
    }

    // MatchSink

    #[test]
    fn test_match_sink_new() {
        let path = Path::new("test.rs");
        let sink = MatchSink::new(path);

        assert_eq!(sink.path, path);
        assert!(sink.matches.is_empty());
    }

    #[test]
    fn test_match_sink_into_matches() {
        let collected = MatchSink::new(Path::new("test.rs")).into_matches();

        assert!(collected.is_empty());
    }

    // Error message constant

    #[test]
    fn test_unsupported_mode_error_message() {
        let required_fragments = [
            "not supported by the text searcher",
            "FallbackSearchEngine",
            "CandidateGenerator",
        ];
        for fragment in required_fragments {
            assert!(UNSUPPORTED_TEXT_SEARCHER_MODE.contains(fragment));
        }
    }
}