Skip to main content

sqry_core/search/
mod.rs

1/// Query classification for semantic/text/hybrid search modes
2pub mod classifier;
3/// Fallback search (AST → ripgrep) - distinct from embedding-based hybrid search
4pub mod fallback;
5/// Fuzzy search implementation
6pub mod fuzzy;
7/// Pattern matching utilities
8pub mod matcher;
9/// Result ranking and relevance scoring
10pub mod ranking;
11/// SIMD-accelerated search operations
12pub mod simd;
13/// Trigram-based indexing
14pub mod trigram;
15
16use anyhow::{Context, Result};
17use grep_regex::RegexMatcher;
18use grep_searcher::{BinaryDetection, Searcher as GrepSearcher, SearcherBuilder, Sink, SinkMatch};
19use ignore::{
20    WalkBuilder,
21    overrides::{Override, OverrideBuilder},
22};
23use std::io;
24use std::path::{Path, PathBuf};
25
/// Message tail appended to errors raised when the text searcher is asked to run a
/// [`SearchMode`] it cannot handle.
///
/// The text points callers at the alternatives: `FallbackSearchEngine` for semantic
/// search and `CandidateGenerator` for fuzzy search.
const UNSUPPORTED_TEXT_SEARCHER_MODE: &str = concat!(
    "is not supported by the text searcher. ",
    "Use FallbackSearchEngine for semantic search or CandidateGenerator for fuzzy search.",
);
30
/// Strategy used to interpret a search pattern.
///
/// The ripgrep-backed [`Searcher`] in this module only executes [`SearchMode::Text`]
/// and [`SearchMode::Regex`]; it rejects the other two variants with an error.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SearchMode {
    /// Literal text search (the pattern is regex-escaped before matching).
    Text,
    /// Regular-expression search (the pattern is used verbatim).
    Regex,
    /// AST-aware semantic search.
    Semantic,
    /// Fuzzy matching search.
    Fuzzy,
}
43
44/// A search match
45#[derive(Debug, Clone, serde::Serialize)]
46pub struct Match {
47    /// Path to the file containing the match
48    pub path: PathBuf,
49    /// 1-based line number (compatible with grep output)
50    pub line: u32,
51    /// Text content of the matching line
52    pub line_text: String,
53    /// Byte position from start of file where this line begins
54    pub byte_offset: usize,
55}
56
57/// Search configuration
58#[derive(Debug, Clone)]
59pub struct SearchConfig {
60    /// Search mode (text, regex, semantic, fuzzy)
61    pub mode: SearchMode,
62    /// Whether to perform case-insensitive search
63    pub case_insensitive: bool,
64    /// Whether to include hidden files
65    pub include_hidden: bool,
66    /// Whether to follow symbolic links
67    pub follow_symlinks: bool,
68    /// Maximum directory depth for recursion
69    pub max_depth: Option<usize>,
70    /// File extensions to include (e.g., "rs", "js")
71    pub file_types: Vec<String>,
72    /// Patterns to exclude from search
73    pub exclude_patterns: Vec<String>,
74    /// Number of lines to show before each match
75    pub before_context: usize,
76    /// Number of lines to show after each match
77    pub after_context: usize,
78}
79
80impl Default for SearchConfig {
81    fn default() -> Self {
82        Self {
83            mode: SearchMode::Regex,
84            case_insensitive: false,
85            include_hidden: false,
86            follow_symlinks: false,
87            max_depth: None,
88            file_types: Vec::new(),
89            exclude_patterns: Vec::new(),
90            before_context: 2,
91            after_context: 2,
92        }
93    }
94}
95
96/// Main searcher using ripgrep's engine
97pub struct Searcher {
98    searcher: grep_searcher::Searcher,
99}
100
101/// Custom sink to collect matches with byte offsets
102struct MatchSink<'a> {
103    path: &'a Path,
104    matches: Vec<Match>,
105}
106
107impl<'a> MatchSink<'a> {
108    fn new(path: &'a Path) -> Self {
109        Self {
110            path,
111            matches: Vec::new(),
112        }
113    }
114
115    fn into_matches(self) -> Vec<Match> {
116        self.matches
117    }
118}
119
120impl Sink for MatchSink<'_> {
121    type Error = io::Error;
122
123    fn matched(
124        &mut self,
125        _searcher: &GrepSearcher,
126        mat: &SinkMatch<'_>,
127    ) -> Result<bool, io::Error> {
128        let line_text = String::from_utf8_lossy(mat.bytes()).to_string();
129        // Line numbers beyond u32::MAX are impractical; clamp to max
130        let line_number = mat
131            .line_number()
132            .unwrap_or(1)
133            .min(u64::from(u32::MAX))
134            .try_into()
135            .unwrap_or(u32::MAX);
136        // Byte offsets must fit in usize (architecture-dependent)
137        let byte_offset = mat.absolute_byte_offset().try_into().unwrap_or(usize::MAX);
138
139        self.matches.push(Match {
140            path: self.path.to_path_buf(),
141            line: line_number,
142            line_text,
143            byte_offset,
144        });
145
146        Ok(true)
147    }
148}
149
150impl Searcher {
151    /// Create a new searcher instance
152    ///
153    /// # Errors
154    ///
155    /// Returns [`anyhow::Error`] if the underlying ripgrep searcher fails to initialise.
156    pub fn new() -> Result<Self> {
157        let searcher = SearcherBuilder::new()
158            .binary_detection(BinaryDetection::quit(0))
159            .line_number(true)
160            .build();
161
162        Ok(Self { searcher })
163    }
164
165    /// Search for a pattern in the given paths
166    ///
167    /// # Errors
168    ///
169    /// Returns [`anyhow::Error`] when walker construction fails, when a file cannot be read,
170    /// or when the requested search mode is unsupported.
171    pub fn search<P: AsRef<Path>>(
172        &self,
173        pattern: &str,
174        paths: &[P],
175        config: &SearchConfig,
176    ) -> Result<Vec<Match>> {
177        let mut all_matches = Vec::new();
178        let matcher = Self::build_matcher(pattern, config)?;
179
180        for path in paths {
181            let path_matches = self.search_path(&matcher, path.as_ref(), config)?;
182            all_matches.extend(path_matches);
183        }
184
185        Ok(all_matches)
186    }
187
188    fn build_matcher(pattern: &str, config: &SearchConfig) -> Result<RegexMatcher> {
189        let mut matcher_builder = grep_regex::RegexMatcherBuilder::new();
190        matcher_builder.case_insensitive(config.case_insensitive);
191
192        let pattern_to_use = Self::pattern_for_mode(pattern, config.mode)?;
193
194        matcher_builder.build(&pattern_to_use).map_err(Into::into)
195    }
196
197    fn pattern_for_mode(pattern: &str, mode: SearchMode) -> Result<String> {
198        match mode {
199            SearchMode::Text => Ok(regex::escape(pattern)),
200            SearchMode::Regex => Ok(pattern.to_string()),
201            SearchMode::Semantic | SearchMode::Fuzzy => Err(anyhow::anyhow!(
202                "SearchMode::{mode:?} {UNSUPPORTED_TEXT_SEARCHER_MODE}"
203            )),
204        }
205    }
206
207    fn search_path(
208        &self,
209        matcher: &RegexMatcher,
210        path: &Path,
211        config: &SearchConfig,
212    ) -> Result<Vec<Match>> {
213        let walker = Self::build_walker(path, config)?;
214        let mut match_results = Vec::new();
215
216        for entry in walker {
217            let entry = entry?;
218            if !Self::is_searchable_entry(&entry, config) {
219                continue;
220            }
221
222            let path = entry.path();
223            let file_matches = self
224                .search_file(matcher, path)
225                .with_context(|| format!("Failed to search file: {}", path.display()))?;
226            match_results.extend(file_matches);
227        }
228
229        Ok(match_results)
230    }
231
232    fn matches_file_type(path: &Path, config: &SearchConfig) -> bool {
233        if config.file_types.is_empty() {
234            return true;
235        }
236
237        let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
238            return false;
239        };
240
241        config.file_types.iter().any(|candidate| candidate == ext)
242    }
243
244    /// Build a file walker with the given configuration
245    fn build_walker(path: &Path, config: &SearchConfig) -> Result<ignore::Walk> {
246        let mut builder = WalkBuilder::new(path);
247
248        Self::configure_walker(&mut builder, config);
249        Self::apply_exclude_overrides(&mut builder, path, &config.exclude_patterns)?;
250
251        Ok(builder.build())
252    }
253
254    fn configure_walker(builder: &mut WalkBuilder, config: &SearchConfig) {
255        builder
256            .hidden(!config.include_hidden)
257            .git_ignore(true)
258            .git_global(true)
259            .git_exclude(true)
260            .follow_links(config.follow_symlinks);
261
262        if let Some(max_depth) = config.max_depth {
263            builder.max_depth(Some(max_depth));
264        }
265    }
266
267    fn apply_exclude_overrides(
268        builder: &mut WalkBuilder,
269        path: &Path,
270        exclude_patterns: &[String],
271    ) -> Result<()> {
272        if exclude_patterns.is_empty() {
273            return Ok(());
274        }
275
276        let overrides = Self::build_exclude_overrides(path, exclude_patterns)?;
277        builder.overrides(overrides);
278        Ok(())
279    }
280
281    fn build_exclude_overrides(path: &Path, exclude_patterns: &[String]) -> Result<Override> {
282        // Build exclude patterns using OverrideBuilder
283        //
284        // Note on gitignore syntax: The `ignore` crate's OverrideBuilder requires `!` prefix
285        // for exclusions (e.g., `!*.test.js`). This differs from standard gitignore semantics
286        // where patterns without `!` are exclusions by default. Here, we automatically prepend
287        // `!` to user-provided patterns to match expected gitignore behavior.
288        //
289        // Example transformations:
290        //   User input: "*.min.js"     -> OverrideBuilder: "!*.min.js" (exclude minified files)
291        //   User input: "target/**"    -> OverrideBuilder: "!target/**" (exclude target directory)
292        //   User input: "node_modules" -> OverrideBuilder: "!node_modules" (exclude node_modules)
293        let mut override_builder = OverrideBuilder::new(path);
294        for pattern in exclude_patterns {
295            // Patterns should be in gitignore format (e.g., "*.min.js", "target/**")
296            override_builder
297                .add(&format!("!{pattern}"))
298                .with_context(|| format!("Invalid exclude pattern: {pattern}"))?;
299        }
300        override_builder
301            .build()
302            .context("Failed to build exclude overrides")
303    }
304
305    fn is_searchable_entry(entry: &ignore::DirEntry, config: &SearchConfig) -> bool {
306        let path = entry.path();
307        path.is_file() && Self::matches_file_type(path, config)
308    }
309
310    /// Search a single file
311    fn search_file(&self, matcher: &RegexMatcher, path: &Path) -> Result<Vec<Match>> {
312        let mut searcher = self.searcher.clone();
313        let mut sink = MatchSink::new(path);
314
315        searcher
316            .search_path(matcher, path, &mut sink)
317            .map_err(|e| anyhow::anyhow!("Search failed: {e}"))?;
318
319        Ok(sink.into_matches())
320    }
321}
322
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    // Shared fixtures

    /// Builds a `Match` located in `test.rs` with the given line, text and offset.
    fn fixture_match(line: u32, line_text: &str, byte_offset: usize) -> Match {
        Match {
            path: PathBuf::from("test.rs"),
            line,
            line_text: line_text.to_string(),
            byte_offset,
        }
    }

    /// Creates `name` inside `dir`, writing each entry of `lines` followed by a newline.
    fn write_lines(dir: &TempDir, name: &str, lines: &[&str]) {
        let mut file = std::fs::File::create(dir.path().join(name)).unwrap();
        for line in lines {
            writeln!(file, "{line}").unwrap();
        }
    }

    /// Runs a search rooted at `dir` with a fresh `Searcher` and unwraps the result.
    fn run_search(dir: &TempDir, pattern: &str, config: &SearchConfig) -> Vec<Match> {
        Searcher::new()
            .unwrap()
            .search(pattern, &[dir.path()], config)
            .unwrap()
    }

    /// Asserts that searching with `mode` fails and that the error names the mode.
    fn assert_mode_rejected(mode: SearchMode, mode_name: &str) {
        let tmp_dir = TempDir::new().unwrap();
        std::fs::write(tmp_dir.path().join("test.rs"), "fn main() {}").unwrap();

        let config = SearchConfig {
            mode,
            ..Default::default()
        };
        let result = Searcher::new()
            .unwrap()
            .search("test", &[tmp_dir.path()], &config);

        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(err_msg.contains(mode_name));
        assert!(err_msg.contains("not supported by the text searcher"));
    }

    // SearchMode tests

    #[test]
    fn test_search_mode_equality() {
        let every_mode = [
            SearchMode::Text,
            SearchMode::Regex,
            SearchMode::Semantic,
            SearchMode::Fuzzy,
        ];
        for mode in every_mode {
            assert_eq!(mode, mode);
        }
    }

    #[test]
    fn test_search_mode_inequality() {
        assert_ne!(SearchMode::Text, SearchMode::Regex);
        assert_ne!(SearchMode::Semantic, SearchMode::Fuzzy);
    }

    #[test]
    fn test_search_mode_clone() {
        let original = SearchMode::Regex;
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    #[test]
    fn test_search_mode_debug() {
        let rendered = format!("{:?}", SearchMode::Text);
        assert!(rendered.contains("Text"));
    }

    // Match tests

    #[test]
    fn test_match_creation() {
        let found = fixture_match(42, "fn test() {}", 100);

        assert_eq!(found.path, PathBuf::from("test.rs"));
        assert_eq!(found.line, 42);
        assert_eq!(found.line_text, "fn test() {}");
        assert_eq!(found.byte_offset, 100);
    }

    #[test]
    fn test_match_clone() {
        let found = fixture_match(1, "test", 0);
        let duplicate = found.clone();

        assert_eq!(found.path, duplicate.path);
        assert_eq!(found.line, duplicate.line);
    }

    #[test]
    fn test_match_debug() {
        let found = fixture_match(1, "test", 0);
        let rendered = format!("{found:?}");

        assert!(rendered.contains("Match"));
        assert!(rendered.contains("test.rs"));
    }

    #[test]
    fn test_match_serialize() {
        let found = fixture_match(10, "hello", 50);
        let json = serde_json::to_string(&found).unwrap();

        for expected in ["test.rs", "hello", "10"] {
            assert!(json.contains(expected));
        }
    }

    // SearchConfig tests

    #[test]
    fn test_search_config_default() {
        let config = SearchConfig::default();

        assert_eq!(config.mode, SearchMode::Regex);
        assert!(!config.case_insensitive);
        assert!(!config.include_hidden);
        assert!(!config.follow_symlinks);
        assert!(config.max_depth.is_none());
        assert!(config.file_types.is_empty());
        assert!(config.exclude_patterns.is_empty());
        assert_eq!(config.before_context, 2);
        assert_eq!(config.after_context, 2);
    }

    #[test]
    fn test_search_config_custom() {
        let config = SearchConfig {
            mode: SearchMode::Text,
            case_insensitive: true,
            include_hidden: true,
            follow_symlinks: true,
            max_depth: Some(5),
            file_types: vec!["rs".to_string(), "js".to_string()],
            exclude_patterns: vec!["*.min.js".to_string()],
            before_context: 3,
            after_context: 3,
        };

        assert_eq!(config.mode, SearchMode::Text);
        assert!(config.case_insensitive);
        assert!(config.include_hidden);
        assert!(config.follow_symlinks);
        assert_eq!(config.max_depth, Some(5));
        assert_eq!(config.file_types.len(), 2);
        assert_eq!(config.exclude_patterns.len(), 1);
    }

    #[test]
    fn test_search_config_clone() {
        let config = SearchConfig {
            mode: SearchMode::Fuzzy,
            case_insensitive: true,
            ..Default::default()
        };
        let duplicate = config.clone();

        assert_eq!(config.mode, duplicate.mode);
        assert_eq!(config.case_insensitive, duplicate.case_insensitive);
    }

    #[test]
    fn test_search_config_debug() {
        let config = SearchConfig::default();
        let rendered = format!("{config:?}");

        assert!(rendered.contains("SearchConfig"));
        assert!(rendered.contains("mode"));
    }

    // Searcher tests

    #[test]
    fn test_searcher_new() {
        assert!(Searcher::new().is_ok());
    }

    #[test]
    fn test_searcher_text_search() {
        let tmp_dir = TempDir::new().unwrap();
        write_lines(
            &tmp_dir,
            "test.rs",
            &["fn main() {", "    println!(\"hello world\");", "}"],
        );

        let config = SearchConfig {
            mode: SearchMode::Text,
            ..Default::default()
        };
        let matches = run_search(&tmp_dir, "hello", &config);

        assert_eq!(matches.len(), 1);
        assert!(matches[0].line_text.contains("hello world"));
        assert_eq!(matches[0].line, 2);
    }

    #[test]
    fn test_searcher_regex_search() {
        let tmp_dir = TempDir::new().unwrap();
        write_lines(
            &tmp_dir,
            "test.rs",
            &["let x = 123;", "let y = 456;", "let z = abc;"],
        );

        let config = SearchConfig {
            mode: SearchMode::Regex,
            ..Default::default()
        };

        // Only the two lines containing digits should match.
        let matches = run_search(&tmp_dir, r"\d+", &config);

        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn test_searcher_case_insensitive() {
        let tmp_dir = TempDir::new().unwrap();
        write_lines(
            &tmp_dir,
            "test.txt",
            &["Hello World", "HELLO WORLD", "hello world"],
        );

        let config = SearchConfig {
            mode: SearchMode::Text,
            case_insensitive: true,
            ..Default::default()
        };
        let matches = run_search(&tmp_dir, "hello", &config);

        assert_eq!(matches.len(), 3);
    }

    #[test]
    fn test_searcher_file_type_filter() {
        let tmp_dir = TempDir::new().unwrap();
        std::fs::write(tmp_dir.path().join("test.rs"), "fn test() {}").unwrap();
        std::fs::write(tmp_dir.path().join("test.js"), "function test() {}").unwrap();

        let config = SearchConfig {
            mode: SearchMode::Text,
            file_types: vec!["rs".to_string()],
            ..Default::default()
        };
        let matches = run_search(&tmp_dir, "test", &config);

        // The .js file is filtered out, leaving only the .rs hit.
        assert_eq!(matches.len(), 1);
        assert!(matches[0].path.to_string_lossy().ends_with(".rs"));
    }

    #[test]
    fn test_searcher_no_matches() {
        let tmp_dir = TempDir::new().unwrap();
        std::fs::write(tmp_dir.path().join("test.rs"), "fn main() {}").unwrap();

        let matches = run_search(
            &tmp_dir,
            "nonexistent_pattern_xyz",
            &SearchConfig::default(),
        );

        assert!(matches.is_empty());
    }

    #[test]
    fn test_searcher_semantic_mode_unsupported() {
        assert_mode_rejected(SearchMode::Semantic, "Semantic");
    }

    #[test]
    fn test_searcher_fuzzy_mode_unsupported() {
        assert_mode_rejected(SearchMode::Fuzzy, "Fuzzy");
    }

    #[test]
    fn test_searcher_multiple_files() {
        let tmp_dir = TempDir::new().unwrap();
        let fixtures = [
            ("a.rs", "fn test_a() {}"),
            ("b.rs", "fn test_b() {}"),
            ("c.rs", "fn other() {}"),
        ];
        for (name, body) in fixtures {
            std::fs::write(tmp_dir.path().join(name), body).unwrap();
        }

        let matches = run_search(&tmp_dir, "test_", &SearchConfig::default());

        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn test_searcher_max_depth() {
        let tmp_dir = TempDir::new().unwrap();

        // Lay out root/level1/level2 with one matching file at each level.
        let nested = tmp_dir.path().join("level1").join("level2");
        std::fs::create_dir_all(&nested).unwrap();

        std::fs::write(tmp_dir.path().join("root.rs"), "fn test() {}").unwrap();
        std::fs::write(tmp_dir.path().join("level1/mid.rs"), "fn test() {}").unwrap();
        std::fs::write(nested.join("deep.rs"), "fn test() {}").unwrap();

        let config = SearchConfig {
            max_depth: Some(1),
            ..Default::default()
        };
        let matches = run_search(&tmp_dir, "test", &config);

        // Depth 1 reaches only the file directly under the root.
        assert_eq!(matches.len(), 1);
    }

    // MatchSink tests

    #[test]
    fn test_match_sink_new() {
        let path = Path::new("test.rs");
        let sink = MatchSink::new(path);

        assert_eq!(sink.path, path);
        assert!(sink.matches.is_empty());
    }

    #[test]
    fn test_match_sink_into_matches() {
        let collected = MatchSink::new(Path::new("test.rs")).into_matches();

        assert!(collected.is_empty());
    }

    // Error message constant test

    #[test]
    fn test_unsupported_mode_error_message() {
        for fragment in [
            "not supported by the text searcher",
            "FallbackSearchEngine",
            "CandidateGenerator",
        ] {
            assert!(UNSUPPORTED_TEXT_SEARCHER_MODE.contains(fragment));
        }
    }
}