cs/search/
text_search.rs

1use crate::error::{Result, SearchError};
2use grep_regex::RegexMatcherBuilder;
3use grep_searcher::sinks::UTF8;
4use grep_searcher::SearcherBuilder;
5use ignore::overrides::OverrideBuilder;
6use ignore::WalkBuilder;
7use std::path::PathBuf;
8use std::sync::mpsc;
9
/// One matching line reported by a text search.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Match {
    /// Path of the file that contains the matching line.
    pub file: PathBuf,
    /// Number of the matching line within `file` (1-indexed).
    pub line: usize,
    /// Text of the matching line.
    pub content: String,
}
20
/// Text searcher that uses ripgrep as a library for fast text searching.
///
/// The type is a plain configuration value: it only holds flags, pattern
/// lists and the directory to search, so it is cheap to clone and can be
/// printed with `{:?}` for diagnostics.
#[derive(Debug, Clone)]
pub struct TextSearcher {
    /// Whether to respect .gitignore files
    respect_gitignore: bool,
    /// Whether search is case-sensitive
    case_sensitive: bool,
    /// Whether to match whole words only
    word_match: bool,
    /// Whether to treat the query as a regex
    is_regex: bool,
    /// Glob patterns to include
    globs: Vec<String>,
    /// Patterns to exclude from search
    exclusions: Vec<String>,
    /// The base directory to search in
    base_dir: PathBuf,
}
38
39impl TextSearcher {
40    /// Create a new TextSearcher with default settings
41    pub fn new(base_dir: PathBuf) -> Self {
42        Self {
43            respect_gitignore: true,
44            case_sensitive: false,
45            word_match: false,
46            is_regex: false,
47            globs: Vec::new(),
48            exclusions: Vec::new(),
49            base_dir,
50        }
51    }
52
53    /// Set whether to respect .gitignore files (default: true)
54    pub fn respect_gitignore(mut self, value: bool) -> Self {
55        self.respect_gitignore = value;
56        self
57    }
58
59    /// Set whether search is case-sensitive (default: false)
60    pub fn case_sensitive(mut self, value: bool) -> Self {
61        self.case_sensitive = value;
62        self
63    }
64
65    /// Set whether to match whole words only (default: false)
66    pub fn word_match(mut self, value: bool) -> Self {
67        self.word_match = value;
68        self
69    }
70
71    /// Set whether to treat the query as a regex (default: false)
72    pub fn is_regex(mut self, value: bool) -> Self {
73        self.is_regex = value;
74        self
75    }
76
77    /// Add glob patterns to include
78    pub fn add_globs(mut self, globs: Vec<String>) -> Self {
79        self.globs.extend(globs);
80        self
81    }
82
83    /// Add exclusion patterns
84    pub fn add_exclusions(mut self, exclusions: Vec<String>) -> Self {
85        self.exclusions.extend(exclusions);
86        self
87    }
88
89    /// Search for text and return all matches
90    ///
91    /// # Arguments
92    /// * `text` - The text to search for
93    ///
94    /// # Returns
95    /// A vector of Match structs containing file path, line number, and content
96    pub fn search(&self, text: &str) -> Result<Vec<Match>> {
97        // Build the regex matcher with fixed string (literal) matching
98
99        let matcher = RegexMatcherBuilder::new()
100            .case_insensitive(!self.case_sensitive)
101            .word(self.word_match)
102            .fixed_strings(!self.is_regex) // Use fixed strings unless regex is enabled
103            .build(text)
104            .map_err(|e| SearchError::Generic(format!("Failed to build matcher: {}", e)))?;
105
106        // Create a channel for collecting matches from parallel threads
107        let (tx, rx) = mpsc::channel();
108
109        // Build parallel walker with .gitignore support
110        // Build overrides if any globs are provided
111        let mut builder = WalkBuilder::new(&self.base_dir);
112        let mut walk_builder = builder
113            .git_ignore(self.respect_gitignore)
114            .git_global(self.respect_gitignore)
115            .git_exclude(self.respect_gitignore)
116            .hidden(false); // Don't skip hidden files by default
117
118        if !self.globs.is_empty() {
119            let mut override_builder = OverrideBuilder::new(&self.base_dir);
120            for glob in &self.globs {
121                if let Err(e) = override_builder.add(glob) {
122                    return Err(SearchError::Generic(format!(
123                        "Invalid glob pattern '{}': {}",
124                        glob, e
125                    )));
126                }
127            }
128            if let Ok(overrides) = override_builder.build() {
129                walk_builder = walk_builder.overrides(overrides);
130            }
131        }
132
133        walk_builder.build_parallel().run(|| {
134            // Each thread gets its own sender and matcher
135            let tx = tx.clone();
136            let matcher = matcher.clone();
137
138            Box::new(move |entry| {
139                use ignore::WalkState;
140
141                let entry = match entry {
142                    Ok(e) => e,
143                    Err(_) => return WalkState::Continue,
144                };
145
146                // Skip directories
147                if entry.file_type().is_none_or(|ft| ft.is_dir()) {
148                    return WalkState::Continue;
149                }
150
151                let path = entry.path();
152                let path_buf = path.to_path_buf();
153
154                // Thread-local vector to collect matches for this file
155                let mut file_matches = Vec::new();
156
157                // Build searcher
158                let mut searcher = SearcherBuilder::new().line_number(true).build();
159
160                // Search the file
161                let result = searcher.search_path(
162                    &matcher,
163                    path,
164                    UTF8(|line_num, line_content| {
165                        file_matches.push(Match {
166                            file: path_buf.clone(),
167                            line: line_num as usize,
168                            content: line_content.trim_end().to_string(),
169                        });
170                        Ok(true) // Continue searching
171                    }),
172                );
173
174                // Send matches for this file (if any) through the channel
175                if result.is_ok() && !file_matches.is_empty() {
176                    let _ = tx.send(file_matches);
177                }
178
179                WalkState::Continue
180            })
181        });
182
183        // Drop the original sender so rx.iter() will terminate
184        drop(tx);
185
186        // Collect all matches from all threads
187        let mut all_matches = Vec::new();
188        for file_matches in rx {
189            all_matches.extend(file_matches);
190        }
191
192        Ok(all_matches)
193    }
194}
195
196impl Default for TextSearcher {
197    fn default() -> Self {
198        Self::new(std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
199    }
200}
201
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temporary directory holding the given `(name, contents)` files.
    fn fixture(files: &[(&str, &str)]) -> TempDir {
        let dir = TempDir::new().unwrap();
        for (name, contents) in files {
            fs::write(dir.path().join(name), contents).unwrap();
        }
        dir
    }

    /// Creates a temporary directory that looks like a git repository (it has
    /// a `.git` directory, required for `.gitignore` to work) and contains an
    /// ignored and a tracked file with the same content.
    fn git_fixture() -> TempDir {
        let dir = fixture(&[
            (".gitignore", "ignored.txt\n"),
            ("ignored.txt", "target content"),
            ("tracked.txt", "target content"),
        ]);
        fs::create_dir(dir.path().join(".git")).unwrap();
        dir
    }

    /// Builds a searcher with default settings rooted at the fixture directory.
    fn searcher_for(dir: &TempDir) -> TextSearcher {
        TextSearcher::new(dir.path().to_path_buf())
    }

    #[test]
    fn test_basic_search() {
        let dir = fixture(&[("test.txt", "hello world\nfoo bar\nhello again")]);

        let found = searcher_for(&dir).search("hello").unwrap();

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 1);
        assert_eq!(found[0].content, "hello world");
        assert_eq!(found[1].line, 3);
        assert_eq!(found[1].content, "hello again");
    }

    #[test]
    fn test_case_insensitive_default() {
        let dir = fixture(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let found = searcher_for(&dir).search("hello").unwrap();

        // Every casing variant matches by default.
        assert_eq!(found.len(), 3);
    }

    #[test]
    fn test_case_sensitive() {
        let dir = fixture(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let found = searcher_for(&dir)
            .case_sensitive(true)
            .search("hello")
            .unwrap();

        // Only the exact-case line matches.
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].content, "hello");
    }

    #[test]
    fn test_no_matches() {
        let dir = fixture(&[("test.txt", "foo bar baz")]);

        let found = searcher_for(&dir).search("notfound").unwrap();

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_multiple_files() {
        let dir = fixture(&[
            ("file1.txt", "target line 1"),
            ("file2.txt", "target line 2"),
            ("file3.txt", "other content"),
        ]);

        let found = searcher_for(&dir).search("target").unwrap();

        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_gitignore_respected() {
        let dir = git_fixture();

        let found = searcher_for(&dir)
            .respect_gitignore(true)
            .search("target")
            .unwrap();

        // Only tracked.txt is searched.
        assert_eq!(found.len(), 1);
        assert!(found[0].file.ends_with("tracked.txt"));
    }

    #[test]
    fn test_gitignore_disabled() {
        let dir = git_fixture();

        let found = searcher_for(&dir)
            .respect_gitignore(false)
            .search("target")
            .unwrap();

        // Both the ignored and the tracked file are searched.
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_builder_pattern() {
        let configured = TextSearcher::new(std::env::current_dir().unwrap())
            .case_sensitive(true)
            .respect_gitignore(false);

        assert!(configured.case_sensitive);
        assert!(!configured.respect_gitignore);
    }

    #[test]
    fn test_default() {
        let defaults = TextSearcher::default();

        assert!(!defaults.case_sensitive);
        assert!(defaults.respect_gitignore);
    }

    #[test]
    fn test_special_characters() {
        let dir = fixture(&[("test.txt", "price: $19.99\nurl: http://example.com")]);
        let searcher = searcher_for(&dir);

        // Regex metacharacters in the query are treated as literals.
        for query in ["$19.99", "http://"] {
            let found = searcher.search(query).unwrap();
            assert_eq!(found.len(), 1);
        }
    }

    #[test]
    fn test_line_numbers_accurate() {
        let dir = fixture(&[(
            "test.txt",
            "line 1\nline 2\ntarget line 3\nline 4\ntarget line 5\nline 6",
        )]);

        let found = searcher_for(&dir).search("target").unwrap();

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 3);
        assert_eq!(found[1].line, 5);
    }
}