cs/search/
text_search.rs

1use crate::error::{Result, SearchError};
2use grep_regex::RegexMatcherBuilder;
3use grep_searcher::sinks::UTF8;
4use grep_searcher::SearcherBuilder;
5use ignore::overrides::OverrideBuilder;
6use ignore::WalkBuilder;
7use std::path::PathBuf;
8use std::sync::mpsc;
9
/// A single matching line produced by [`TextSearcher::search`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Match {
    /// Path of the file in which the match was found, as yielded by the directory walker
    pub file: PathBuf,
    /// Line number of the matching line (1-indexed)
    pub line: usize,
    /// Text of the matching line with trailing whitespace (including the line terminator) removed
    pub content: String,
}
20
/// Text searcher that uses ripgrep as a library for fast text searching.
///
/// Construct one with [`TextSearcher::new`] and adjust it through the
/// consuming builder-style setters before calling [`TextSearcher::search`].
pub struct TextSearcher {
    /// Whether to respect .gitignore files (default: true)
    respect_gitignore: bool,
    /// Whether search is case-sensitive (default: false)
    case_sensitive: bool,
    /// Whether to match whole words only (default: false)
    word_match: bool,
    /// Whether to treat the query as a regex rather than a literal string (default: false)
    is_regex: bool,
    /// Glob patterns used to build the walker's override set (default: empty)
    globs: Vec<String>,
    /// The base directory to search in
    base_dir: PathBuf,
}
37
38impl TextSearcher {
39    /// Create a new TextSearcher with default settings
40    pub fn new(base_dir: PathBuf) -> Self {
41        Self {
42            respect_gitignore: true,
43            case_sensitive: false,
44            word_match: false,
45            is_regex: false,
46            globs: Vec::new(),
47            base_dir,
48        }
49    }
50
51    /// Set whether to respect .gitignore files (default: true)
52    pub fn respect_gitignore(mut self, value: bool) -> Self {
53        self.respect_gitignore = value;
54        self
55    }
56
57    /// Set whether search is case-sensitive (default: false)
58    pub fn case_sensitive(mut self, value: bool) -> Self {
59        self.case_sensitive = value;
60        self
61    }
62
63    /// Set whether to match whole words only (default: false)
64    pub fn word_match(mut self, value: bool) -> Self {
65        self.word_match = value;
66        self
67    }
68
69    /// Set whether to treat the query as a regex (default: false)
70    pub fn is_regex(mut self, value: bool) -> Self {
71        self.is_regex = value;
72        self
73    }
74
75    /// Add glob patterns to include
76    pub fn add_globs(mut self, globs: Vec<String>) -> Self {
77        self.globs.extend(globs);
78        self
79    }
80
81    /// Search for text and return all matches
82    ///
83    /// # Arguments
84    /// * `text` - The text to search for
85    ///
86    /// # Returns
87    /// A vector of Match structs containing file path, line number, and content
88    pub fn search(&self, text: &str) -> Result<Vec<Match>> {
89        // Build the regex matcher with fixed string (literal) matching
90
91        let matcher = RegexMatcherBuilder::new()
92            .case_insensitive(!self.case_sensitive)
93            .word(self.word_match)
94            .fixed_strings(!self.is_regex) // Use fixed strings unless regex is enabled
95            .build(text)
96            .map_err(|e| SearchError::Generic(format!("Failed to build matcher: {}", e)))?;
97
98        // Create a channel for collecting matches from parallel threads
99        let (tx, rx) = mpsc::channel();
100
101        // Build parallel walker with .gitignore support
102        // Build overrides if any globs are provided
103        let mut builder = WalkBuilder::new(&self.base_dir);
104        let mut walk_builder = builder
105            .git_ignore(self.respect_gitignore)
106            .git_global(self.respect_gitignore)
107            .git_exclude(self.respect_gitignore)
108            .hidden(false); // Don't skip hidden files by default
109
110        if !self.globs.is_empty() {
111            let mut override_builder = OverrideBuilder::new(&self.base_dir);
112            for glob in &self.globs {
113                if let Err(e) = override_builder.add(glob) {
114                    return Err(SearchError::Generic(format!(
115                        "Invalid glob pattern '{}': {}",
116                        glob, e
117                    )));
118                }
119            }
120            if let Ok(overrides) = override_builder.build() {
121                walk_builder = walk_builder.overrides(overrides);
122            }
123        }
124
125        walk_builder.build_parallel().run(|| {
126            // Each thread gets its own sender and matcher
127            let tx = tx.clone();
128            let matcher = matcher.clone();
129
130            Box::new(move |entry| {
131                use ignore::WalkState;
132
133                let entry = match entry {
134                    Ok(e) => e,
135                    Err(_) => return WalkState::Continue,
136                };
137
138                // Skip directories
139                if entry.file_type().is_none_or(|ft| ft.is_dir()) {
140                    return WalkState::Continue;
141                }
142
143                let path = entry.path();
144                let path_buf = path.to_path_buf();
145
146                // Thread-local vector to collect matches for this file
147                let mut file_matches = Vec::new();
148
149                // Build searcher
150                let mut searcher = SearcherBuilder::new().line_number(true).build();
151
152                // Search the file
153                let result = searcher.search_path(
154                    &matcher,
155                    path,
156                    UTF8(|line_num, line_content| {
157                        file_matches.push(Match {
158                            file: path_buf.clone(),
159                            line: line_num as usize,
160                            content: line_content.trim_end().to_string(),
161                        });
162                        Ok(true) // Continue searching
163                    }),
164                );
165
166                // Send matches for this file (if any) through the channel
167                if result.is_ok() && !file_matches.is_empty() {
168                    let _ = tx.send(file_matches);
169                }
170
171                WalkState::Continue
172            })
173        });
174
175        // Drop the original sender so rx.iter() will terminate
176        drop(tx);
177
178        // Collect all matches from all threads
179        let mut all_matches = Vec::new();
180        for file_matches in rx {
181            all_matches.extend(file_matches);
182        }
183
184        Ok(all_matches)
185    }
186}
187
188impl Default for TextSearcher {
189    fn default() -> Self {
190        Self::new(std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
191    }
192}
193
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create a temporary directory populated with the given `(name, contents)` files.
    fn fixture(files: &[(&str, &str)]) -> TempDir {
        let dir = TempDir::new().unwrap();
        for &(name, contents) in files {
            fs::write(dir.path().join(name), contents).unwrap();
        }
        dir
    }

    /// Create a fake git repository whose .gitignore excludes `ignored.txt`,
    /// containing one ignored and one tracked file with identical content.
    fn git_fixture() -> TempDir {
        let dir = fixture(&[
            (".gitignore", "ignored.txt\n"),
            ("ignored.txt", "target content"),
            ("tracked.txt", "target content"),
        ]);
        // A `.git` directory is required for .gitignore to take effect.
        fs::create_dir(dir.path().join(".git")).unwrap();
        dir
    }

    /// Searcher with default settings rooted at the fixture directory.
    fn searcher_for(dir: &TempDir) -> TextSearcher {
        TextSearcher::new(dir.path().to_path_buf())
    }

    #[test]
    fn test_basic_search() {
        let dir = fixture(&[("test.txt", "hello world\nfoo bar\nhello again")]);

        let found = searcher_for(&dir).search("hello").unwrap();

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 1);
        assert_eq!(found[0].content, "hello world");
        assert_eq!(found[1].line, 3);
        assert_eq!(found[1].content, "hello again");
    }

    #[test]
    fn test_case_insensitive_default() {
        let dir = fixture(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let found = searcher_for(&dir).search("hello").unwrap();

        // Every casing variant matches by default
        assert_eq!(found.len(), 3);
    }

    #[test]
    fn test_case_sensitive() {
        let dir = fixture(&[("test.txt", "Hello World\nHELLO\nhello")]);

        let found = searcher_for(&dir)
            .case_sensitive(true)
            .search("hello")
            .unwrap();

        // Only the exact-case line matches
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].content, "hello");
    }

    #[test]
    fn test_no_matches() {
        let dir = fixture(&[("test.txt", "foo bar baz")]);

        let found = searcher_for(&dir).search("notfound").unwrap();

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_multiple_files() {
        let dir = fixture(&[
            ("file1.txt", "target line 1"),
            ("file2.txt", "target line 2"),
            ("file3.txt", "other content"),
        ]);

        let found = searcher_for(&dir).search("target").unwrap();

        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_gitignore_respected() {
        let dir = git_fixture();

        let found = searcher_for(&dir)
            .respect_gitignore(true)
            .search("target")
            .unwrap();

        // Only tracked.txt is searched
        assert_eq!(found.len(), 1);
        assert!(found[0].file.ends_with("tracked.txt"));
    }

    #[test]
    fn test_gitignore_disabled() {
        let dir = git_fixture();

        let found = searcher_for(&dir)
            .respect_gitignore(false)
            .search("target")
            .unwrap();

        // Both the ignored and the tracked file are searched
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_builder_pattern() {
        let cwd = std::env::current_dir().unwrap();
        let configured = TextSearcher::new(cwd)
            .case_sensitive(true)
            .respect_gitignore(false);

        assert!(configured.case_sensitive);
        assert!(!configured.respect_gitignore);
    }

    #[test]
    fn test_default() {
        let defaults = TextSearcher::default();

        assert!(!defaults.case_sensitive);
        assert!(defaults.respect_gitignore);
    }

    #[test]
    fn test_special_characters() {
        let dir = fixture(&[("test.txt", "price: $19.99\nurl: http://example.com")]);
        let searcher = searcher_for(&dir);

        // Regex metacharacters must be treated as literals in the default mode
        for query in ["$19.99", "http://"] {
            let found = searcher.search(query).unwrap();
            assert_eq!(found.len(), 1);
        }
    }

    #[test]
    fn test_line_numbers_accurate() {
        let dir = fixture(&[(
            "test.txt",
            "line 1\nline 2\ntarget line 3\nline 4\ntarget line 5\nline 6",
        )]);

        let found = searcher_for(&dir).search("target").unwrap();

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 3);
        assert_eq!(found[1].line, 5);
    }
}