rust_filesearch/fs/
content.rs

1#[cfg(feature = "grep")]
2use crate::errors::{FsError, Result};
3#[cfg(feature = "grep")]
4use crate::models::{ContentMatch, Entry};
5#[cfg(feature = "grep")]
6use grep_matcher::Matcher;
7#[cfg(feature = "grep")]
8use grep_regex::RegexMatcherBuilder;
9#[cfg(feature = "grep")]
10use grep_searcher::{sinks, BinaryDetection, SearcherBuilder};
11#[cfg(feature = "grep")]
12use std::fs::File;
13#[cfg(feature = "grep")]
14use std::io::{BufRead, BufReader};
15#[cfg(feature = "grep")]
16use std::path::Path;
17
/// Searches file contents for a pattern compiled once at construction time.
///
/// Instances are created through [`ContentSearcher::new`].
#[cfg(feature = "grep")]
#[derive(Debug)]
pub struct ContentSearcher {
    /// Compiled matcher used for every file handed to this searcher.
    matcher: grep_regex::RegexMatcher,
    /// Number of lines collected before and after each match; `0` disables
    /// context collection.
    context_lines: usize,
    /// Flag supplied by the caller at construction time.
    // Stored for callers' configuration but not read by any code in this
    // file, hence the lint suppression.
    #[allow(dead_code)]
    line_numbers: bool,
}
25
#[cfg(feature = "grep")]
impl ContentSearcher {
    /// Creates a content searcher for `pattern`.
    ///
    /// * `pattern` - text to search for.
    /// * `is_regex` - when `true` the pattern is compiled as a regular
    ///   expression; otherwise it is passed through `regex::escape` first so
    ///   that it matches literally.
    /// * `case_insensitive` - forwarded to the matcher builder.
    /// * `context_lines` - how many lines to capture before and after each
    ///   match (`0` disables context).
    /// * `line_numbers` - stored on the searcher; nothing in this impl reads it.
    ///
    /// # Errors
    ///
    /// Returns `FsError::InvalidFormat` when the matcher cannot be built from
    /// the (possibly escaped) pattern.
    pub fn new(
        pattern: &str,
        is_regex: bool,
        case_insensitive: bool,
        context_lines: usize,
        line_numbers: bool,
    ) -> Result<Self> {
        let pattern_to_use = if is_regex {
            pattern.to_string()
        } else {
            regex::escape(pattern)
        };

        let matcher = RegexMatcherBuilder::new()
            .case_insensitive(case_insensitive)
            .build(&pattern_to_use)
            .map_err(|e| FsError::InvalidFormat {
                // Report the pattern as the caller wrote it, not the escaped form.
                format: format!("Invalid regex pattern '{}': {}", pattern, e),
            })?;

        Ok(Self {
            matcher,
            context_lines,
            line_numbers,
        })
    }

    /// Searches a single file and returns one `ContentMatch` per matching line.
    ///
    /// Paths that are not regular files yield an empty list. Note that
    /// `Path::is_file` follows symbolic links, so a symlink that resolves to a
    /// regular file *is* searched.
    ///
    /// For every match:
    /// * `line_number` is the line number reported by the searcher,
    /// * `column` is the byte offset of the first match within the line plus
    ///   one (falling back to `1` when the match cannot be re-located),
    /// * `matched_text` is the whole line with trailing whitespace removed,
    /// * the context vectors are filled only when `context_lines > 0`; if
    ///   reading the context fails they are left empty.
    ///
    /// # Errors
    ///
    /// Permission-denied failures and errors whose message mentions "binary"
    /// are treated as "nothing found" and yield `Ok(vec![])`. Any other search
    /// error is returned as `FsError::Io`.
    pub fn search_file(&self, entry: &Entry) -> Result<Vec<ContentMatch>> {
        let path = &entry.path;

        // Only regular files (after symlink resolution) are searched.
        if !path.is_file() {
            return Ok(Vec::new());
        }

        let mut matches = Vec::new();
        let mut searcher = SearcherBuilder::new()
            // Stop searching a file as soon as a NUL byte is encountered.
            .binary_detection(BinaryDetection::quit(b'\x00'))
            // Always enabled: the sink below hands a line number to the closure.
            .line_number(true)
            .build();

        let result = searcher.search_path(
            &self.matcher,
            path,
            sinks::UTF8(|lnum, line| {
                let line_number = lnum as usize;

                // Context is best-effort: a failure to read it must not drop
                // the match itself.
                let (context_before, context_after) = if self.context_lines > 0 {
                    self.extract_context(path, line_number, self.context_lines)
                        .unwrap_or_default()
                } else {
                    (Vec::new(), Vec::new())
                };

                // 1-based byte column of the first match on this line.
                let column = self
                    .matcher
                    .find(line.as_bytes())
                    .ok()
                    .flatten()
                    .map_or(1, |m| m.start() + 1);

                matches.push(ContentMatch {
                    entry: entry.clone(),
                    line_number,
                    column,
                    matched_text: line.trim_end().to_string(),
                    context_before,
                    context_after,
                });

                // Keep searching for further matches.
                Ok(true)
            }),
        );

        match result {
            Ok(()) => Ok(matches),
            Err(e) => {
                let message = e.to_string();
                // The error kind is checked first because the rendered message
                // is platform dependent (e.g. Windows reports "Access is
                // denied"); the message checks are kept so everything that was
                // skipped before is still skipped.
                let skippable = e.kind() == std::io::ErrorKind::PermissionDenied
                    || message.contains("binary")
                    || message.contains("Permission denied");

                if skippable {
                    Ok(Vec::new())
                } else {
                    Err(FsError::Io(std::io::Error::other(e)))
                }
            }
        }
    }

    /// Collects up to `context` lines before and after line `match_line`
    /// (1-based) of the file at `path`.
    ///
    /// Returns `(before, after)`, both in file order. Fewer lines are returned
    /// when the match sits near the start or end of the file.
    ///
    /// The file is opened and scanned from its first line on every call, so
    /// the cost grows with the position of the match.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be opened or when reading any line up to
    /// the first line past the context window fails.
    fn extract_context(
        &self,
        path: &Path,
        match_line: usize,
        context: usize,
    ) -> Result<(Vec<String>, Vec<String>)> {
        // Saturating arithmetic keeps the window well-defined even for very
        // large `context` values instead of overflowing.
        let first = match_line.saturating_sub(context);
        let last = match_line.saturating_add(context);

        let reader = BufReader::new(File::open(path)?);
        let mut before = Vec::new();
        let mut after = Vec::new();

        for (index, line) in reader.lines().enumerate() {
            let line = line?;
            let current_line = index + 1;

            // Past the window: nothing further can belong to the context.
            if current_line > last {
                break;
            }

            if (first..match_line).contains(&current_line) {
                before.push(line);
            } else if current_line > match_line {
                after.push(line);
            }
        }

        Ok((before, after))
    }
}
148
#[cfg(feature = "grep")]
/// Runs `searcher` over every entry and gathers all matches into one list.
///
/// With the `parallel` feature enabled the entries are processed through
/// rayon's parallel iterator; otherwise they are processed one after another.
/// Entries whose search returns an error contribute no matches, so as written
/// this function always returns `Ok`.
pub fn search_files(entries: &[Entry], searcher: &ContentSearcher) -> Result<Vec<ContentMatch>> {
    #[cfg(feature = "parallel")]
    {
        use rayon::prelude::*;
        // A failed search becomes an empty Vec, which adds nothing to the result.
        let collected: Vec<ContentMatch> = entries
            .par_iter()
            .flat_map(|entry| searcher.search_file(entry).unwrap_or_default())
            .collect();
        Ok(collected)
    }

    #[cfg(not(feature = "parallel"))]
    {
        let collected: Vec<ContentMatch> = entries
            .iter()
            .filter_map(|entry| searcher.search_file(entry).ok())
            .flatten()
            .collect();
        Ok(collected)
    }
}
174
#[cfg(test)]
#[cfg(feature = "grep")]
mod tests {
    use super::*;
    use crate::models::{ContentMatch, Entry, EntryKind};
    use chrono::Utc;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Builds a minimal file `Entry` whose name is the last component of `path`.
    fn make_test_entry(path: PathBuf) -> Entry {
        let name = path.file_name().unwrap().to_string_lossy().to_string();
        Entry {
            path,
            name,
            size: 0,
            kind: EntryKind::File,
            mtime: Utc::now(),
            perms: None,
            owner: None,
            depth: 0,
        }
    }

    /// Writes `contents` to `test.txt` inside a fresh temporary directory and
    /// searches it with a searcher built from the remaining arguments.
    fn search_fixture(
        contents: &str,
        pattern: &str,
        is_regex: bool,
        case_insensitive: bool,
        context_lines: usize,
    ) -> Vec<ContentMatch> {
        // The temp dir must stay alive until the search has finished.
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("test.txt");
        fs::write(&file_path, contents).unwrap();

        let entry = make_test_entry(file_path);
        let searcher =
            ContentSearcher::new(pattern, is_regex, case_insensitive, context_lines, false)
                .unwrap();
        searcher.search_file(&entry).unwrap()
    }

    #[test]
    fn test_literal_search() {
        let found = search_fixture(
            "Hello World\nThis is a test\nHello again",
            "Hello",
            false,
            false,
            0,
        );

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line_number, 1);
        assert_eq!(found[1].line_number, 3);
    }

    #[test]
    fn test_regex_search() {
        let found = search_fixture("test123\ntest456\nabc789", r"test\d+", true, false, 0);

        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_case_insensitive() {
        let found = search_fixture("Hello\nhello\nHELLO", "hello", false, true, 0);

        assert_eq!(found.len(), 3);
    }

    #[test]
    fn test_context_lines() {
        let found = search_fixture("line1\nline2\nmatch\nline4\nline5", "match", false, false, 1);

        assert_eq!(found.len(), 1);

        let hit = &found[0];
        assert_eq!(hit.context_before.len(), 1);
        assert_eq!(hit.context_after.len(), 1);
        assert_eq!(hit.context_before[0], "line2");
        assert_eq!(hit.context_after[0], "line4");
    }
}