// par_term/search/engine.rs

//! Search engine for terminal scrollback.
2
3use super::types::{SearchConfig, SearchMatch};
4use regex::{Regex, RegexBuilder};
5
6/// Search engine that performs text searches on terminal content.
7pub struct SearchEngine {
8    /// Cached compiled regex for the current query.
9    cached_regex: Option<(String, bool, Regex)>, // (pattern, case_sensitive, compiled)
10}
11
12impl Default for SearchEngine {
13    fn default() -> Self {
14        Self::new()
15    }
16}
17
18impl SearchEngine {
19    /// Create a new search engine.
20    pub fn new() -> Self {
21        Self { cached_regex: None }
22    }
23
24    /// Search through lines of text and return all matches.
25    ///
26    /// # Arguments
27    /// * `lines` - Iterator of (line_index, line_text) pairs
28    /// * `query` - The search query
29    /// * `config` - Search configuration options
30    ///
31    /// # Returns
32    /// A vector of SearchMatch containing all matches found.
33    pub fn search<I>(&mut self, lines: I, query: &str, config: &SearchConfig) -> Vec<SearchMatch>
34    where
35        I: Iterator<Item = (usize, String)>,
36    {
37        if query.is_empty() {
38            return Vec::new();
39        }
40
41        let mut matches = Vec::new();
42
43        if config.use_regex {
44            self.search_regex(lines, query, config, &mut matches);
45        } else {
46            self.search_plain(lines, query, config, &mut matches);
47        }
48
49        matches
50    }
51
52    /// Perform plain text search.
53    fn search_plain<I>(
54        &self,
55        lines: I,
56        query: &str,
57        config: &SearchConfig,
58        matches: &mut Vec<SearchMatch>,
59    ) where
60        I: Iterator<Item = (usize, String)>,
61    {
62        let query_lower = if config.case_sensitive {
63            query.to_string()
64        } else {
65            query.to_lowercase()
66        };
67
68        // Query length in characters (not bytes)
69        let query_char_len = query.chars().count();
70
71        for (line_idx, line) in lines {
72            let search_line = if config.case_sensitive {
73                line.clone()
74            } else {
75                line.to_lowercase()
76            };
77
78            let mut start_byte = 0;
79            while let Some(pos) = search_line[start_byte..].find(&query_lower) {
80                let byte_offset = start_byte + pos;
81
82                // Convert byte offset to character offset for cell positioning
83                let char_column = Self::byte_offset_to_char_offset(&search_line, byte_offset);
84
85                // Check whole word matching if enabled (using byte offset for string slicing)
86                if config.whole_word
87                    && !Self::is_whole_word_static(&line, byte_offset, query_lower.len())
88                {
89                    start_byte = byte_offset + 1;
90                    continue;
91                }
92
93                matches.push(SearchMatch::new(line_idx, char_column, query_char_len));
94                start_byte = byte_offset + query_lower.len().max(1);
95
96                // Avoid infinite loops on empty matches
97                if query.is_empty() {
98                    break;
99                }
100            }
101        }
102    }
103
104    /// Perform regex search.
105    fn search_regex<I>(
106        &mut self,
107        lines: I,
108        query: &str,
109        config: &SearchConfig,
110        matches: &mut Vec<SearchMatch>,
111    ) where
112        I: Iterator<Item = (usize, String)>,
113    {
114        // Try to compile or reuse cached regex
115        let regex = match self.get_or_compile_regex(query, config.case_sensitive) {
116            Ok(re) => re.clone(), // Clone to avoid borrow issues
117            Err(e) => {
118                log::debug!("Invalid regex pattern '{}': {}", query, e);
119                return;
120            }
121        };
122
123        for (line_idx, line) in lines {
124            for mat in regex.find_iter(&line) {
125                let byte_start = mat.start();
126                let byte_end = mat.end();
127
128                // Convert byte offsets to character offsets for cell positioning
129                let char_column = Self::byte_offset_to_char_offset(&line, byte_start);
130                let char_length = Self::byte_offset_to_char_offset(&line, byte_end) - char_column;
131
132                // Check whole word matching if enabled (using byte offsets for string slicing)
133                if config.whole_word
134                    && !Self::is_whole_word_static(&line, byte_start, byte_end - byte_start)
135                {
136                    continue;
137                }
138
139                matches.push(SearchMatch::new(line_idx, char_column, char_length));
140            }
141        }
142    }
143
144    /// Convert a byte offset to a character offset in a string.
145    /// This is needed because String::find() returns byte offsets, but we need
146    /// character offsets for cell positioning (each cell = 1 character).
147    fn byte_offset_to_char_offset(s: &str, byte_offset: usize) -> usize {
148        s[..byte_offset].chars().count()
149    }
150
151    /// Get cached regex or compile a new one.
152    fn get_or_compile_regex(
153        &mut self,
154        pattern: &str,
155        case_sensitive: bool,
156    ) -> Result<&Regex, regex::Error> {
157        // Check if we have a cached regex that matches
158        let needs_recompile = match &self.cached_regex {
159            Some((cached_pattern, cached_case, _)) => {
160                cached_pattern != pattern || *cached_case != case_sensitive
161            }
162            None => true,
163        };
164
165        if needs_recompile {
166            let regex = RegexBuilder::new(pattern)
167                .case_insensitive(!case_sensitive)
168                .build()?;
169            self.cached_regex = Some((pattern.to_string(), case_sensitive, regex));
170        }
171
172        Ok(&self.cached_regex.as_ref().unwrap().2)
173    }
174
175    /// Check if a match at the given position is a whole word.
176    fn is_whole_word_static(line: &str, start: usize, length: usize) -> bool {
177        let end = start + length;
178
179        // Check character before the match
180        if start > 0
181            && let Some(c) = line[..start].chars().last()
182            && (c.is_alphanumeric() || c == '_')
183        {
184            return false;
185        }
186
187        // Check character after the match
188        if end < line.len()
189            && let Some(c) = line[end..].chars().next()
190            && (c.is_alphanumeric() || c == '_')
191        {
192            return false;
193        }
194
195        true
196    }
197
198    /// Clear the cached regex.
199    pub fn clear_cache(&mut self) {
200        self.cached_regex = None;
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Number each text by its position, producing the `(index, owned line)`
    /// pairs that `SearchEngine::search` consumes.
    fn make_lines<'a>(texts: &'a [&'a str]) -> impl Iterator<Item = (usize, String)> + 'a {
        texts.iter().copied().map(String::from).enumerate()
    }

    /// Run `query` against `texts` on a freshly created engine.
    fn run(texts: &[&str], query: &str, config: &SearchConfig) -> Vec<SearchMatch> {
        SearchEngine::new().search(make_lines(texts), query, config)
    }

    #[test]
    fn test_plain_search_case_insensitive() {
        let found = run(
            &["Hello World", "hello there", "HELLO WORLD"],
            "hello",
            &SearchConfig::default(),
        );

        let expected = vec![
            SearchMatch::new(0, 0, 5),
            SearchMatch::new(1, 0, 5),
            SearchMatch::new(2, 0, 5),
        ];
        assert_eq!(found, expected);
    }

    #[test]
    fn test_plain_search_case_sensitive() {
        let exact_case = SearchConfig {
            case_sensitive: true,
            ..Default::default()
        };

        let found = run(
            &["Hello World", "hello there", "HELLO WORLD"],
            "hello",
            &exact_case,
        );

        // Only the all-lowercase line qualifies.
        assert_eq!(found, vec![SearchMatch::new(1, 0, 5)]);
    }

    #[test]
    fn test_plain_search_multiple_matches_per_line() {
        let found = run(&["foo bar foo baz foo"], "foo", &SearchConfig::default());

        let expected = vec![
            SearchMatch::new(0, 0, 3),
            SearchMatch::new(0, 8, 3),
            SearchMatch::new(0, 16, 3),
        ];
        assert_eq!(found, expected);
    }

    #[test]
    fn test_whole_word_matching() {
        let words_only = SearchConfig {
            whole_word: true,
            ..Default::default()
        };

        let found = run(&["foobar foo barfoo"], "foo", &words_only);

        // The occurrences embedded in "foobar" and "barfoo" are rejected.
        assert_eq!(found, vec![SearchMatch::new(0, 7, 3)]);
    }

    #[test]
    fn test_regex_search() {
        let as_regex = SearchConfig {
            use_regex: true,
            ..Default::default()
        };

        let found = run(
            &[
                "error: something failed",
                "warning: check this",
                "error: again",
            ],
            "error:",
            &as_regex,
        );

        let expected = vec![SearchMatch::new(0, 0, 6), SearchMatch::new(2, 0, 6)];
        assert_eq!(found, expected);
    }

    #[test]
    fn test_regex_pattern() {
        let as_regex = SearchConfig {
            use_regex: true,
            ..Default::default()
        };

        let found = run(&["test123", "test456", "notest"], r"test\d+", &as_regex);

        let expected = vec![SearchMatch::new(0, 0, 7), SearchMatch::new(1, 0, 7)];
        assert_eq!(found, expected);
    }

    #[test]
    fn test_empty_query() {
        let found = run(&["some text"], "", &SearchConfig::default());

        assert!(found.is_empty());
    }

    #[test]
    fn test_unicode_character_offsets() {
        // The folder emoji occupies 4 bytes in UTF-8 but a single character.
        let found = run(
            &["📁 Downloads", "normal text"],
            "down",
            &SearchConfig::default(),
        );

        // Expect character position 2 (emoji + space), not byte position 5.
        assert_eq!(found.len(), 1);
        let hit = &found[0];
        assert_eq!(hit.line, 0);
        assert_eq!(hit.column, 2);
        assert_eq!(hit.length, 4);
    }

    #[test]
    fn test_unicode_multiple_emoji() {
        // Three 4-byte emoji precede the search term.
        let found = run(&["🎉🎊🎁 party time"], "party", &SearchConfig::default());

        // Expect character position 4 (3 emoji + space), not byte position 13.
        assert_eq!(found.len(), 1);
        let hit = &found[0];
        assert_eq!(hit.column, 4);
        assert_eq!(hit.length, 5);
    }

    #[test]
    fn test_invalid_regex() {
        let as_regex = SearchConfig {
            use_regex: true,
            ..Default::default()
        };

        // An unterminated character class cannot compile; expect no results.
        let found = run(&["some text"], "[invalid", &as_regex);

        assert!(found.is_empty());
    }
}