Skip to main content

mdcat/mdless/
search.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5//! Regex search over the rendered document.
6//!
7//! [`SearchState`] matches against the plain buffer and maps hits
8//! to styled byte ranges, skipping SGR introducers so highlights
9//! land on visible bytes.
10
11use std::ops::Range;
12
13use anyhow::{anyhow, Result};
14use regex::Regex;
15
16use super::buffer::RenderedDoc;
17
18/// One match inside the rendered document.
19#[derive(Debug, Clone)]
20pub struct Match {
21    /// Byte range within `RenderedDoc::plain`.
22    pub plain: Range<usize>,
23    /// Byte range within `RenderedDoc::styled`, mapped from `plain`.
24    pub styled: Range<usize>,
25    /// Rendered line containing the first byte of the match.
26    pub line: usize,
27}
28
29/// Case-sensitivity policy for a compiled search.
30#[derive(Debug, Clone, Copy, Eq, PartialEq)]
31pub enum CaseMode {
32    /// Case-insensitive unless the pattern contains an uppercase ASCII
33    /// letter.
34    Smart,
35    /// Always case-sensitive.
36    Sensitive,
37    /// Always case-insensitive.
38    Insensitive,
39}
40
41/// Search direction for `next` / `previous`.
42#[derive(Debug, Clone, Copy, Eq, PartialEq)]
43pub enum Direction {
44    /// Advance to the next match after the cursor.
45    Forward,
46    /// Retreat to the previous match before the cursor.
47    Backward,
48}
49
50/// Compiled query + cached match list + cycle cursor.
51#[derive(Debug)]
52pub struct SearchState {
53    matches: Vec<Match>,
54    cursor: Option<usize>,
55}
56
57impl SearchState {
58    /// Compile `pattern` against `doc`, returning the initial state.
59    ///
60    /// `pattern` is a literal by default; callers pass `regex = true` to
61    /// interpret it as a regular expression. Invalid regex patterns
62    /// surface as `Err`.
63    pub fn compile(doc: &RenderedDoc, pattern: &str, regex: bool, case: CaseMode) -> Result<Self> {
64        let insensitive = match case {
65            CaseMode::Sensitive => false,
66            CaseMode::Insensitive => true,
67            CaseMode::Smart => !pattern.chars().any(|c| c.is_ascii_uppercase()),
68        };
69
70        // Build the compiled regex. Literal mode escapes metacharacters.
71        let body = if regex {
72            pattern.to_string()
73        } else {
74            regex::escape(pattern)
75        };
76        let compiled = regex::RegexBuilder::new(&body)
77            .case_insensitive(insensitive)
78            .build()
79            .map_err(|e| anyhow!("invalid search pattern: {e}"))?;
80
81        let matches = scan(doc, &compiled);
82        let cursor = if matches.is_empty() { None } else { Some(0) };
83        Ok(Self { matches, cursor })
84    }
85
86    /// Number of matches found.
87    pub fn len(&self) -> usize {
88        self.matches.len()
89    }
90
91    /// True when the pattern didn't match anywhere.
92    pub fn is_empty(&self) -> bool {
93        self.matches.is_empty()
94    }
95
96    /// Currently-selected match, if any.
97    pub fn current(&self) -> Option<&Match> {
98        self.cursor.and_then(|i| self.matches.get(i))
99    }
100
101    /// All matches, stable source order.
102    pub fn all(&self) -> &[Match] {
103        &self.matches
104    }
105
106    /// Advance or retreat the cursor. Returns the newly-selected match,
107    /// and `true` when the cursor wrapped around the buffer end.
108    pub fn step(&mut self, dir: Direction) -> Option<(&Match, bool)> {
109        let n = self.matches.len();
110        if n == 0 {
111            return None;
112        }
113        let (next, wrapped) = match (self.cursor, dir) {
114            (None, Direction::Forward) => (0, false),
115            (None, Direction::Backward) => (n - 1, false),
116            (Some(i), Direction::Forward) => {
117                if i + 1 >= n {
118                    (0, true)
119                } else {
120                    (i + 1, false)
121                }
122            }
123            (Some(i), Direction::Backward) => {
124                if i == 0 {
125                    (n - 1, true)
126                } else {
127                    (i - 1, false)
128                }
129            }
130        };
131        self.cursor = Some(next);
132        self.matches.get(next).map(|m| (m, wrapped))
133    }
134}
135
136fn scan(doc: &RenderedDoc, re: &Regex) -> Vec<Match> {
137    re.find_iter(&doc.plain)
138        .map(|m| {
139            let plain = m.range();
140            // Start must land on a visible byte: if the mapping leaves
141            // us on an SGR introducer we consume it so the highlight
142            // wraps "bar" rather than "\x1b[1mbar".
143            let mut styled_start = plain_to_styled(doc, plain.start);
144            while doc.styled.get(styled_start) == Some(&0x1b) {
145                styled_start = skip_escape(&doc.styled, styled_start);
146            }
147            let styled_end = plain_to_styled(doc, plain.end);
148            let line = doc.line_for_plain_offset(plain.start);
149            Match {
150                plain,
151                styled: styled_start..styled_end,
152                line,
153            }
154        })
155        .collect()
156}
157
158/// Map a plain-buffer byte offset to the matching offset in the styled
159/// buffer. Uses the parallel line starts to pin the line, then walks the
160/// styled bytes skipping escape sequences until the plain-relative offset
161/// is reached.
162fn plain_to_styled(doc: &RenderedDoc, plain_offset: usize) -> usize {
163    let line = doc.line_for_plain_offset(plain_offset);
164    let plain_base = doc.line_starts[line];
165    let styled_base = doc.styled_line_starts[line];
166    let target = plain_offset - plain_base;
167
168    let mut plain_cursor = 0;
169    let mut styled_cursor = styled_base;
170    while plain_cursor < target && styled_cursor < doc.styled.len() {
171        let b = doc.styled[styled_cursor];
172        if b == 0x1b {
173            styled_cursor = skip_escape(&doc.styled, styled_cursor);
174            continue;
175        }
176        plain_cursor += 1;
177        styled_cursor += 1;
178    }
179    styled_cursor
180}
181
182/// Duplicate of `buffer::skip_escape` — factored here so the search pass
183/// stays self-contained. If both pagers ever share an ANSI parser we can
184/// pull this up into a common module.
185fn skip_escape(input: &[u8], start: usize) -> usize {
186    let Some(&next) = input.get(start + 1) else {
187        return start + 1;
188    };
189    match next {
190        b'[' => {
191            let mut i = start + 2;
192            while input.get(i).is_some_and(|&b| !(0x40..=0x7e).contains(&b)) {
193                i += 1;
194            }
195            i + usize::from(i < input.len())
196        }
197        b']' | b'P' | b'_' | b'^' => {
198            let mut i = start + 2;
199            while let Some(&b) = input.get(i) {
200                if b == 0x07 {
201                    return i + 1;
202                }
203                if b == 0x1b && input.get(i + 1) == Some(&b'\\') {
204                    return i + 2;
205                }
206                i += 1;
207            }
208            i
209        }
210        _ => start + 2,
211    }
212}
213
214#[cfg(test)]
215mod tests {
216    use super::*;
217    use crate::mdless::buffer::build;
218
219    fn doc(s: &str) -> RenderedDoc {
220        build(s.as_bytes().to_vec(), Vec::new())
221    }
222
223    #[test]
224    fn literal_pattern_finds_all_occurrences() {
225        let d = doc("foo bar foo\nbaz foo\n");
226        let s = SearchState::compile(&d, "foo", false, CaseMode::Sensitive).unwrap();
227        assert_eq!(s.len(), 3);
228        assert_eq!(s.all()[0].plain, 0..3);
229        assert_eq!(s.all()[1].plain, 8..11);
230        assert_eq!(s.all()[2].plain, 16..19);
231    }
232
233    #[test]
234    fn smart_case_is_insensitive_until_uppercase_in_pattern() {
235        let d = doc("FOO foo Foo\n");
236        let lower = SearchState::compile(&d, "foo", false, CaseMode::Smart).unwrap();
237        assert_eq!(lower.len(), 3);
238        let mixed = SearchState::compile(&d, "Foo", false, CaseMode::Smart).unwrap();
239        assert_eq!(mixed.len(), 1);
240    }
241
242    #[test]
243    fn regex_flag_enables_metacharacters() {
244        let d = doc("foo123 bar45 baz6\n");
245        let literal = SearchState::compile(&d, r"\d+", false, CaseMode::Sensitive).unwrap();
246        assert_eq!(literal.len(), 0);
247        let re = SearchState::compile(&d, r"\d+", true, CaseMode::Sensitive).unwrap();
248        assert_eq!(re.len(), 3);
249    }
250
251    #[test]
252    fn invalid_regex_surfaces_as_err() {
253        let d = doc("x\n");
254        let err = SearchState::compile(&d, "[", true, CaseMode::Sensitive).unwrap_err();
255        assert!(err.to_string().contains("invalid search pattern"));
256    }
257
258    #[test]
259    fn step_cycles_and_reports_wrap() {
260        let d = doc("foo foo foo\n");
261        let mut s = SearchState::compile(&d, "foo", false, CaseMode::Sensitive).unwrap();
262        // Fresh cursor points at index 0 after compile.
263        assert_eq!(s.current().unwrap().plain, 0..3);
264        assert_eq!(s.step(Direction::Forward).unwrap().0.plain, 4..7);
265        assert_eq!(s.step(Direction::Forward).unwrap().0.plain, 8..11);
266        // Third Forward wraps.
267        let (m, wrapped) = s.step(Direction::Forward).unwrap();
268        assert!(wrapped);
269        assert_eq!(m.plain, 0..3);
270        // Backward from head wraps.
271        let (m, wrapped) = s.step(Direction::Backward).unwrap();
272        assert!(wrapped);
273        assert_eq!(m.plain, 8..11);
274    }
275
276    #[test]
277    fn styled_range_skips_escape_sequences() {
278        // "foo \x1b[1mbar\x1b[0m foo" — plain is "foo bar foo".
279        let styled = b"foo \x1b[1mbar\x1b[0m foo\n".to_vec();
280        let d = build(styled, Vec::new());
281        let s = SearchState::compile(&d, "bar", false, CaseMode::Sensitive).unwrap();
282        let m = &s.all()[0];
283        assert_eq!(m.plain, 4..7);
284        // Styled range should include the SGR-on prefix but not the
285        // SGR-off suffix (escapes before the match are consumed in the
286        // mapping, escapes at the end are handled by highlighting).
287        assert_eq!(&d.styled[m.styled.clone()], b"bar");
288    }
289
290    #[test]
291    fn no_matches_yields_none_cursor() {
292        let d = doc("nothing here\n");
293        let mut s = SearchState::compile(&d, "missing", false, CaseMode::Sensitive).unwrap();
294        assert!(s.is_empty());
295        assert!(s.current().is_none());
296        assert!(s.step(Direction::Forward).is_none());
297    }
298}