Skip to main content

redox_core/buffer/text_buffer/
search.rs

1//! Plain-text search helpers for `TextBuffer`.
2//!
3//! These helpers intentionally implement only literal substring and same-line
4//! character search semantics for now. Regex-aware search can layer on later
5//! for stuff like `:s/` commands.
6
7use super::TextBuffer;
8use crate::buffer::Pos;
9
10impl TextBuffer {
11    /// Find the next occurrence of `needle` after `pos` on the same line.
12    pub fn find_char_after_on_line(&self, pos: Pos, needle: char) -> Option<Pos> {
13        let pos = self.clamp_pos(pos);
14        let line = self.clamp_line(pos.line);
15        let line_text = self.line_slice(line);
16
17        for (col, ch) in line_text
18            .chars()
19            .enumerate()
20            .skip(pos.col.saturating_add(1))
21        {
22            if ch == needle {
23                return Some(Pos::new(line, col));
24            }
25        }
26
27        None
28    }
29
30    /// Find the previous occurrence of `needle` before `pos` on the same line.
31    pub fn find_char_before_on_line(&self, pos: Pos, needle: char) -> Option<Pos> {
32        let pos = self.clamp_pos(pos);
33        let line = self.clamp_line(pos.line);
34        let line_text = self.line_slice(line);
35        let mut found = None;
36
37        for (col, ch) in line_text.chars().enumerate().take(pos.col) {
38            if ch == needle {
39                found = Some(Pos::new(line, col));
40            }
41        }
42
43        found
44    }
45
46    /// Find the delimiter paired with the delimiter under `pos`.
47    pub fn matching_delimiter(&self, pos: Pos) -> Option<Pos> {
48        let pos = self.clamp_pos(pos);
49        let char_idx = self.pos_to_char(pos);
50        let ch = self.char_at(pos)?;
51        if self.char_is_escaped_for_pairing(char_idx) {
52            return None;
53        }
54
55        match delimiter_pair_for(ch)? {
56            DelimiterPairKind::Asymmetric { open, close } if ch == open => {
57                self.match_asymmetric_delimiter_forward(char_idx, open, close)
58            }
59            DelimiterPairKind::Asymmetric { open, close } if ch == close => {
60                self.match_asymmetric_delimiter_backward(char_idx, open, close)
61            }
62            DelimiterPairKind::Symmetric { delimiter } => {
63                self.match_symmetric_delimiter(char_idx, delimiter)
64            }
65            DelimiterPairKind::Asymmetric { .. } => None,
66        }
67    }
68
69    fn match_asymmetric_delimiter_forward(
70        &self,
71        start_idx: usize,
72        open: char,
73        close: char,
74    ) -> Option<Pos> {
75        let mut depth = 0usize;
76        for idx in start_idx.saturating_add(1)..self.len_chars() {
77            if self.char_is_escaped_for_pairing(idx) {
78                continue;
79            }
80            match self.rope().char(idx) {
81                ch if ch == open => depth = depth.saturating_add(1),
82                ch if ch == close => {
83                    if depth == 0 {
84                        return Some(self.char_to_pos(idx));
85                    }
86                    depth = depth.saturating_sub(1);
87                }
88                _ => {}
89            }
90        }
91        None
92    }
93
94    fn match_asymmetric_delimiter_backward(
95        &self,
96        end_idx: usize,
97        open: char,
98        close: char,
99    ) -> Option<Pos> {
100        let mut depth = 0usize;
101        for idx in (0..end_idx).rev() {
102            if self.char_is_escaped_for_pairing(idx) {
103                continue;
104            }
105            match self.rope().char(idx) {
106                ch if ch == close => depth = depth.saturating_add(1),
107                ch if ch == open => {
108                    if depth == 0 {
109                        return Some(self.char_to_pos(idx));
110                    }
111                    depth = depth.saturating_sub(1);
112                }
113                _ => {}
114            }
115        }
116        None
117    }
118
119    fn match_symmetric_delimiter(&self, char_idx: usize, delimiter: char) -> Option<Pos> {
120        if self.char_is_escaped_for_pairing(char_idx) {
121            return None;
122        }
123
124        let line = self.char_to_line(char_idx);
125        let line_range = self.line_char_range(line);
126        let mut delimiters = Vec::new();
127        for idx in line_range {
128            if self.rope().char(idx) == delimiter && !self.char_is_escaped_for_pairing(idx) {
129                delimiters.push(idx);
130            }
131        }
132
133        delimiters.chunks_exact(2).find_map(|pair| {
134            if char_idx == pair[0] {
135                Some(self.char_to_pos(pair[1]))
136            } else if char_idx == pair[1] {
137                Some(self.char_to_pos(pair[0]))
138            } else {
139                None
140            }
141        })
142    }
143
144    fn char_is_escaped_for_pairing(&self, char_idx: usize) -> bool {
145        let mut backslashes = 0;
146        let mut idx = char_idx;
147        while idx > 0 {
148            idx -= 1;
149            if self.rope().char(idx) != '\\' {
150                break;
151            }
152            backslashes += 1;
153        }
154        backslashes % 2 == 1
155    }
156
157    /// Find all non-overlapping literal matches of `needle` in the buffer.
158    ///
159    /// Returned ranges are half-open `(start, end)` position pairs.
160    pub fn find_matches(&self, needle: &str) -> Vec<(Pos, Pos)> {
161        if needle.is_empty() {
162            return Vec::new();
163        }
164
165        let needle_chars = needle.chars().count();
166        let overlap_char_limit = needle_chars.saturating_sub(1);
167        let mut matches = Vec::new();
168        let mut overlap = String::new();
169        let mut processed_chars = 0usize;
170        let mut last_emitted_end = 0usize;
171
172        for chunk in self.rope().chunks() {
173            let chunk_chars = chunk.chars().count();
174
175            if overlap.is_empty() {
176                collect_segment_matches(
177                    self,
178                    chunk,
179                    processed_chars,
180                    processed_chars,
181                    needle,
182                    needle_chars,
183                    &mut matches,
184                    &mut last_emitted_end,
185                );
186
187                if overlap_char_limit > 0 {
188                    overlap = trailing_chars(chunk, overlap_char_limit);
189                }
190            } else {
191                let overlap_chars = overlap.chars().count();
192                let segment_start_char = processed_chars.saturating_sub(overlap_chars);
193                let mut segment = String::with_capacity(overlap.len().saturating_add(chunk.len()));
194                segment.push_str(&overlap);
195                segment.push_str(chunk);
196
197                collect_segment_matches(
198                    self,
199                    &segment,
200                    segment_start_char,
201                    processed_chars,
202                    needle,
203                    needle_chars,
204                    &mut matches,
205                    &mut last_emitted_end,
206                );
207
208                overlap = trailing_chars(&segment, overlap_char_limit);
209            }
210
211            processed_chars = processed_chars.saturating_add(chunk_chars);
212        }
213
214        matches
215    }
216}
217
218#[derive(Debug, Clone, Copy, PartialEq, Eq)]
219enum DelimiterPairKind {
220    Asymmetric { open: char, close: char },
221    Symmetric { delimiter: char },
222}
223
224fn delimiter_pair_for(ch: char) -> Option<DelimiterPairKind> {
225    match ch {
226        '(' | ')' => Some(DelimiterPairKind::Asymmetric {
227            open: '(',
228            close: ')',
229        }),
230        '[' | ']' => Some(DelimiterPairKind::Asymmetric {
231            open: '[',
232            close: ']',
233        }),
234        '{' | '}' => Some(DelimiterPairKind::Asymmetric {
235            open: '{',
236            close: '}',
237        }),
238        '<' | '>' => Some(DelimiterPairKind::Asymmetric {
239            open: '<',
240            close: '>',
241        }),
242        '\'' | '"' | '`' => Some(DelimiterPairKind::Symmetric { delimiter: ch }),
243        _ => None,
244    }
245}
246
247fn collect_segment_matches(
248    buffer: &TextBuffer,
249    segment: &str,
250    segment_start_char: usize,
251    emit_after_char: usize,
252    needle: &str,
253    needle_chars: usize,
254    out: &mut Vec<(Pos, Pos)>,
255    last_emitted_end: &mut usize,
256) {
257    let segment_scan_start_char = last_emitted_end.saturating_sub(segment_start_char);
258    let segment_scan_start_byte = byte_idx_for_char(segment, segment_scan_start_char);
259    let mut scan_start_byte = segment_scan_start_byte;
260    let mut scan_start_chars = segment_scan_start_char;
261
262    for (match_start_byte_rel, _) in segment[segment_scan_start_byte..].match_indices(needle) {
263        let match_start_byte = segment_scan_start_byte.saturating_add(match_start_byte_rel);
264        scan_start_chars = scan_start_chars
265            .saturating_add(segment[scan_start_byte..match_start_byte].chars().count());
266
267        let start_char = segment_start_char.saturating_add(scan_start_chars);
268        let end_char = start_char.saturating_add(needle_chars);
269        if start_char >= *last_emitted_end && end_char > emit_after_char {
270            out.push((buffer.char_to_pos(start_char), buffer.char_to_pos(end_char)));
271            *last_emitted_end = end_char;
272        }
273
274        scan_start_byte = match_start_byte.saturating_add(needle.len());
275        scan_start_chars = scan_start_chars.saturating_add(needle_chars);
276    }
277}
278
279fn byte_idx_for_char(text: &str, char_idx: usize) -> usize {
280    if char_idx == 0 {
281        return 0;
282    }
283
284    text.char_indices()
285        .nth(char_idx)
286        .map(|(byte_idx, _)| byte_idx)
287        .unwrap_or(text.len())
288}
289
290fn trailing_chars(text: &str, char_limit: usize) -> String {
291    if char_limit == 0 || text.is_empty() {
292        return String::new();
293    }
294
295    let total_chars = text.chars().count();
296    if total_chars <= char_limit {
297        return text.to_string();
298    }
299
300    let skip_chars = total_chars.saturating_sub(char_limit);
301    let start_byte = text
302        .char_indices()
303        .nth(skip_chars)
304        .map(|(byte_idx, _)| byte_idx)
305        .unwrap_or(text.len());
306    text[start_byte..].to_string()
307}
308
309#[cfg(test)]
310mod tests {
311    use super::*;
312
313    fn naive_find_matches(buffer: &TextBuffer, needle: &str) -> Vec<(Pos, Pos)> {
314        let source = buffer.to_string();
315        source
316            .match_indices(needle)
317            .map(|(start_byte, matched)| {
318                let start_char = buffer.rope().byte_to_char(start_byte);
319                let end_char = buffer
320                    .rope()
321                    .byte_to_char(start_byte.saturating_add(matched.len()));
322                (buffer.char_to_pos(start_char), buffer.char_to_pos(end_char))
323            })
324            .collect()
325    }
326
327    #[test]
328    fn find_matches_matches_naive_search_across_rope_chunks() {
329        let mut text = String::new();
330        for i in 0..40_000usize {
331            text.push_str(&format!("{i:05x}|"));
332        }
333        let buffer = TextBuffer::from_str(&text);
334
335        let mut chunks = buffer.rope().chunks();
336        let first_chunk = chunks.next().expect("expected at least one chunk");
337        let second_chunk = chunks.next().expect("expected multiple rope chunks");
338        let boundary_needle = format!(
339            "{}{}",
340            trailing_chars(first_chunk, 6),
341            second_chunk.chars().take(6).collect::<String>()
342        );
343
344        assert_eq!(
345            buffer.find_matches(&boundary_needle),
346            naive_find_matches(&buffer, &boundary_needle)
347        );
348    }
349
350    #[test]
351    fn find_matches_matches_naive_search_for_unicode_across_rope_chunks() {
352        let mut text = String::new();
353        for i in 0..20_000usize {
354            text.push_str(&format!("α{i:05x}🙂β"));
355        }
356        let buffer = TextBuffer::from_str(&text);
357
358        let mut chunks = buffer.rope().chunks();
359        let first_chunk = chunks.next().expect("expected at least one chunk");
360        let second_chunk = chunks.next().expect("expected multiple rope chunks");
361        let boundary_needle = format!(
362            "{}{}",
363            trailing_chars(first_chunk, 4),
364            second_chunk.chars().take(4).collect::<String>()
365        );
366
367        assert_eq!(
368            buffer.find_matches(&boundary_needle),
369            naive_find_matches(&buffer, &boundary_needle)
370        );
371    }
372
373    #[test]
374    fn find_matches_preserves_non_overlapping_semantics_across_chunk_boundaries() {
375        let text = "a".repeat(120_000);
376        let buffer = TextBuffer::from_str(&text);
377
378        // Ensure the buffer actually spans multiple chunks
379        assert!(
380            buffer.rope().chunks().count() > 1,
381            "expected multiple rope chunks"
382        );
383
384        assert_eq!(
385            buffer.find_matches("aaa"),
386            naive_find_matches(&buffer, "aaa")
387        );
388    }
389
390    #[test]
391    fn matching_delimiter_ignores_escaped_asymmetric_delimiter_under_cursor() {
392        let buffer = TextBuffer::from_str(r"\(alpha)");
393
394        assert_eq!(buffer.matching_delimiter(Pos::new(0, 1)), None);
395    }
396
397    #[test]
398    fn matching_delimiter_skips_escaped_asymmetric_delimiters_while_scanning() {
399        let buffer = TextBuffer::from_str(r"(alpha \( beta))");
400
401        assert_eq!(
402            buffer.matching_delimiter(Pos::new(0, 0)),
403            Some(Pos::new(0, 14))
404        );
405        assert_eq!(
406            buffer.matching_delimiter(Pos::new(0, 14)),
407            Some(Pos::new(0, 0))
408        );
409    }
410}