Skip to main content

redox_core/buffer/text_buffer/
search.rs

1//! Plain-text search helpers for `TextBuffer`.
2//!
3//! These helpers intentionally implement only literal substring and same-line
4//! character search semantics for now. Regex-aware search can layer on later
5//! for stuff like `:s/` commands.
6
7use super::TextBuffer;
8use crate::buffer::Pos;
9
10impl TextBuffer {
11    /// Find the next occurrence of `needle` after `pos` on the same line.
12    pub fn find_char_after_on_line(&self, pos: Pos, needle: char) -> Option<Pos> {
13        let pos = self.clamp_pos(pos);
14        let line = self.clamp_line(pos.line);
15        let line_text = self.line_slice(line);
16
17        for (col, ch) in line_text
18            .chars()
19            .enumerate()
20            .skip(pos.col.saturating_add(1))
21        {
22            if ch == needle {
23                return Some(Pos::new(line, col));
24            }
25        }
26
27        None
28    }
29
30    /// Find all non-overlapping literal matches of `needle` in the buffer.
31    ///
32    /// Returned ranges are half-open `(start, end)` position pairs.
33    pub fn find_matches(&self, needle: &str) -> Vec<(Pos, Pos)> {
34        if needle.is_empty() {
35            return Vec::new();
36        }
37
38        let needle_chars = needle.chars().count();
39        let overlap_char_limit = needle_chars.saturating_sub(1);
40        let mut matches = Vec::new();
41        let mut overlap = String::new();
42        let mut processed_chars = 0usize;
43        let mut last_emitted_end = 0usize;
44
45        for chunk in self.rope().chunks() {
46            let chunk_chars = chunk.chars().count();
47
48            if overlap.is_empty() {
49                collect_segment_matches(
50                    self,
51                    chunk,
52                    processed_chars,
53                    processed_chars,
54                    needle,
55                    needle_chars,
56                    &mut matches,
57                    &mut last_emitted_end,
58                );
59
60                if overlap_char_limit > 0 {
61                    overlap = trailing_chars(chunk, overlap_char_limit);
62                }
63            } else {
64                let overlap_chars = overlap.chars().count();
65                let segment_start_char = processed_chars.saturating_sub(overlap_chars);
66                let mut segment = String::with_capacity(overlap.len().saturating_add(chunk.len()));
67                segment.push_str(&overlap);
68                segment.push_str(chunk);
69
70                collect_segment_matches(
71                    self,
72                    &segment,
73                    segment_start_char,
74                    processed_chars,
75                    needle,
76                    needle_chars,
77                    &mut matches,
78                    &mut last_emitted_end,
79                );
80
81                overlap = trailing_chars(&segment, overlap_char_limit);
82            }
83
84            processed_chars = processed_chars.saturating_add(chunk_chars);
85        }
86
87        matches
88    }
89}
90
91fn collect_segment_matches(
92    buffer: &TextBuffer,
93    segment: &str,
94    segment_start_char: usize,
95    emit_after_char: usize,
96    needle: &str,
97    needle_chars: usize,
98    out: &mut Vec<(Pos, Pos)>,
99    last_emitted_end: &mut usize,
100) {
101    let segment_scan_start_char = last_emitted_end.saturating_sub(segment_start_char);
102    let segment_scan_start_byte = byte_idx_for_char(segment, segment_scan_start_char);
103    let mut scan_start_byte = segment_scan_start_byte;
104    let mut scan_start_chars = segment_scan_start_char;
105
106    for (match_start_byte_rel, _) in segment[segment_scan_start_byte..].match_indices(needle) {
107        let match_start_byte = segment_scan_start_byte.saturating_add(match_start_byte_rel);
108        scan_start_chars = scan_start_chars
109            .saturating_add(segment[scan_start_byte..match_start_byte].chars().count());
110
111        let start_char = segment_start_char.saturating_add(scan_start_chars);
112        let end_char = start_char.saturating_add(needle_chars);
113        if start_char >= *last_emitted_end && end_char > emit_after_char {
114            out.push((buffer.char_to_pos(start_char), buffer.char_to_pos(end_char)));
115            *last_emitted_end = end_char;
116        }
117
118        scan_start_byte = match_start_byte.saturating_add(needle.len());
119        scan_start_chars = scan_start_chars.saturating_add(needle_chars);
120    }
121}
122
123fn byte_idx_for_char(text: &str, char_idx: usize) -> usize {
124    if char_idx == 0 {
125        return 0;
126    }
127
128    text.char_indices()
129        .nth(char_idx)
130        .map(|(byte_idx, _)| byte_idx)
131        .unwrap_or(text.len())
132}
133
134fn trailing_chars(text: &str, char_limit: usize) -> String {
135    if char_limit == 0 || text.is_empty() {
136        return String::new();
137    }
138
139    let total_chars = text.chars().count();
140    if total_chars <= char_limit {
141        return text.to_string();
142    }
143
144    let skip_chars = total_chars.saturating_sub(char_limit);
145    let start_byte = text
146        .char_indices()
147        .nth(skip_chars)
148        .map(|(byte_idx, _)| byte_idx)
149        .unwrap_or(text.len());
150    text[start_byte..].to_string()
151}
152
153#[cfg(test)]
154mod tests {
155    use super::*;
156
157    fn naive_find_matches(buffer: &TextBuffer, needle: &str) -> Vec<(Pos, Pos)> {
158        let source = buffer.to_string();
159        source
160            .match_indices(needle)
161            .map(|(start_byte, matched)| {
162                let start_char = buffer.rope().byte_to_char(start_byte);
163                let end_char = buffer
164                    .rope()
165                    .byte_to_char(start_byte.saturating_add(matched.len()));
166                (buffer.char_to_pos(start_char), buffer.char_to_pos(end_char))
167            })
168            .collect()
169    }
170
171    #[test]
172    fn find_matches_matches_naive_search_across_rope_chunks() {
173        let mut text = String::new();
174        for i in 0..40_000usize {
175            text.push_str(&format!("{i:05x}|"));
176        }
177        let buffer = TextBuffer::from_str(&text);
178
179        let mut chunks = buffer.rope().chunks();
180        let first_chunk = chunks.next().expect("expected at least one chunk");
181        let second_chunk = chunks.next().expect("expected multiple rope chunks");
182        let boundary_needle = format!(
183            "{}{}",
184            trailing_chars(first_chunk, 6),
185            second_chunk.chars().take(6).collect::<String>()
186        );
187
188        assert_eq!(
189            buffer.find_matches(&boundary_needle),
190            naive_find_matches(&buffer, &boundary_needle)
191        );
192    }
193
194    #[test]
195    fn find_matches_matches_naive_search_for_unicode_across_rope_chunks() {
196        let mut text = String::new();
197        for i in 0..20_000usize {
198            text.push_str(&format!("α{i:05x}🙂β"));
199        }
200        let buffer = TextBuffer::from_str(&text);
201
202        let mut chunks = buffer.rope().chunks();
203        let first_chunk = chunks.next().expect("expected at least one chunk");
204        let second_chunk = chunks.next().expect("expected multiple rope chunks");
205        let boundary_needle = format!(
206            "{}{}",
207            trailing_chars(first_chunk, 4),
208            second_chunk.chars().take(4).collect::<String>()
209        );
210
211        assert_eq!(
212            buffer.find_matches(&boundary_needle),
213            naive_find_matches(&buffer, &boundary_needle)
214        );
215    }
216
217    #[test]
218    fn find_matches_preserves_non_overlapping_semantics_across_chunk_boundaries() {
219        let text = "a".repeat(120_000);
220        let buffer = TextBuffer::from_str(&text);
221
222        // Ensure the buffer actually spans multiple chunks
223        assert!(
224            buffer.rope().chunks().count() > 1,
225            "expected multiple rope chunks"
226        );
227
228        assert_eq!(
229            buffer.find_matches("aaa"),
230            naive_find_matches(&buffer, "aaa")
231        );
232    }
233}