redox_core/buffer/text_buffer/
search.rs1use super::TextBuffer;
8use crate::buffer::Pos;
9
10impl TextBuffer {
11 pub fn find_char_after_on_line(&self, pos: Pos, needle: char) -> Option<Pos> {
13 let pos = self.clamp_pos(pos);
14 let line = self.clamp_line(pos.line);
15 let line_text = self.line_slice(line);
16
17 for (col, ch) in line_text
18 .chars()
19 .enumerate()
20 .skip(pos.col.saturating_add(1))
21 {
22 if ch == needle {
23 return Some(Pos::new(line, col));
24 }
25 }
26
27 None
28 }
29
30 pub fn find_matches(&self, needle: &str) -> Vec<(Pos, Pos)> {
34 if needle.is_empty() {
35 return Vec::new();
36 }
37
38 let needle_chars = needle.chars().count();
39 let overlap_char_limit = needle_chars.saturating_sub(1);
40 let mut matches = Vec::new();
41 let mut overlap = String::new();
42 let mut processed_chars = 0usize;
43 let mut last_emitted_end = 0usize;
44
45 for chunk in self.rope().chunks() {
46 let chunk_chars = chunk.chars().count();
47
48 if overlap.is_empty() {
49 collect_segment_matches(
50 self,
51 chunk,
52 processed_chars,
53 processed_chars,
54 needle,
55 needle_chars,
56 &mut matches,
57 &mut last_emitted_end,
58 );
59
60 if overlap_char_limit > 0 {
61 overlap = trailing_chars(chunk, overlap_char_limit);
62 }
63 } else {
64 let overlap_chars = overlap.chars().count();
65 let segment_start_char = processed_chars.saturating_sub(overlap_chars);
66 let mut segment = String::with_capacity(overlap.len().saturating_add(chunk.len()));
67 segment.push_str(&overlap);
68 segment.push_str(chunk);
69
70 collect_segment_matches(
71 self,
72 &segment,
73 segment_start_char,
74 processed_chars,
75 needle,
76 needle_chars,
77 &mut matches,
78 &mut last_emitted_end,
79 );
80
81 overlap = trailing_chars(&segment, overlap_char_limit);
82 }
83
84 processed_chars = processed_chars.saturating_add(chunk_chars);
85 }
86
87 matches
88 }
89}
90
91fn collect_segment_matches(
92 buffer: &TextBuffer,
93 segment: &str,
94 segment_start_char: usize,
95 emit_after_char: usize,
96 needle: &str,
97 needle_chars: usize,
98 out: &mut Vec<(Pos, Pos)>,
99 last_emitted_end: &mut usize,
100) {
101 let segment_scan_start_char = last_emitted_end.saturating_sub(segment_start_char);
102 let segment_scan_start_byte = byte_idx_for_char(segment, segment_scan_start_char);
103 let mut scan_start_byte = segment_scan_start_byte;
104 let mut scan_start_chars = segment_scan_start_char;
105
106 for (match_start_byte_rel, _) in segment[segment_scan_start_byte..].match_indices(needle) {
107 let match_start_byte = segment_scan_start_byte.saturating_add(match_start_byte_rel);
108 scan_start_chars = scan_start_chars
109 .saturating_add(segment[scan_start_byte..match_start_byte].chars().count());
110
111 let start_char = segment_start_char.saturating_add(scan_start_chars);
112 let end_char = start_char.saturating_add(needle_chars);
113 if start_char >= *last_emitted_end && end_char > emit_after_char {
114 out.push((buffer.char_to_pos(start_char), buffer.char_to_pos(end_char)));
115 *last_emitted_end = end_char;
116 }
117
118 scan_start_byte = match_start_byte.saturating_add(needle.len());
119 scan_start_chars = scan_start_chars.saturating_add(needle_chars);
120 }
121}
122
123fn byte_idx_for_char(text: &str, char_idx: usize) -> usize {
124 if char_idx == 0 {
125 return 0;
126 }
127
128 text.char_indices()
129 .nth(char_idx)
130 .map(|(byte_idx, _)| byte_idx)
131 .unwrap_or(text.len())
132}
133
134fn trailing_chars(text: &str, char_limit: usize) -> String {
135 if char_limit == 0 || text.is_empty() {
136 return String::new();
137 }
138
139 let total_chars = text.chars().count();
140 if total_chars <= char_limit {
141 return text.to_string();
142 }
143
144 let skip_chars = total_chars.saturating_sub(char_limit);
145 let start_byte = text
146 .char_indices()
147 .nth(skip_chars)
148 .map(|(byte_idx, _)| byte_idx)
149 .unwrap_or(text.len());
150 text[start_byte..].to_string()
151}
152
153#[cfg(test)]
154mod tests {
155 use super::*;
156
157 fn naive_find_matches(buffer: &TextBuffer, needle: &str) -> Vec<(Pos, Pos)> {
158 let source = buffer.to_string();
159 source
160 .match_indices(needle)
161 .map(|(start_byte, matched)| {
162 let start_char = buffer.rope().byte_to_char(start_byte);
163 let end_char = buffer
164 .rope()
165 .byte_to_char(start_byte.saturating_add(matched.len()));
166 (buffer.char_to_pos(start_char), buffer.char_to_pos(end_char))
167 })
168 .collect()
169 }
170
171 #[test]
172 fn find_matches_matches_naive_search_across_rope_chunks() {
173 let mut text = String::new();
174 for i in 0..40_000usize {
175 text.push_str(&format!("{i:05x}|"));
176 }
177 let buffer = TextBuffer::from_str(&text);
178
179 let mut chunks = buffer.rope().chunks();
180 let first_chunk = chunks.next().expect("expected at least one chunk");
181 let second_chunk = chunks.next().expect("expected multiple rope chunks");
182 let boundary_needle = format!(
183 "{}{}",
184 trailing_chars(first_chunk, 6),
185 second_chunk.chars().take(6).collect::<String>()
186 );
187
188 assert_eq!(
189 buffer.find_matches(&boundary_needle),
190 naive_find_matches(&buffer, &boundary_needle)
191 );
192 }
193
194 #[test]
195 fn find_matches_matches_naive_search_for_unicode_across_rope_chunks() {
196 let mut text = String::new();
197 for i in 0..20_000usize {
198 text.push_str(&format!("α{i:05x}🙂β"));
199 }
200 let buffer = TextBuffer::from_str(&text);
201
202 let mut chunks = buffer.rope().chunks();
203 let first_chunk = chunks.next().expect("expected at least one chunk");
204 let second_chunk = chunks.next().expect("expected multiple rope chunks");
205 let boundary_needle = format!(
206 "{}{}",
207 trailing_chars(first_chunk, 4),
208 second_chunk.chars().take(4).collect::<String>()
209 );
210
211 assert_eq!(
212 buffer.find_matches(&boundary_needle),
213 naive_find_matches(&buffer, &boundary_needle)
214 );
215 }
216
217 #[test]
218 fn find_matches_preserves_non_overlapping_semantics_across_chunk_boundaries() {
219 let text = "a".repeat(120_000);
220 let buffer = TextBuffer::from_str(&text);
221
222 assert!(
224 buffer.rope().chunks().count() > 1,
225 "expected multiple rope chunks"
226 );
227
228 assert_eq!(
229 buffer.find_matches("aaa"),
230 naive_find_matches(&buffer, "aaa")
231 );
232 }
233}