ad_editor/regex/
haystack.rs

1//! A haystack is something that can be searched over by a Regex
2use crate::buffer::{Buffer, GapBuffer};
3use std::borrow::Cow;
4
/// Something that can be searched over by a [Regex][0].
///
/// The interface exposed by this trait supports searching over streaming data if needed but at the
/// cost of reduced performance.
///
/// Unless a method name says otherwise, every offset accepted or yielded by this trait is a byte
/// offset into the haystack rather than a character offset.
///
/// [0]: crate::regex::Regex
// `len` is provided without a matching `is_empty`, which clippy would otherwise flag.
#[allow(clippy::len_without_is_empty)]
pub trait Haystack {
    /// Ask the haystack to arrange its contents as a single contiguous run if it is able to.
    fn try_make_contiguous(&mut self);

    /// Whether the contents of the haystack are currently stored as one contiguous run.
    fn is_contiguous(&self) -> bool;

    /// The length of the haystack in bytes.
    fn len(&self) -> usize;

    /// The text running from byte `offset` to the end of the haystack.
    ///
    /// Returns `None` when `offset` lies beyond the end of the haystack.
    fn substr_from<'a>(&'a self, offset: usize) -> Option<Cow<'a, str>>;

    /// The text between the byte offsets `from` and `to`.
    fn substr<'a>(&'a self, from: usize, to: usize) -> Cow<'a, str>;

    /// Convert a byte offset into a character offset.
    ///
    /// Returns `None` when `byte_idx` lies beyond the end of the haystack.
    fn byte_to_char(&self, byte_idx: usize) -> Option<usize>;

    /// Convert a character offset into a byte offset.
    ///
    /// The offset one past the final character is accepted; anything larger yields `None`.
    fn char_to_byte(&self, char_idx: usize) -> Option<usize>;

    /// Iterate over `(byte offset, char)` pairs from byte offset `from` to the end of the
    /// haystack.
    ///
    /// Returns `None` when `from` is at or beyond the end of the haystack.
    fn iter_from(&self, from: usize) -> Option<impl Iterator<Item = (usize, char)>>;

    /// Iterate forwards over the `(byte offset, char)` pairs between byte offsets `from` and `to`.
    fn iter_between(&self, from: usize, to: usize) -> impl Iterator<Item = (usize, char)>;

    /// Iterate backwards over the `(byte offset, char)` pairs between byte offsets `from` and
    /// `to`, starting with the character closest to `to`.
    fn rev_iter_between(&self, from: usize, to: usize) -> impl Iterator<Item = (usize, char)>;
}

impl Haystack for &str {
    /// A string slice is always contiguous so there is nothing to do.
    fn try_make_contiguous(&mut self) {}

    fn is_contiguous(&self) -> bool {
        true
    }

    fn len(&self) -> usize {
        str::len(self)
    }

    /// Returns `None` when `offset` is past the end of the string or does not fall on a character
    /// boundary.
    fn substr_from<'a>(&'a self, offset: usize) -> Option<Cow<'a, str>> {
        // `str::get` performs both the bounds check and the char boundary check, so a bad offset
        // from a caller can never be turned into an invalid `&str`.
        self.get(offset..).map(Cow::Borrowed)
    }

    /// Borrows the half-open byte range `from..to`.
    ///
    /// # Panics
    /// Panics if the range is out of bounds or does not fall on character boundaries.
    fn substr<'a>(&'a self, from: usize, to: usize) -> Cow<'a, str> {
        Cow::Borrowed(&self[from..to])
    }

    /// Counts the characters that start before `byte_idx`.
    fn byte_to_char(&self, byte_idx: usize) -> Option<usize> {
        // Offsets beyond the end are rejected, matching the buffer backed implementations.
        if byte_idx > self.len() {
            return None;
        }

        let n_chars = self
            .char_indices()
            .take_while(|&(idx, _)| idx < byte_idx)
            .count();

        Some(n_chars)
    }

    fn char_to_byte(&self, char_idx: usize) -> Option<usize> {
        // Chaining on the total length makes the offset one past the final character valid, so
        // that the result of `byte_to_char(len)` can be converted back again.
        self.char_indices()
            .map(|(idx, _)| idx)
            .chain(std::iter::once(self.len()))
            .nth(char_idx)
    }

    /// # Panics
    /// Panics if `from` is inside the string but not on a character boundary.
    fn iter_from(&self, from: usize) -> Option<impl Iterator<Item = (usize, char)>> {
        if from >= self.len() {
            return None;
        }

        // `char_indices` counts from the start of the sub-slice so shift back to haystack offsets
        let it = self[from..]
            .char_indices()
            .map(move |(i, ch)| (i + from, ch));

        Some(it)
    }

    /// # Panics
    /// Panics if `from..to` is out of bounds or does not fall on character boundaries.
    fn iter_between(&self, from: usize, to: usize) -> impl Iterator<Item = (usize, char)> {
        self[from..to]
            .char_indices()
            .map(move |(i, ch)| (i + from, ch))
    }

    /// # Panics
    /// Panics if `from..to` is out of bounds or does not fall on character boundaries.
    fn rev_iter_between(&self, from: usize, to: usize) -> impl Iterator<Item = (usize, char)> {
        self[from..to]
            .char_indices()
            .map(move |(i, ch)| (i + from, ch))
            .rev()
    }
}
89
90impl Haystack for GapBuffer {
91    fn try_make_contiguous(&mut self) {
92        self.make_contiguous();
93    }
94
95    fn is_contiguous(&self) -> bool {
96        self.is_contiguous()
97    }
98
99    fn len(&self) -> usize {
100        self.len()
101    }
102
103    fn substr_from<'a>(&'a self, offset: usize) -> Option<Cow<'a, str>> {
104        if offset > self.len() {
105            None
106        } else {
107            // SAFETY: assumes make_contiguous was called first
108            Some(Cow::Borrowed(unsafe { self.substr_from(offset) }))
109        }
110    }
111
112    fn substr<'a>(&'a self, from: usize, to: usize) -> Cow<'a, str> {
113        self.slice_from_byte_offsets(from, to).into()
114    }
115
116    fn byte_to_char(&self, byte_idx: usize) -> Option<usize> {
117        if byte_idx > self.len() {
118            None
119        } else {
120            Some(self.byte_to_char(byte_idx))
121        }
122    }
123
124    fn char_to_byte(&self, char_idx: usize) -> Option<usize> {
125        if char_idx > self.len_chars() {
126            None
127        } else {
128            Some(self.char_to_byte(char_idx))
129        }
130    }
131
132    fn iter_from(&self, from: usize) -> Option<impl Iterator<Item = (usize, char)>> {
133        if from >= self.len() {
134            None
135        } else {
136            let mut acc = from;
137            Some(
138                self.slice_from_byte_offsets(from, self.len())
139                    .chars()
140                    .map(move |ch| {
141                        let item = (acc, ch);
142                        acc += ch.len_utf8();
143                        item
144                    }),
145            )
146        }
147    }
148
149    fn iter_between(&self, from: usize, to: usize) -> impl Iterator<Item = (usize, char)> {
150        let mut acc = from;
151
152        self.slice_from_byte_offsets(from, to)
153            .chars()
154            .map(move |ch| {
155                let item = (acc, ch);
156                acc += ch.len_utf8();
157                item
158            })
159    }
160
161    fn rev_iter_between(&self, from: usize, to: usize) -> impl Iterator<Item = (usize, char)> {
162        let mut acc = to;
163
164        self.slice_from_byte_offsets(from, to)
165            .rev_chars()
166            .map(move |ch| {
167                acc -= ch.len_utf8();
168                (acc, ch)
169            })
170    }
171}
172
173impl Haystack for Buffer {
174    fn try_make_contiguous(&mut self) {
175        self.txt.make_contiguous();
176    }
177
178    fn is_contiguous(&self) -> bool {
179        self.txt.is_contiguous()
180    }
181
182    fn len(&self) -> usize {
183        self.txt.len()
184    }
185
186    fn substr_from<'a>(&'a self, offset: usize) -> Option<Cow<'a, str>> {
187        if offset > self.txt.len() {
188            None
189        } else {
190            // SAFETY: assumes make_contiguous was called first
191            Some(Cow::Borrowed(unsafe { self.txt.substr_from(offset) }))
192        }
193    }
194
195    fn substr<'a>(&'a self, from: usize, to: usize) -> Cow<'a, str> {
196        self.txt.slice_from_byte_offsets(from, to).into()
197    }
198
199    fn byte_to_char(&self, byte_idx: usize) -> Option<usize> {
200        if byte_idx > self.txt.len() {
201            None
202        } else {
203            Some(self.txt.byte_to_char(byte_idx))
204        }
205    }
206
207    fn char_to_byte(&self, char_idx: usize) -> Option<usize> {
208        if char_idx > self.txt.len_chars() {
209            None
210        } else {
211            Some(self.txt.char_to_byte(char_idx))
212        }
213    }
214
215    fn iter_from(&self, from: usize) -> Option<impl Iterator<Item = (usize, char)>> {
216        if from >= self.len() {
217            None
218        } else {
219            let mut acc = from;
220            Some(
221                self.txt
222                    .slice_from_byte_offsets(from, self.len())
223                    .chars()
224                    .map(move |ch| {
225                        let item = (acc, ch);
226                        acc += ch.len_utf8();
227                        item
228                    }),
229            )
230        }
231    }
232
233    fn iter_between(&self, from: usize, to: usize) -> impl Iterator<Item = (usize, char)> {
234        let mut acc = from;
235
236        self.txt
237            .slice_from_byte_offsets(from, to)
238            .chars()
239            .map(move |ch| {
240                let item = (acc, ch);
241                acc += ch.len_utf8();
242                item
243            })
244    }
245
246    fn rev_iter_between(&self, from: usize, to: usize) -> impl Iterator<Item = (usize, char)> {
247        let mut acc = to;
248
249        self.txt
250            .slice_from_byte_offsets(from, to)
251            .rev_chars()
252            .map(move |ch| {
253                acc -= ch.len_utf8();
254                (acc, ch)
255            })
256    }
257}