ad_editor/regex/
stream.rs

1use crate::{Edit, buffer::GapBuffer, dot::Dot, exec::Address, regex::Haystack};
2use std::{
3    borrow::Cow,
4    cell::RefCell,
5    io::{BufRead, BufReader, Read},
6};
7
/// Starting capacity reserved for the reusable line buffer that each read from the underlying
/// reader is staged in.
const LINE_BUF_LEN: usize = 100;
10
11/// A wrapper around a [Read] that buffers and caches the data read in order to support searching
12/// with a regex.
13#[derive(Debug)]
14pub struct CachingStream<R>
15where
16    R: Read,
17{
18    inner: RefCell<Inner<R>>,
19}
20
21impl<R> CachingStream<R>
22where
23    R: Read,
24{
25    pub fn new(r: R) -> Self {
26        Self {
27            inner: RefCell::new(Inner {
28                reader: BufReader::new(r),
29                buf: String::with_capacity(LINE_BUF_LEN),
30                gb: GapBuffer::from(""),
31                closed: false,
32                cleared_lines: 0,
33                cleared_bytes: 0,
34            }),
35        }
36    }
37
38    /// Clear the inner gap buffer state up until the end of the line before the given offset,
39    /// tracking the historic line and byte counts so we can correctly adjust future read offsets.
40    ///
41    /// Data before offset will no longer be accessible.
42    pub fn clear_until(&self, offset: usize) {
43        self.inner.borrow_mut().clear_until(offset);
44    }
45
46    fn is_closed(&self) -> bool {
47        self.inner.borrow().closed
48    }
49
50    fn get_char_at(&self, byte_idx: usize) -> Option<char> {
51        let inner = self.inner.borrow();
52        inner.gb.get_char_at(byte_idx - inner.cleared_bytes)
53    }
54
55    fn try_read_next_line(&self) {
56        self.inner.borrow_mut().try_read_next_line();
57    }
58}
59
60impl<R> Haystack for CachingStream<R>
61where
62    R: Read,
63{
64    fn try_make_contiguous(&mut self) {}
65
66    fn is_contiguous(&self) -> bool {
67        false
68    }
69
70    fn len(&self) -> usize {
71        usize::MAX
72    }
73
74    fn substr_from<'a>(&'a self, byte_offset: usize) -> Option<Cow<'a, str>> {
75        let inner = self.inner.borrow();
76        let s = Haystack::substr_from(&inner.gb, byte_offset)?.into_owned();
77
78        Some(Cow::Owned(s))
79    }
80
81    fn substr<'a>(&'a self, byte_from: usize, byte_to: usize) -> Cow<'a, str> {
82        let inner = self.inner.borrow();
83        let s = inner.gb.substr(byte_from, byte_to).into_owned();
84
85        Cow::Owned(s)
86    }
87
88    fn byte_to_char(&self, byte_idx: usize) -> Option<usize> {
89        Haystack::byte_to_char(&self.inner.borrow().gb, byte_idx)
90    }
91
92    fn char_to_byte(&self, char_idx: usize) -> Option<usize> {
93        Haystack::char_to_byte(&self.inner.borrow().gb, char_idx)
94    }
95
96    fn iter_from(&self, from: usize) -> Option<impl Iterator<Item = (usize, char)>> {
97        if self.inner.borrow().closed {
98            None
99        } else {
100            Some(CachingStreamIter {
101                inner: self,
102                from,
103                to: usize::MAX,
104            })
105        }
106    }
107
108    fn iter_between(&self, from: usize, to: usize) -> impl Iterator<Item = (usize, char)> {
109        CachingStreamIter {
110            inner: self,
111            from,
112            to,
113        }
114    }
115
116    fn rev_iter_between(&self, _from: usize, _to: usize) -> impl Iterator<Item = (usize, char)> {
117        std::iter::empty()
118    }
119}
120
121impl<R> Edit for CachingStream<R>
122where
123    R: Read,
124{
125    fn insert(&mut self, ix: usize, s: &str) {
126        self.inner.borrow_mut().gb.insert(ix, s);
127    }
128
129    fn remove(&mut self, from: usize, to: usize) {
130        self.inner.borrow_mut().gb.remove_range(from, to);
131    }
132}
133
134impl<R> Address for CachingStream<R>
135where
136    R: Read,
137{
138    fn current_dot(&self) -> Dot {
139        Dot::from_char_indices(0, usize::MAX)
140    }
141
142    fn len_bytes(&self) -> usize {
143        if self.is_closed() {
144            self.inner.borrow().gb.len()
145        } else {
146            usize::MAX
147        }
148    }
149
150    fn len_chars(&self) -> usize {
151        if self.is_closed() {
152            self.inner.borrow().gb.len_chars()
153        } else {
154            usize::MAX
155        }
156    }
157
158    fn max_iter(&self) -> usize {
159        if self.is_closed() {
160            self.inner.borrow().gb.len_chars()
161        } else {
162            usize::MAX
163        }
164    }
165
166    fn line_to_char(&self, line_idx: usize) -> Option<usize> {
167        let cur_len = self.inner.borrow().gb.len_lines();
168
169        if line_idx > cur_len {
170            for _ in cur_len..=line_idx {
171                self.try_read_next_line();
172                if self.is_closed() {
173                    break;
174                }
175            }
176        }
177
178        self.inner.borrow().gb.try_line_to_char(line_idx)
179    }
180
181    fn char_to_line(&self, char_idx: usize) -> Option<usize> {
182        self.inner.borrow().gb.try_char_to_line(char_idx)
183    }
184
185    fn char_to_line_end(&self, char_idx: usize) -> Option<usize> {
186        let gb = &self.inner.borrow().gb;
187        let line_idx = gb.try_char_to_line(char_idx)?;
188        match gb.try_line_to_char(line_idx + 1) {
189            None => Some(gb.len_chars() - 1),
190            Some(idx) => Some(idx),
191        }
192    }
193
194    fn char_to_line_start(&self, char_idx: usize) -> Option<usize> {
195        let gb = &self.inner.borrow().gb;
196        let line_idx = gb.try_char_to_line(char_idx)?;
197        Some(gb.line_to_char(line_idx))
198    }
199}
200
201#[derive(Debug)]
202struct Inner<R>
203where
204    R: Read,
205{
206    reader: BufReader<R>,
207    buf: String,
208    gb: GapBuffer,
209    closed: bool,
210    cleared_lines: usize,
211    cleared_bytes: usize,
212}
213
214impl<R> Inner<R>
215where
216    R: Read,
217{
218    fn try_read_next_line(&mut self) {
219        self.buf.clear();
220
221        match self.reader.read_line(&mut self.buf) {
222            Ok(n) => {
223                let len = self.gb.len_chars();
224                self.gb.insert_str(len, &self.buf);
225                self.closed = n == 0;
226            }
227            Err(_) => self.closed = true,
228        };
229    }
230
231    fn clear_until(&mut self, logical_offset: usize) {
232        let offset = logical_offset - self.cleared_bytes;
233        let cleared_lines = self.gb.lines_before_byte_offset(offset);
234        let char_to = self.gb.byte_to_char(offset);
235        self.gb.remove_range(0, char_to);
236
237        self.cleared_bytes = logical_offset;
238        self.cleared_lines += cleared_lines;
239    }
240}
241
242#[derive(Debug)]
243pub struct StreamSlice<'a, R>
244where
245    R: Read,
246{
247    inner: &'a RefCell<Inner<R>>,
248    from: usize,
249    to: usize,
250}
251
252impl<'a, R> StreamSlice<'a, R>
253where
254    R: Read,
255{
256    pub fn len(&self) -> usize {
257        self.to - self.from
258    }
259
260    pub fn is_empty(&self) -> bool {
261        self.len() == 0
262    }
263}
264
265impl<'a, R> From<StreamSlice<'a, R>> for Cow<'a, str>
266where
267    R: Read,
268{
269    fn from(s: StreamSlice<'a, R>) -> Self {
270        let inner = s.inner.borrow();
271        let to = if s.to == usize::MAX {
272            inner.gb.len()
273        } else {
274            s.to - inner.cleared_bytes
275        };
276
277        let slice = inner
278            .gb
279            .slice_from_byte_offsets(s.from - inner.cleared_bytes, to);
280
281        Cow::Owned(slice.to_string())
282    }
283}
284
285#[derive(Debug)]
286pub struct CachingStreamIter<'a, R>
287where
288    R: Read,
289{
290    pub(super) inner: &'a CachingStream<R>,
291    pub(super) from: usize,
292    pub(super) to: usize,
293}
294
295impl<R> Iterator for CachingStreamIter<'_, R>
296where
297    R: Read,
298{
299    type Item = (usize, char);
300
301    fn next(&mut self) -> Option<Self::Item> {
302        // self.from == self.to - 1 is the last character so
303        // we catch end of iteration on the subsequent call
304        if self.from >= self.to {
305            return None;
306        }
307
308        loop {
309            match self.inner.get_char_at(self.from) {
310                Some(ch) => {
311                    let res = (self.from, ch);
312                    self.from += ch.len_utf8();
313                    return Some(res);
314                }
315                None if self.inner.is_closed() => return None,
316                None => self.inner.try_read_next_line(),
317            }
318        }
319    }
320}
321
322mod impl_structex {
323    use super::*;
324    use crate::regex::Regex;
325    use std::{
326        io::{self, Read},
327        ops::Range,
328    };
329    use structex::re::{Haystack, RawCaptures, Sliceable, Writable};
330
331    impl<R> Haystack<Regex> for CachingStream<R>
332    where
333        R: Read,
334    {
335        fn is_match_between(&self, re: &Regex, from: usize, to: usize) -> bool {
336            re.matches_between(self, from, to)
337        }
338
339        fn captures_between(&self, re: &Regex, from: usize, to: usize) -> Option<RawCaptures> {
340            let m = re.find_between(self, from, to)?;
341
342            Some(RawCaptures::new(m.iter_locs()))
343        }
344    }
345
346    impl<R> Sliceable for CachingStream<R>
347    where
348        R: Read,
349    {
350        type Slice<'h>
351            = StreamSlice<'h, R>
352        where
353            Self: 'h;
354
355        fn char_at(&self, byte_offset: usize) -> Option<char> {
356            self.get_char_at(byte_offset)
357        }
358
359        fn slice(&self, range: Range<usize>) -> Self::Slice<'_> {
360            StreamSlice {
361                inner: &self.inner,
362                from: range.start,
363                to: range.end,
364            }
365        }
366
367        fn max_len(&self) -> usize {
368            usize::MAX
369        }
370    }
371
372    impl<R> Writable for CachingStream<R>
373    where
374        R: Read,
375    {
376        fn write_to<W>(&self, w: &mut W) -> io::Result<usize>
377        where
378            W: std::io::Write,
379        {
380            let inner = self.inner.borrow();
381            let (l, r) = inner.gb.as_byte_slices();
382            w.write_all(l)?;
383            w.write_all(r)?;
384
385            Ok(l.len() + r.len())
386        }
387    }
388
389    impl<'h, R> Writable for StreamSlice<'h, R>
390    where
391        R: Read,
392    {
393        fn write_to<W>(&self, w: &mut W) -> io::Result<usize>
394        where
395            W: std::io::Write,
396        {
397            let inner = self.inner.borrow();
398            let to = if self.to == usize::MAX {
399                inner.gb.len()
400            } else {
401                self.to - inner.cleared_bytes
402            };
403
404            let s = inner
405                .gb
406                .slice_from_byte_offsets(self.from - inner.cleared_bytes, to);
407            let (l, r) = s.as_slices();
408
409            w.write_all(l)?;
410            w.write_all(r)?;
411
412            Ok(l.len() + r.len())
413        }
414    }
415}