duat_core/text/
bytes.rs

1use std::{iter::FusedIterator, ops::RangeBounds, str::from_utf8_unchecked};
2
3use gapbuf::GapBuffer;
4
5use super::{Point, TextRange, records::Records, utf8_char_width};
6
/// The bytes of a text, stored in a [`GapBuffer`]
///
/// The contents of the buffer are treated as UTF-8 by the methods of
/// this type. Alongside the buffer, a set of [`Records`] of
/// `[byte, char, line]` positions is kept, which the `point_at*`
/// methods use as starting points for their searches.
#[derive(Default, Clone)]
pub struct Bytes {
    // The raw bytes of the text
    buf: GapBuffer<u8>,
    // `[byte, char, line]` triples; the maximum one is the length of
    // the text
    records: Records<[usize; 3]>,
}
12
13impl Bytes {
14    /// Returns a new instance of a [`Buffer`]
15    pub(crate) fn new(string: &str) -> Self {
16        let buf = GapBuffer::from_iter(string.bytes());
17
18        let len = buf.len();
19        let chars = unsafe {
20            let (s0, s1) = buf.as_slices();
21            std::str::from_utf8_unchecked(s0).chars().count()
22                + std::str::from_utf8_unchecked(s1).chars().count()
23        };
24        let lines = buf.iter().filter(|b| **b == b'\n').count();
25        Self {
26            buf,
27            records: Records::with_max([len, chars, lines]),
28        }
29    }
30
31    ////////// Querying functions
32
33    /// The [`Point`] at the end of the text
34    pub fn len(&self) -> Point {
35        let [b, c, l] = self.records.max();
36        Point::from_raw(b, c, l)
37    }
38
    /// Whether or not there are any characters in [`Bytes`]
    ///
    /// This only looks at the underlying buffer of bytes.
    ///
    /// # Note
    ///
    /// This does not check for tags, so with a [`Tag::GhostText`],
    /// there could actually be a "string" of characters on the
    /// [`Text`], it just wouldn't be considered real "text".
    ///
    /// [`Tag::GhostText`]: super::Tag::GhostText
    /// [`Text`]: super::Text
    pub fn is_empty(&self) -> bool {
        self.buf.is_empty()
    }
52
53    /// The `char` at the [`Point`]'s position
54    pub fn char_at(&self, p: Point) -> Option<char> {
55        if p.byte() >= self.len().byte() {
56            return None;
57        }
58
59        let [s0, s1] = self.strs(..).to_array();
60        if p.byte() < s0.len() {
61            s0[p.byte()..].chars().next()
62        } else {
63            s1[p.byte() - s0.len()..].chars().next()
64        }
65    }
66
67    /// An [`Iterator`] over the bytes in a given [range]
68    ///
69    /// If the range is fully or partially out of bounds, one or both
70    /// of the slices might be empty.
71    ///
72    /// [range]: TextRange
73    pub fn buffers(&self, range: impl TextRange) -> Buffers {
74        let range = range.to_range_fwd(self.buf.len());
75        let (s0, s1) = self.buf.range(range).as_slices();
76        Buffers([s0.iter(), s1.iter()])
77    }
78
79    /// An [`Iterator`] over the [`&str`]s of the [`Text`]
80    ///
81    /// # Note
82    ///
83    /// The reason why this function returns two strings is that the
84    /// contents of the text are stored in a [`GapBuffer`], which
85    /// works with two strings.
86    ///
87    /// If you want to iterate over them, you can do the following:
88    ///
89    /// ```rust
90    /// # use duat_core::text::{Point, Text};
91    /// # let (p0, p1) = (Point::default(), Point::default());
92    /// # let text = Text::new();
93    /// let bytes = text.bytes();
94    /// bytes.strs((p0, p1)).flat_map(str::chars);
95    /// ```
96    ///
97    /// Do note that you should avoid iterators like [`str::lines`],
98    /// as they will separate the line that is partially owned by each
99    /// [`&str`]:
100    ///
101    /// ```rust
102    /// let broken_up_line = [
103    ///     "This is line 1, business as usual.\nThis is line 2, but it",
104    ///     "is broken into two separate strings.\nSo 4 lines would be counted, \
105    ///      instead of 3",
106    /// ];
107    /// ```
108    ///
109    /// # [`TextRange`] behavior:
110    ///
111    /// If you give a single [`usize`]/[`Point`], it will be
112    /// interpreted as a range from.
113    ///
114    /// If you want the two full [`&str`]s, see [`strs`]
115    ///
116    /// [`&str`]: str
117    /// [`Text`]: super::Text
118    /// [range]: TextRange
119    /// [`strs`]: Self::strs
120    pub fn strs(&self, range: impl TextRange) -> Strs {
121        let range = range.to_range_fwd(self.buf.len());
122        Strs(self.strs_in_range_inner(range).into_iter())
123    }
124
125    /// Returns an iterator over the lines in a given range
126    ///
127    /// The lines are inclusive, that is, it will iterate over the
128    /// whole line, not just the parts within the range.
129    ///
130    /// # NOTE
131    ///
132    /// The reason why this requires mutable access is because we may
133    /// need to move the [`GapBuffer`]'s gap in order to make the
134    /// [range] contiguous for proper iteration.
135    ///
136    /// [range]: TextRange
137    pub fn lines(
138        &mut self,
139        range: impl TextRange,
140    ) -> impl DoubleEndedIterator<Item = (usize, &str)> + '_ {
141        let range = range.to_range_at(self.len().byte());
142        let start = self.point_at_line(self.point_at(range.start).line());
143        let end = {
144            let end = self.point_at(range.end);
145            let line_start = self.point_at_line(end.line());
146            match line_start == end {
147                true => end,
148                false => self.point_at_line((end.line() + 1).min(self.len().line())),
149            }
150        };
151        let lines = self.contiguous((start, end)).lines();
152        let (fwd_i, rev_i) = (start.line(), end.line());
153        TextLines { lines, fwd_i, rev_i }
154    }
155
156    /// Returns the two `&str`s in the byte range.
157    fn strs_in_range_inner(&self, range: impl RangeBounds<usize>) -> [&str; 2] {
158        let (s0, s1) = self.buf.as_slices();
159        let (start, end) = crate::get_ends(range, self.buf.len());
160        let (start, end) = (start, end);
161        // Make sure the start and end are in character bounds.
162        assert!(
163            [start, end]
164                .into_iter()
165                .filter_map(|b| self.buf.get(b))
166                .all(|b| utf8_char_width(*b) > 0),
167        );
168
169        unsafe {
170            let r0 = start.min(s0.len())..end.min(s0.len());
171            let r1 = start.saturating_sub(s0.len()).min(s1.len())
172                ..end.saturating_sub(s0.len()).min(s1.len());
173
174            [from_utf8_unchecked(&s0[r0]), from_utf8_unchecked(&s1[r1])]
175        }
176    }
177
    /// The [`Point`] corresponding to the byte position, 0 indexed
    ///
    /// The search starts from the record returned by the [`Records`]
    /// for `b`, walking forwards or backwards over the characters
    /// until `b` is reached.
    ///
    /// If the byte position would fall in between two characters
    /// (because the first one comprises more than one byte), the
    /// first character is chosen as the [`Point`] where the byte is
    /// located.
    ///
    /// # Panics
    ///
    /// Will panic if `b` is greater than the length of the text
    #[inline(always)]
    pub fn point_at(&self, b: usize) -> Point {
        assert!(
            b <= self.len().byte(),
            "byte out of bounds: the len is {}, but the byte is {b}",
            self.len().byte()
        );
        // The `[byte, char, line]` record from which to start walking
        let [c_b, c_c, mut c_l] = self.records.closest_to(b);

        let found = if b >= c_b {
            let [s0, s1] = self.strs_in_range_inner(c_b..);

            // Walk forwards from the record. Each item is the
            // (byte, char, line) where a character starts.
            s0.char_indices()
                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
                .enumerate()
                .map(|(i, (this_b, char))| {
                    // A '\n' bumps the line for the following chars,
                    // but is itself reported on the line it ends.
                    c_l += (char == '\n') as usize;
                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
                })
                // Keep the last character that starts at or before `b`
                .take_while(|&(rhs, ..)| b >= rhs)
                .last()
        } else {
            // Walk backwards over the characters before the record,
            // accumulating their byte lengths in `c_len`.
            let mut c_len = 0;
            self.strs_in_range_inner(..c_b)
                .into_iter()
                .flat_map(str::chars)
                .rev()
                .enumerate()
                .map(|(i, char)| {
                    c_l -= (char == '\n') as usize;
                    c_len += char.len_utf8();
                    (c_b - c_len, c_c - (i + 1), c_l)
                })
                // Keep the last character that starts at or after `b`
                // NOTE(review): if `b` is not at the start of a
                // character, this selects the following character (or
                // nothing at all), unlike the forward branch. Confirm
                // whether callers can pass such a `b`.
                .take_while(|&(rhs, ..)| b <= rhs)
                .last()
        };

        // If no character was selected, the end of the text is returned
        found
            .map(|(b, c, l)| Point::from_raw(b, c, l))
            .unwrap_or(self.len())
    }
229
230    /// The [`Point`] associated with the `c`th char
231    ///
232    /// # Panics
233    ///
234    /// Will panic if `c` is greater than the number of chars in the
235    /// text.
236    #[inline(always)]
237    pub fn point_at_char(&self, c: usize) -> Point {
238        assert!(
239            c <= self.len().char(),
240            "char out of bounds: the len is {}, but the char is {c}",
241            self.len().char()
242        );
243        let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(c, |[_, c, _]| *c);
244
245        let found = if c >= c_c {
246            let [s0, s1] = self.strs_in_range_inner(c_b..);
247
248            s0.char_indices()
249                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
250                .enumerate()
251                .map(|(i, (this_b, char))| {
252                    c_l += (char == '\n') as usize;
253                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
254                })
255                .take_while(|&(_, rhs, _)| c >= rhs)
256                .last()
257        } else {
258            let mut c_len = 0;
259            self.strs_in_range_inner(..)
260                .into_iter()
261                .flat_map(str::chars)
262                .rev()
263                .enumerate()
264                .map(|(i, char)| {
265                    c_l -= (char == '\n') as usize;
266                    c_len += char.len_utf8();
267                    (c_b - c_len, c_c - (i + 1), c_l)
268                })
269                .take_while(|&(_, rhs, _)| c <= rhs)
270                .last()
271        };
272
273        found
274            .map(|(b, c, l)| Point::from_raw(b, c, l))
275            .unwrap_or(self.len())
276    }
277
    /// The [`Point`] where the `l`th line starts, 0 indexed
    ///
    /// If `l == number_of_lines`, returns the last point of the
    /// text.
    ///
    /// # Panics
    ///
    /// Will panic if the number `l` is greater than the number of
    /// lines on the text
    #[inline(always)]
    pub fn point_at_line(&self, l: usize) -> Point {
        assert!(
            l <= self.len().line(),
            "line out of bounds: the len is {}, but the line is {l}",
            self.len().line()
        );
        let (c_b, c_c, mut c_l) = {
            // Within this block, `l` is the line of the record, not
            // the line that was requested.
            let [mut b, mut c, l] = self.records.closest_to_by_key(l, |[.., l]| *l);
            // Move the record back to the start of its own line, by
            // walking backwards until a '\n' is found.
            self.strs_in_range_inner(..b)
                .into_iter()
                .flat_map(str::chars)
                .rev()
                .take_while(|c| *c != '\n')
                .for_each(|char| {
                    b -= char.len_utf8();
                    c -= 1;
                });
            (b, c, l)
        };

        let found = if l >= c_l {
            let [s0, s1] = self.strs_in_range_inner(c_b..);

            // Walk forwards from the start of the record's line. Each
            // item is the (byte, char, line) where a character starts.
            s0.char_indices()
                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
                .enumerate()
                .map(|(i, (this_b, char))| {
                    // A '\n' bumps the line for the following chars,
                    // but is itself reported on the line it ends.
                    c_l += (char == '\n') as usize;
                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
                })
                // The first character on line `l` is the start of it
                .find(|&(.., rhs)| l == rhs)
        } else {
            // Walk backwards over the characters before the record's
            // line, accumulating their byte lengths in `c_len`.
            let mut c_len = 0;
            self.strs_in_range_inner(..c_b)
                .into_iter()
                .flat_map(str::chars)
                .rev()
                .enumerate()
                .map(|(i, char)| {
                    c_l -= (char == '\n') as usize;
                    c_len += char.len_utf8();
                    (c_b - c_len, c_c - (i + 1), c_l)
                })
                // Keep the earliest character that is still on line
                // `l` or later
                .take_while(|&(.., rhs)| l <= rhs)
                .last()
        };

        // If no character was selected, the end of the text is returned
        found
            .map(|(b, c, l)| Point::from_raw(b, c, l))
            .unwrap_or(self.len())
    }
339
340    /// The start and end [`Point`]s for the `l`th line
341    ///
342    /// If `l == number_of_lines`, these points will be the same.
343    ///
344    /// # Panics
345    ///
346    /// Will panic if the number `l` is greater than the number of
347    /// lines on the text
348    #[inline(always)]
349    pub fn points_of_line(&self, l: usize) -> [Point; 2] {
350        assert!(
351            l <= self.len().line(),
352            "byte out of bounds: the len is {}, but the line is {l}",
353            self.len().line()
354        );
355
356        let start = self.point_at_line(l);
357        let end = self
358            .chars_fwd(start)
359            .find_map(|(p, _)| (p.line() > start.line()).then_some(p))
360            .unwrap_or(start);
361        [start, end]
362    }
363
364    /// The last [`Point`] associated with a `char`
365    ///
366    /// This will give the [`Point`] of the last `char` of the text.
367    /// The difference between this method and [`len`] is that
368    /// it will return a [`Point`] one position earlier than it. If
369    /// the text is completely empty, it will return [`None`].
370    ///
371    /// [`len`]: Self::len
372    pub fn last_point(&self) -> Option<Point> {
373        self.strs(..)
374            .chars()
375            .next_back()
376            .map(|char| self.len().rev(char))
377    }
378
379    /// A forward iterator of the [`char`]s of [`Bytes`]
380    ///
381    /// Each [`char`] will be accompanied by a [`Point`], which is the
382    /// position where said character starts, e.g.
383    /// [`Point::default()`] for the first character
384    pub fn chars_fwd(&self, p: Point) -> impl Iterator<Item = (Point, char)> + '_ {
385        self.strs_in_range_inner(p.byte()..)
386            .into_iter()
387            .flat_map(str::chars)
388            .scan(p, |p, char| {
389                let old_p = *p;
390                *p = p.fwd(char);
391                Some((old_p, char))
392            })
393    }
394
395    /// A reverse iterator of the [`char`]s in [`Bytes`]
396    ///
397    /// Each [`char`] will be accompanied by a [`Point`], which is the
398    /// position where said character starts, e.g.
399    /// [`Point::default()`] for the first character
400    pub fn chars_rev(&self, p: Point) -> impl Iterator<Item = (Point, char)> + '_ {
401        self.strs_in_range_inner(..p.byte())
402            .into_iter()
403            .flat_map(str::chars)
404            .rev()
405            .scan(p, |p, char| {
406                *p = p.rev(char);
407                Some((*p, char))
408            })
409    }
410
411    ////////// Modification functions
412
413    /// Applies a [`Change`] to the [`GapBuffer`] within
414    ///
415    /// [`Change`]: super::Change
416    pub(super) fn apply_change(&mut self, change: super::Change<&str>) {
417        let edit = change.added_text();
418        let start = change.start();
419
420        let new_len = {
421            let lines = edit.bytes().filter(|b| *b == b'\n').count();
422            [edit.len(), edit.chars().count(), lines]
423        };
424
425        let old_len = unsafe {
426            let range = start.byte()..change.taken_end().byte();
427            let str = String::from_utf8_unchecked(
428                self.buf
429                    .splice(range, edit.as_bytes().iter().cloned())
430                    .collect(),
431            );
432
433            let lines = str.bytes().filter(|b| *b == b'\n').count();
434            [str.len(), str.chars().count(), lines]
435        };
436
437        let start_rec = [start.byte(), start.char(), start.line()];
438        self.records.transform(start_rec, old_len, new_len);
439        self.records.insert(start_rec);
440    }
441
442    /// Extends this [`Bytes`] with another
443    pub(super) fn extend(&mut self, other: Self) {
444        self.buf.extend(other.buf);
445        self.records
446            .transform(self.records.max(), [0, 0, 0], other.records.max())
447    }
448
449    /// Adds a record in the given position
450    pub(super) fn add_record(&mut self, [b, c, l]: [usize; 3]) {
451        self.records.insert([b, c, l]);
452    }
453
454    ////////// One str functions
455
456    /// Gets a single [`&str`] from a given [range]
457    ///
458    /// This is the equivalent of calling
459    /// [`Bytes::make_contiguous`] and [`Bytes::get_contiguous`].
460    /// While this takes less space in code, calling the other two
461    /// functions means that you won't be mutably borrowing the
462    /// [`Bytes`] anymore, so if that matters to you, you should do
463    /// that.
464    ///
465    /// [`&str`]: str
466    /// [range]: TextRange
467    pub fn contiguous(&mut self, range: impl TextRange) -> &str {
468        self.make_contiguous(range.clone());
469        self.get_contiguous(range).unwrap()
470    }
471
472    /// Moves the [`GapBuffer`]'s gap, so that the `range` is whole
473    ///
474    /// The return value is the value of the gap, if the second `&str`
475    /// is the contiguous one.
476    pub fn make_contiguous(&mut self, range: impl TextRange) {
477        let range = range.to_range_fwd(self.len().byte());
478        let gap = self.buf.gap();
479
480        if range.end <= gap || range.start >= gap {
481            return;
482        }
483
484        if gap.abs_diff(range.start) < gap.abs_diff(range.end) {
485            self.buf.set_gap(range.start);
486        } else {
487            self.buf.set_gap(range.end);
488        }
489    }
490
491    /// Assumes that the `range` given is contiguous in `self`
492    ///
493    /// You *MUST* call [`make_contiguous`] before using this
494    /// function. The sole purpose of this function is to not keep the
495    /// [`Bytes`] mutably borrowed.
496    ///
497    /// [`make_contiguous`]: Self::make_contiguous
498    pub fn get_contiguous(&self, range: impl TextRange) -> Option<&str> {
499        let range = range.to_range_fwd(self.len().byte());
500        let [s0, s1] = self.strs(..).to_array();
501
502        if range.end <= self.buf.gap() {
503            s0.get(range)
504        } else {
505            let gap = self.buf.gap();
506            s1.get(range.start - gap..range.end - gap)
507        }
508    }
509}
510
/// An [`Iterator`] over lines, each accompanied by an index
///
/// When iterating forwards, the indices count up from `fwd_i`. When
/// iterating backwards, they count down from `rev_i - 1`.
pub struct TextLines<'a> {
    lines: std::str::Lines<'a>,
    // The index of the next line to be returned from the front
    fwd_i: usize,
    // One past the index of the next line to be returned from the back
    rev_i: usize,
}

impl<'a> Iterator for TextLines<'a> {
    type Item = (usize, &'a str);

    fn next(&mut self) -> Option<Self::Item> {
        let line = self.lines.next()?;
        let index = self.fwd_i;
        self.fwd_i += 1;
        Some((index, line))
    }
}

impl DoubleEndedIterator for TextLines<'_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        let line = self.lines.next_back()?;
        self.rev_i -= 1;
        Some((self.rev_i, line))
    }
}
536
/// An [`Iterator`] over the bytes in a [`Text`]
///
/// The bytes come from two slices, which are iterated one after the
/// other.
///
/// [`Text`]: super::Text
#[derive(Clone)]
pub struct Buffers<'a>([std::slice::Iter<'a, u8>; 2]);

impl<'a> Buffers<'a> {
    /// Converts this [`Iterator`] into an array of its two parts
    ///
    /// Only the bytes that were not yet iterated over are included.
    pub fn to_array(&self) -> [&'a [u8]; 2] {
        let [front, back] = &self.0;
        [front.as_slice(), back.as_slice()]
    }
}

impl<'a> Iterator for Buffers<'a> {
    type Item = u8;

    fn next(&mut self) -> Option<Self::Item> {
        let [front, back] = &mut self.0;
        match front.next() {
            Some(byte) => Some(*byte),
            None => back.next().copied(),
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // Both parts are slice iterators, which know their exact len.
        let remaining = self.0[0].len() + self.0[1].len();
        (remaining, Some(remaining))
    }
}

impl<'a> ExactSizeIterator for Buffers<'a> {}

impl<'a> DoubleEndedIterator for Buffers<'a> {
    fn next_back(&mut self) -> Option<Self::Item> {
        let [front, back] = &mut self.0;
        match back.next_back() {
            Some(byte) => Some(*byte),
            None => front.next_back().copied(),
        }
    }
}
574
/// An [`Iterator`] over the [`&str`]s in a [`Text`]
///
/// [`&str`]: str
/// [`Text`]: super::Text
#[derive(Clone)]
pub struct Strs<'a>(std::array::IntoIter<&'a str, 2>);

impl<'a> Strs<'a> {
    /// Converts this [`Iterator`] into an array of its two parts
    ///
    /// Only the [`&str`]s that were not yet iterated over are
    /// included, in order, with the rest of the array being filled
    /// with empty [`&str`]s. This means that a remaining [`&str`] is
    /// never returned twice.
    ///
    /// [`&str`]: str
    pub fn to_array(&self) -> [&'a str; 2] {
        let remaining = self.0.as_slice();
        [
            remaining.first().copied().unwrap_or(""),
            // Not `last`, since with only one `&str` remaining, that
            // would be the same one as the `first`.
            remaining.get(1).copied().unwrap_or(""),
        ]
    }

    /// Iterates over the [`char`]s of both [`&str`]s
    ///
    /// [`&str`]: str
    pub fn chars(self) -> impl DoubleEndedIterator<Item = char> + 'a {
        let [s0, s1] = self.to_array();
        s0.chars().chain(s1.chars())
    }
}

impl<'a> Iterator for Strs<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}

impl ExactSizeIterator for Strs<'_> {}

impl DoubleEndedIterator for Strs<'_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        self.0.next_back()
    }
}

impl FusedIterator for Strs<'_> {}

impl std::fmt::Display for Strs<'_> {
    /// Writes the remaining [`&str`]s, one right after the other
    ///
    /// [`&str`]: str
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let [s0, s1] = self.to_array();
        write!(f, "{s0}{s1}")
    }
}
629
630impl std::fmt::Debug for Bytes {
631    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
632        f.debug_struct("Bytes")
633            .field("buf", &self.strs(..).to_array())
634            .field("records", &self.records)
635            .finish()
636    }
637}
638
impl PartialEq for Bytes {
    /// Compares the buffers of the two [`Bytes`]
    ///
    /// The records are not taken into account.
    fn eq(&self, other: &Self) -> bool {
        // NOTE(review): presumably the position of the gap doesn't
        // matter for this comparison; confirm against `GapBuffer`.
        self.buf == other.buf
    }
}
644
645impl PartialEq<&str> for Bytes {
646    fn eq(&self, other: &&str) -> bool {
647        let [s0, s1] = self.strs(..).to_array();
648        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
649    }
650}
651
652impl PartialEq<String> for Bytes {
653    fn eq(&self, other: &String) -> bool {
654        let [s0, s1] = self.strs(..).to_array();
655        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
656    }
657}