duat_core/text/
bytes.rs

1use std::{iter::FusedIterator, ops::RangeBounds, str::Utf8Error};
2
3use gapbuf::GapBuffer;
4use lender::{DoubleEndedLender, ExactSizeLender, Lender, Lending};
5
6use super::{Point, TextRange, records::Records};
7use crate::cfg::PrintCfg;
8
9/// The bytes of a [`Text`], encoded in UTF-8
10///
11/// [`Text`]: super::Text
12#[derive(Default, Clone)]
13pub struct Bytes {
14    buf: GapBuffer<u8>,
15    records: Records,
16}
17
18impl Bytes {
19    /// Returns a new instance of a [`Buffer`]
20    pub(crate) fn new(string: &str) -> Self {
21        let buf = GapBuffer::from_iter(string.bytes());
22
23        let len = buf.len();
24        let chars = string.chars().count();
25        let lines = buf.iter().filter(|b| **b == b'\n').count();
26        Self {
27            buf,
28            records: Records::new([len, chars, lines]),
29        }
30    }
31
32    ////////// Querying functions
33
34    /// The [`Point`] at the end of the text
35    pub fn len(&self) -> Point {
36        let [b, c, l] = self.records.max();
37        Point::from_raw(b, c, l)
38    }
39
40    /// Whether or not there are any characters in [`Bytes`], besides
41    /// the final `b'\n'`
42    ///
43    /// # Note
44    ///
45    /// This does not check for tags, so with a [`Tag::Ghost`],
46    /// there could actually be a "string" of characters on the
47    /// [`Text`], it just wouldn't be considered real "text". If you
48    /// want to check for the `InnerTags`'s possible emptyness as
49    /// well, see [`Text::is_empty_empty`].
50    ///
51    /// [`Tag::Ghost`]: super::Ghost
52    /// [`Text`]: super::Text
53    /// [`Text::is_empty_empty`]: super::Text::is_empty_empty
54    pub fn is_empty(&self) -> bool {
55        let (s0, s1) = self.buf.as_slices();
56        (s0 == b"\n" && s1 == b"") || (s0 == b"" && s1 == b"\n")
57    }
58
59    /// The `char` at the [`Point`]'s position
60    pub fn char_at(&self, p: Point) -> Option<char> {
61        if p.byte() >= self.len().byte() {
62            return None;
63        }
64
65        let [s0, s1] = self.strs_inner(..).unwrap();
66        Some(if p.byte() < s0.len() {
67            s0[p.byte()..].chars().next().unwrap()
68        } else {
69            s1[p.byte() - s0.len()..]
70                .chars()
71                .next()
72                .unwrap_or_else(|| panic!("{self:#?}"))
73        })
74    }
75
76    /// An [`Iterator`] over the bytes in a given _byte_ range
77    ///
78    /// Unlike [`strs`], this function works with _byte_ ranges, not
79    /// [`TextRange`]s. That's because [`Strs`] is supposed to return
80    /// valid UTF-8 strings, which need to have valid character
81    /// terminations, so they should be indexed by a character range,
82    /// not a byte range.
83    ///
84    /// Since buffers is based on `[u8]`s, not `str`s, it doesn't have
85    /// the same restrictions, so a byte range can be used instead.
86    ///
87    /// If the range is fully or partially out of bounds, one or both
88    /// of the slices might be empty.
89    ///
90    /// [`strs`]: Self::strs
91    pub fn buffers(&self, range: impl RangeBounds<usize>) -> Buffers<'_> {
92        let (s0, s1) = self.buf.range(range).as_slices();
93        Buffers([s0.iter(), s1.iter()])
94    }
95
96    /// An [`Iterator`] over the [`&str`]s of the [`Text`]
97    ///
98    /// # Note
99    ///
100    /// The reason why this function returns two strings is that the
101    /// contents of the text are stored in a [`GapBuffer`], which
102    /// works with two strings.
103    ///
104    /// If you want to iterate over them, you can do the following:
105    ///
106    /// ```rust
107    /// # use duat_core::{text::Point, prelude::*};
108    /// # let (p0, p1) = (Point::default(), Point::default());
109    /// # let text = Text::new();
110    /// let bytes = text.bytes();
111    /// let chars = bytes.strs(p0..p1).unwrap().chars();
112    ///
113    /// for char in chars {
114    ///     todo!();
115    /// }
116    /// ```
117    ///
118    /// Do note that you should avoid iterators like [`str::lines`],
119    /// as they will separate the line that is partially owned by each
120    /// [`&str`]:
121    ///
122    /// ```rust
123    /// let broken_up_line = [
124    ///     "This is line 1, business as usual.\nThis is line 2, but it",
125    ///     "is broken into two separate strings.\nSo 4 lines would be counted, instead of 3",
126    /// ];
127    /// ```
128    ///
129    /// This is one way that the inner [`GapBuffer`] could be set up,
130    /// where one of the lines is split among the two slices.
131    ///
132    /// If you wish to iterate over the lines, see [`Bytes::lines`].
133    ///
134    /// [`&str`]: str
135    /// [`Text`]: super::Text
136    /// [range]: TextRange
137    /// [`strs`]: Self::strs
138    pub fn strs(&self, range: impl TextRange) -> Option<Strs<'_>> {
139        let range = range.to_range(self.len().byte());
140        Some(Strs {
141            arr: self.strs_inner(range)?,
142            fwd: 0,
143            rev: 2,
144        })
145    }
146
147    /// Returns an iterator over the lines in a given range
148    ///
149    /// The lines are inclusive, that is, it will iterate over the
150    /// whole line, not just the parts within the range.
151    ///
152    /// [range]: TextRange
153    pub fn lines(&self, range: impl TextRange) -> Lines<'_> {
154        let range = range.to_range(self.len().byte());
155        let start = self.point_at_line(self.point_at_byte(range.start).line());
156        let end = {
157            let end = self.point_at_byte(range.end);
158            let line_start = self.point_at_line(end.line());
159            match line_start == end {
160                true => end,
161                false => self.point_at_line((end.line() + 1).min(self.len().line())),
162            }
163        };
164
165        // If the gap is outside of the range, we can just iterate through it
166        // regularly
167        let (fwd_i, rev_i) = (start.line(), end.line());
168        if let Some(str) = self.get_contiguous(start..end) {
169            let lines = [str.lines(), "".lines()];
170            Lines::new(lines, None, fwd_i, rev_i)
171        // If the gap is within the range, but on a line split, we
172        // can just iterate through two sets of lines.
173        } else if end.byte() > start.byte()
174            && self.buf[self.buf.gap() - 1] != b'\n'
175            && self.buf[self.buf.gap()] != b'\n'
176        {
177            let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
178            let lines = [s0.lines(), s1.lines()];
179            Lines::new(lines, None, fwd_i, rev_i)
180            // Otherwise, the line that was split will need to be
181            // allocated and returned separately.
182        } else {
183            let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
184
185            let (before, split0) = match s0.rsplit_once('\n') {
186                Some((before, split)) => (before, split),
187                None => ("", s0),
188            };
189            let (after, split1) = match s1.split_once('\n') {
190                Some((after, split)) => (after, split),
191                None => ("", s1),
192            };
193
194            let lines = [before.lines(), after.lines()];
195            let split_line = Some(split0.to_string() + split1);
196            Lines::new(lines, split_line, fwd_i, rev_i)
197        }
198    }
199
200    /// Returns the two `&str`s in the byte range.
201    fn strs_inner(&self, range: impl RangeBounds<usize>) -> Option<[&str; 2]> {
202        let (start, end) = crate::get_ends(range, self.len().byte());
203        use std::str::from_utf8_unchecked;
204
205        let (s0, s1) = self.buf.as_slices();
206
207        // Check if the slices match utf8 boundaries.
208        if s0.first().is_some_and(|b| utf8_char_width(*b) == 0)
209            || s1.first().is_some_and(|b| utf8_char_width(*b) == 0)
210            || self.buf.get(end).is_some_and(|b| utf8_char_width(*b) == 0)
211        {
212            return None;
213        }
214
215        Some(unsafe {
216            let r0 = start.min(s0.len())..end.min(s0.len());
217            let r1 = start.saturating_sub(s0.len()).min(s1.len())
218                ..end.saturating_sub(s0.len()).min(s1.len());
219
220            [from_utf8_unchecked(&s0[r0]), from_utf8_unchecked(&s1[r1])]
221        })
222    }
223
224    /// The [`Point`] corresponding to the byte position, 0 indexed
225    ///
226    /// If the byte position would fall in between two characters
227    /// (because the first one comprises more than one byte), the
228    /// first character is chosen as the [`Point`] where the byte is
229    /// located.
230    ///
231    /// # Panics
232    ///
233    /// Will panic if `b` is greater than the length of the text
234    #[inline(always)]
235    pub fn point_at_byte(&self, b: usize) -> Point {
236        assert!(
237            b <= self.len().byte(),
238            "byte out of bounds: the len is {}, but the byte is {b}",
239            self.len().byte()
240        );
241
242        let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(b, |[b, ..]| b);
243
244        let found = if b >= c_b {
245            let [s0, s1] = self.strs_inner(c_b..).unwrap();
246
247            s0.char_indices()
248                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
249                .enumerate()
250                .map(|(i, (this_b, char))| {
251                    c_l += (char == '\n') as usize;
252                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
253                })
254                .take_while(|&(rhs, ..)| b >= rhs)
255                .last()
256        } else {
257            let mut c_len = 0;
258            self.strs_inner(..c_b)
259                .unwrap()
260                .into_iter()
261                .flat_map(str::chars)
262                .rev()
263                .enumerate()
264                .map(|(i, char)| {
265                    c_l -= (char == '\n') as usize;
266                    c_len += char.len_utf8();
267                    (c_b - c_len, c_c - (i + 1), c_l)
268                })
269                .take_while(|&(rhs, ..)| b <= rhs)
270                .last()
271        };
272
273        found
274            .map(|(b, c, l)| Point::from_raw(b, c, l))
275            .unwrap_or(self.len())
276    }
277
278    /// The [`Point`] associated with the `c`th char
279    ///
280    /// # Panics
281    ///
282    /// Will panic if `c` is greater than the number of chars in the
283    /// text.
284    #[inline(always)]
285    pub fn point_at_char(&self, c: usize) -> Point {
286        assert!(
287            c <= self.len().char(),
288            "char out of bounds: the len is {}, but the char is {c}",
289            self.len().char()
290        );
291
292        let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(c, |[_, c, _]| c);
293
294        let found = if c >= c_c {
295            let [s0, s1] = self.strs_inner(c_b..).unwrap();
296
297            s0.char_indices()
298                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
299                .enumerate()
300                .map(|(i, (this_b, char))| {
301                    c_l += (char == '\n') as usize;
302                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
303                })
304                .take_while(|&(_, rhs, _)| c >= rhs)
305                .last()
306        } else {
307            let mut c_len = 0;
308            self.strs_inner(..c_b)
309                .unwrap()
310                .into_iter()
311                .flat_map(str::chars)
312                .rev()
313                .enumerate()
314                .map(|(i, char)| {
315                    c_l -= (char == '\n') as usize;
316                    c_len += char.len_utf8();
317                    (c_b - c_len, c_c - (i + 1), c_l)
318                })
319                .take_while(|&(_, rhs, _)| c <= rhs)
320                .last()
321        };
322
323        found
324            .map(|(b, c, l)| Point::from_raw(b, c, l))
325            .unwrap_or(self.len())
326    }
327
328    /// The [`Point`] where the `l`th line starts, 0 indexed
329    ///
330    /// If `l == number_of_lines`, returns the last point of the
331    /// text.
332    ///
333    /// # Panics
334    ///
335    /// Will panic if the number `l` is greater than the number of
336    /// lines on the text
337    #[inline(always)]
338    pub fn point_at_line(&self, l: usize) -> Point {
339        assert!(
340            l <= self.len().line(),
341            "line out of bounds: the len is {}, but the line is {l}",
342            self.len().line()
343        );
344
345        let (c_b, c_c, mut c_l) = {
346            let [mut b, mut c, l] = self.records.closest_to_by_key(l, |[.., l]| l);
347            self.strs_inner(..b)
348                .unwrap()
349                .into_iter()
350                .flat_map(str::chars)
351                .rev()
352                .take_while(|c| *c != '\n')
353                .for_each(|char| {
354                    b -= char.len_utf8();
355                    c -= 1;
356                });
357            (b, c, l)
358        };
359
360        let found = if l >= c_l {
361            let [s0, s1] = self.strs_inner(c_b..).unwrap();
362
363            s0.char_indices()
364                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
365                .enumerate()
366                .map(|(i, (this_b, char))| {
367                    c_l += (char == '\n') as usize;
368                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
369                })
370                .find(|&(.., rhs)| l == rhs)
371        } else {
372            let mut c_len = 0;
373            self.strs_inner(..c_b)
374                .unwrap()
375                .into_iter()
376                .flat_map(str::chars)
377                .rev()
378                .enumerate()
379                .map(|(i, char)| {
380                    c_l -= (char == '\n') as usize;
381                    c_len += char.len_utf8();
382                    (c_b - c_len, c_c - (i + 1), c_l)
383                })
384                .take_while(|&(.., rhs)| l <= rhs)
385                .last()
386        };
387
388        found
389            .map(|(b, c, l)| Point::from_raw(b, c, l))
390            .unwrap_or(self.len())
391    }
392
393    /// The start and end [`Point`]s for the `l`th line
394    ///
395    /// If `l == number_of_lines`, these points will be the same.
396    ///
397    /// The second number _includes_ the `\n` at the end of the line.
398    ///
399    /// # Panics
400    ///
401    /// Will panic if the number `l` is greater than the number of
402    /// lines on the text
403    #[inline(always)]
404    pub fn points_of_line(&self, l: usize) -> [Point; 2] {
405        assert!(
406            l <= self.len().line(),
407            "byte out of bounds: the len is {}, but the line is {l}",
408            self.len().line()
409        );
410
411        let start = self.point_at_line(l);
412        let end = self
413            .chars_fwd(start..)
414            .unwrap()
415            .find_map(|(p, _)| (p.line() > start.line()).then_some(p))
416            .unwrap_or(start);
417        [start, end]
418    }
419
420    /// The last [`Point`] associated with a `char`
421    ///
422    /// This will give the [`Point`] of the last `char` of the text.
423    /// The difference between this method and [`len`] is that
424    /// it will return a [`Point`] one position earlier than it. If
425    /// the text is completely empty, it will return [`None`].
426    ///
427    /// [`len`]: Self::len
428    pub fn last_point(&self) -> Point {
429        let strs = self.strs_inner(..).unwrap();
430        let char = strs.into_iter().flat_map(str::chars).next_back().unwrap();
431        self.len().rev(char)
432    }
433
434    /// A forward iterator of the [`char`]s of [`Bytes`]
435    ///
436    /// Each [`char`] will be accompanied by a [`Point`], which is the
437    /// position where said character starts, e.g.
438    /// [`Point::default()`] for the first character
439    pub fn chars_fwd(
440        &self,
441        range: impl TextRange,
442    ) -> Option<impl Iterator<Item = (Point, char)> + '_> {
443        let range = range.to_range(self.len().byte());
444        let p = self.point_at_byte(range.start);
445        Some(self.strs(range)?.chars().scan(p, |p, char| {
446            let old_p = *p;
447            *p = p.fwd(char);
448            Some((old_p, char))
449        }))
450    }
451
452    /// A reverse iterator of the [`char`]s in [`Bytes`]
453    ///
454    /// Each [`char`] will be accompanied by a [`Point`], which is the
455    /// position where said character starts, e.g.
456    /// [`Point::default()`] for the first character
457    pub fn chars_rev(
458        &self,
459        range: impl TextRange,
460    ) -> Option<impl Iterator<Item = (Point, char)> + '_> {
461        let range = range.to_range(self.len().byte());
462        let p = self.point_at_byte(range.end);
463        Some(self.strs(range)?.chars().rev().scan(p, |p, char| {
464            *p = p.rev(char);
465            Some((*p, char))
466        }))
467    }
468
469    /// Gets the indentation level on the current line
470    pub fn indent(&self, p: Point, cfg: PrintCfg) -> usize {
471        let [start, _] = self.points_of_line(p.line());
472        self.chars_fwd(start..)
473            .unwrap()
474            .map_while(|(_, c)| match c {
475                ' ' => Some(1),
476                '\t' => Some(cfg.tab_stops.size() as usize),
477                _ => None,
478            })
479            .sum()
480    }
481
482    ////////// Modification functions
483
484    /// Applies a [`Change`] to the [`GapBuffer`] within
485    ///
486    /// [`Change`]: super::Change
487    pub(crate) fn apply_change(&mut self, change: super::Change<&str>) {
488        let edit = change.added_str();
489        let start = change.start();
490
491        let range = start.byte()..change.taken_end().byte();
492        self.buf.splice(range, edit.bytes());
493
494        let start_rec = [start.byte(), start.char(), start.line()];
495        let old_len = [
496            change.taken_end().byte() - start.byte(),
497            change.taken_end().char() - start.char(),
498            change.taken_end().line() - start.line(),
499        ];
500        let new_len = [
501            change.added_end().byte() - start.byte(),
502            change.added_end().char() - start.char(),
503            change.added_end().line() - start.line(),
504        ];
505
506        self.records.transform(start_rec, old_len, new_len);
507        self.records.insert(start_rec);
508    }
509
510    /// Extends this [`Bytes`] with another
511    pub(super) fn extend(&mut self, other: Self) {
512        self.buf.extend(other.buf);
513        self.records
514            .transform(self.records.max(), [0, 0, 0], other.records.max())
515    }
516
517    /// Adds a record in the given position
518    pub(super) fn add_record(&mut self, [b, c, l]: [usize; 3]) {
519        self.records.insert([b, c, l]);
520    }
521
522    ////////// One str functions
523
524    /// Tries to get a contiguous [`&str`] from the [`Bytes`]
525    ///
526    /// Returns [`None`] if the gap of the inner buffer was within the
527    /// given range *OR*.
528    ///
529    /// [`&str`]: str
530    pub fn get_contiguous(&self, range: impl TextRange) -> Option<&str> {
531        let range = range.to_range(self.len().byte());
532        let [s0, s1] = self.strs_inner(..).unwrap();
533
534        if range.end <= self.buf.gap() {
535            s0.get(range)
536        } else {
537            let gap = self.buf.gap();
538            s1.get(range.start.checked_sub(gap)?..range.end.checked_sub(gap)?)
539        }
540    }
541}
542
/// A [`Lender`] over the lines on [`Bytes`]
///
/// This is a [`Lender`], rather than a regular [`Iterator`], because
/// the [`Bytes`] keep their contents in a [`GapBuffer`], which means
/// that any line may be split in two. In order to still return such
/// a line as an `&str`, a new [`String`] needs to be allocated, and
/// that [`String`] is owned by the [`Lines`] themselves.
pub struct Lines<'a> {
    /// The lines before and after the split line
    lines: [std::str::Lines<'a>; 2],
    /// The line that was split by the gap, if there is one
    split_line: Option<String>,
    /// Number of the next line to be returned from the front
    fwd_i: usize,
    /// Number of the line after the next one to be returned from the back
    rev_i: usize,
    /// Whether the `split_line` was already handed out
    split_line_used: bool,
}
557
558impl<'a> Lines<'a> {
559    fn new(
560        lines: [std::str::Lines<'a>; 2],
561        split_line: Option<String>,
562        fwd_i: usize,
563        rev_i: usize,
564    ) -> Self {
565        Self {
566            lines,
567            split_line,
568            fwd_i,
569            rev_i,
570            split_line_used: false,
571        }
572    }
573}
574
575impl<'a, 'text> Lending<'a> for Lines<'text> {
576    type Lend = (usize, &'a str);
577}
578
579impl<'a> Lender for Lines<'a> {
580    fn next(&mut self) -> Option<lender::Lend<'_, Self>> {
581        self.lines[0]
582            .next()
583            .or_else(|| {
584                if self.split_line_used {
585                    None
586                } else {
587                    self.split_line_used = true;
588                    self.split_line.as_deref()
589                }
590            })
591            .or_else(|| self.lines[1].next())
592            .map(|line| {
593                self.fwd_i += 1;
594                (self.fwd_i - 1, line)
595            })
596    }
597
598    fn size_hint(&self) -> (usize, Option<usize>) {
599        (self.rev_i - self.fwd_i, Some(self.rev_i - self.fwd_i))
600    }
601}
602
603impl<'a> DoubleEndedLender for Lines<'a> {
604    fn next_back(&mut self) -> Option<lender::Lend<'_, Self>> {
605        self.lines[1]
606            .next_back()
607            .or_else(|| {
608                if self.split_line_used {
609                    None
610                } else {
611                    self.split_line_used = true;
612                    self.split_line.as_deref()
613                }
614            })
615            .or_else(|| self.lines[0].next_back())
616            .map(|line| {
617                self.rev_i -= 1;
618                (self.rev_i, line)
619            })
620    }
621}
622
623impl<'a> ExactSizeLender for Lines<'a> {}
624
/// An [`Iterator`] over the bytes in a [`Text`]
///
/// The bytes are kept in two parts, which are iterated over one
/// after the other.
///
/// [`Text`]: super::Text
#[derive(Clone)]
pub struct Buffers<'a>([std::slice::Iter<'a, u8>; 2]);

impl<'a> Buffers<'a> {
    /// Converts this [`Iterator`] into an array of its two parts
    ///
    /// Only the bytes that were not yet iterated over are included.
    pub fn to_array(&self) -> [&'a [u8]; 2] {
        let [head, tail] = &self.0;
        [head.as_slice(), tail.as_slice()]
    }

    /// Tries to create a [`String`] out of the two buffers
    ///
    /// # Errors
    ///
    /// This function will return an error if the bounds of the slices
    /// don't correspond to utf8 character boundaries, or if the gap
    /// within these slices doesn't correspond to a utf8 character
    /// boundary.
    pub fn try_to_string(self) -> Result<String, Utf8Error> {
        let [head, tail] = self.to_array();
        let head = std::str::from_utf8(head)?;
        let tail = std::str::from_utf8(tail)?;

        let mut string = String::with_capacity(head.len() + tail.len());
        string.push_str(head);
        string.push_str(tail);
        Ok(string)
    }

    /// Treats the inner slices as `&str`s and iterates over their
    /// characters
    ///
    /// You will want to use this function iff you don't want to check
    /// for character boundaries at the edges (very rarely). Otherwise
    /// [`bytes.strs({byte_range}).chars()`] instead.
    ///
    /// # Safety
    ///
    /// You must ensure that the [`Buffers`] were acquired from valid
    /// byte ranges which coincide with character terminations. If you
    /// are unsure of that, you should use [`Strs::chars`] instead.
    ///
    /// [`bytes.strs({byte_range}).chars()`]: Bytes::strs
    pub unsafe fn chars_unchecked(self) -> impl Iterator<Item = char> + 'a {
        let [head, tail] = self.to_array();
        // SAFETY: The caller guarantees that both slices start and end
        // on character boundaries, which makes each of them valid UTF-8.
        let (head, tail) = unsafe {
            (
                std::str::from_utf8_unchecked(head),
                std::str::from_utf8_unchecked(tail),
            )
        };
        head.chars().chain(tail.chars())
    }
}

impl Iterator for Buffers<'_> {
    type Item = u8;

    fn next(&mut self) -> Option<Self::Item> {
        let [head, tail] = &mut self.0;
        head.next().or_else(|| tail.next()).copied()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // Slice iterators always know their exact length.
        let remaining = self.0[0].len() + self.0[1].len();
        (remaining, Some(remaining))
    }
}

impl ExactSizeIterator for Buffers<'_> {}

impl DoubleEndedIterator for Buffers<'_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        let [head, tail] = &mut self.0;
        tail.next_back().or_else(|| head.next_back()).copied()
    }
}
695
/// An [`Iterator`] over the [`&str`]s in a [`Text`]
///
/// There are always two [`&str`]s, although either of them may be
/// empty.
///
/// [`&str`]: str
/// [`Text`]: super::Text
#[derive(Clone)]
pub struct Strs<'a> {
    /// The two parts of the text
    arr: [&'a str; 2],
    /// Index of the next part to return from the front
    fwd: usize,
    /// One past the index of the next part to return from the back
    rev: usize,
}

impl<'a> Strs<'a> {
    /// Converts this [`Iterator`] into an array of its two parts
    ///
    /// Both parts are returned, even if they were already iterated
    /// over.
    pub fn to_array(&self) -> [&'a str; 2] {
        self.arr
    }

    /// Iterates over the [`char`]s of both [`&str`]s
    ///
    /// [`&str`]: str
    pub fn chars(self) -> impl DoubleEndedIterator<Item = char> + 'a {
        let [head, tail] = self.to_array();
        let head_chars = head.chars();
        head_chars.chain(tail.chars())
    }
}

impl<'a> Iterator for Strs<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        let index = self.fwd;
        if index < 2 && index != self.rev {
            self.fwd = index + 1;
            Some(self.arr[index])
        } else {
            None
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.rev - self.fwd;
        (remaining, Some(remaining))
    }
}

impl ExactSizeIterator for Strs<'_> {}

impl DoubleEndedIterator for Strs<'_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        if (1..=2).contains(&self.rev) && self.fwd != self.rev {
            self.rev -= 1;
            Some(self.arr[self.rev])
        } else {
            None
        }
    }
}

impl FusedIterator for Strs<'_> {}
755
756impl AsRef<Bytes> for Bytes {
757    fn as_ref(&self) -> &Bytes {
758        self
759    }
760}
761
762impl std::fmt::Display for Strs<'_> {
763    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
764        let [s0, s1] = self.to_array();
765        write!(f, "{s0}{s1}")
766    }
767}
768
769impl std::fmt::Debug for Bytes {
770    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
771        f.debug_struct("Bytes")
772            .field("buf", &self.strs_inner(..))
773            .field("records", &self.records)
774            .finish()
775    }
776}
777
778impl PartialEq for Bytes {
779    fn eq(&self, other: &Self) -> bool {
780        self.buf.as_slices() == other.buf.as_slices()
781    }
782}
783
784impl PartialEq<&str> for Bytes {
785    fn eq(&self, other: &&str) -> bool {
786        let [s0, s1] = self.strs_inner(..).unwrap();
787        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
788    }
789}
790
791impl PartialEq<String> for Bytes {
792    fn eq(&self, other: &String) -> bool {
793        let [s0, s1] = self.strs_inner(..).unwrap();
794        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
795    }
796}
797
798impl PartialEq for Strs<'_> {
799    fn eq(&self, other: &Self) -> bool {
800        self.to_array() == other.to_array()
801    }
802}
803
804impl PartialEq<&str> for Strs<'_> {
805    fn eq(&self, other: &&str) -> bool {
806        let [s0, s1] = self.to_array();
807        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
808    }
809}
810
811impl PartialEq<String> for Strs<'_> {
812    fn eq(&self, other: &String) -> bool {
813        let [s0, s1] = self.to_array();
814        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
815    }
816}
817
/// Given a first byte, determines how many bytes are in this UTF-8
/// character.
///
/// Returns 0 for bytes that can't start a character, which are the
/// continuation bytes (`0x80..=0xBF`) and the bytes that never show
/// up in valid UTF-8 (`0xC0`, `0xC1` and `0xF5..=0xFF`).
#[must_use]
#[inline]
pub const fn utf8_char_width(b: u8) -> usize {
    // https://tools.ietf.org/html/rfc3629
    match b {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => 0,
    }
}
duat_core/text/bytes.rs

duat_core/text/
bytes.rs