duat_core/text/
bytes.rs

1use std::{
2    iter::FusedIterator,
3    ops::{Range, RangeBounds},
4    str::Utf8Error,
5    sync::LazyLock,
6};
7
8use gapbuf::GapBuffer;
9use lender::{DoubleEndedLender, ExactSizeLender, Lender, Lending};
10
11use super::{Point, RegexPattern, TextRange, records::Records};
12use crate::{opts::PrintOpts, text::TextIndex};
13
14static EMPTY_BYTES: LazyLock<Bytes> = LazyLock::new(Bytes::default);
15
16/// The bytes of a [`Text`], encoded in UTF-8
17///
18/// [`Text`]: super::Text
19#[derive(Default, Clone)]
20pub struct Bytes {
21    buf: GapBuffer<u8>,
22    records: Records,
23}
24
25impl Bytes {
26    /// Returns a new instance of [`Bytes`]
27    ///
28    /// Not intended for public use, it is necessary in duat
29    #[doc(hidden)]
30    pub(crate) fn new(string: &str) -> Self {
31        let buf = GapBuffer::from_iter(string.bytes());
32
33        let len = buf.len();
34        let chars = string.chars().count();
35        let lines = buf.iter().filter(|b| **b == b'\n').count();
36        Self {
37            buf,
38            records: Records::new([len, chars, lines]),
39        }
40    }
41
42    ////////// Querying functions
43
44    /// The [`Point`] at the end of the text
45    pub fn len(&self) -> Point {
46        let [b, c, l] = self.records.max();
47        Point::from_raw(b, c, l)
48    }
49
50    /// Whether or not there are any characters in [`Bytes`], besides
51    /// the final `b'\n'`
52    ///
53    /// # Note
54    ///
55    /// This does not check for tags, so with a [`Tag::Ghost`],
56    /// there could actually be a "string" of characters on the
57    /// [`Text`], it just wouldn't be considered real "text". If you
58    /// want to check for the `InnerTags`'s possible emptyness as
59    /// well, see [`Text::is_empty_empty`].
60    ///
61    /// [`Tag::Ghost`]: super::Ghost
62    /// [`Text`]: super::Text
63    /// [`Text::is_empty_empty`]: super::Text::is_empty_empty
64    pub fn is_empty(&self) -> bool {
65        let (s0, s1) = self.buf.as_slices();
66        (s0 == b"\n" && s1 == b"") || (s0 == b"" && s1 == b"\n")
67    }
68
69    /// The `char` at the [`Point`]'s position
70    pub fn char_at(&self, p: impl TextIndex) -> Option<char> {
71        if p.to_byte_index() >= self.len().byte() {
72            return None;
73        }
74
75        let [s0, s1] = self.strs_inner(..).unwrap();
76        Some(if p.to_byte_index() < s0.len() {
77            s0[p.to_byte_index()..].chars().next().unwrap()
78        } else {
79            s1[p.to_byte_index() - s0.len()..]
80                .chars()
81                .next()
82                .unwrap_or_else(|| panic!("{self:#?}"))
83        })
84    }
85
86    /// An [`Iterator`] over the bytes in a given _byte_ range
87    ///
88    /// Unlike [`strs`], this function works with _byte_ ranges, not
89    /// [`TextRange`]s. That's because [`Strs`] is supposed to return
90    /// valid UTF-8 strings, which need to have valid character
91    /// terminations, so they should be indexed by a character range,
92    /// not a byte range.
93    ///
94    /// Since buffers is based on `[u8]`s, not `str`s, it doesn't have
95    /// the same restrictions, so a byte range can be used instead.
96    ///
97    /// If the range is fully or partially out of bounds, one or both
98    /// of the slices might be empty.
99    ///
100    /// [`strs`]: Self::strs
101    #[track_caller]
102    pub fn slices(&self, range: impl TextRange) -> Slices<'_> {
103        let (s0, s1) = self
104            .buf
105            .range(range.to_range(self.len().byte()))
106            .as_slices();
107        Slices([s0.iter(), s1.iter()])
108    }
109
110    /// An [`Iterator`] over the [`&str`]s of the [`Text`]
111    ///
112    /// # Note
113    ///
114    /// The reason why this function returns two strings is that the
115    /// contents of the text are stored in a [`GapBuffer`], which
116    /// works with two strings.
117    ///
118    /// If you want to iterate over them, you can do the following:
119    ///
120    /// ```rust
121    /// # duat_core::doc_duat!(duat);
122    /// # use duat::prelude::*;
123    /// # let (p0, p1) = (Point::default(), Point::default());
124    /// # let text = Text::new();
125    /// let bytes = text.bytes();
126    ///
127    /// for char in bytes.strs(p0..p1).unwrap().chars() {
128    ///     todo!();
129    /// }
130    /// ```
131    ///
132    /// Do note that you should avoid iterators like [`str::lines`],
133    /// as they will separate the line that is partially owned by each
134    /// [`&str`]:
135    ///
136    /// ```rust
137    /// let broken_up_line = [
138    ///     "This is line 1, business as usual.\nThis is line 2, but it",
139    ///     "is broken into two separate strings.\nSo 4 lines would be counted, instead of 3",
140    /// ];
141    /// ```
142    ///
143    /// This is one way that the inner [`GapBuffer`] could be set up,
144    /// where one of the lines is split among the two slices.
145    ///
146    /// If you wish to iterate over the lines, see [`Bytes::lines`].
147    ///
148    /// [`&str`]: str
149    /// [`Text`]: super::Text
150    /// [range]: TextRange
151    /// [`strs`]: Self::strs
152    #[track_caller]
153    pub fn strs(&self, range: impl TextRange) -> Option<Strs<'_>> {
154        let range = range.to_range(self.len().byte());
155        Some(Strs {
156            bytes: self,
157            range: range.clone(),
158            arr: self.strs_inner(range)?,
159            fwd: 0,
160            rev: 2,
161        })
162    }
163
164    /// Returns an iterator over the lines in a given range
165    ///
166    /// The lines are inclusive, that is, it will iterate over the
167    /// whole line, not just the parts within the range.
168    ///
169    /// [range]: TextRange
170    #[track_caller]
171    pub fn lines(&self, range: impl TextRange) -> Lines<'_> {
172        let range = range.to_range(self.len().byte());
173        let start = self.point_at_line(self.point_at_byte(range.start).line());
174        let end = {
175            let end = self.point_at_byte(range.end);
176            let line_start = self.point_at_line(end.line());
177            match line_start == end {
178                true => end,
179                false => self.point_at_line((end.line() + 1).min(self.len().line())),
180            }
181        };
182
183        // If the gap is outside of the range, we can just iterate through it
184        // regularly
185        let (fwd_i, rev_i) = (start.line(), end.line());
186        if let Some(str) = self.get_contiguous(start..end) {
187            let lines = [str.lines(), "".lines()];
188            Lines::new(lines, None, fwd_i, rev_i)
189        // If the gap is within the range, but on a line split, we
190        // can just iterate through two sets of lines.
191        } else if end.byte() > start.byte() && self.buf[self.buf.gap() - 1] == b'\n' {
192            let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
193            let lines = [s0.lines(), s1.lines()];
194            Lines::new(lines, None, fwd_i, rev_i)
195            // Otherwise, the line that was split will need to be
196            // allocated and returned separately.
197        } else {
198            let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
199
200            let (before, split0) = match s0.rsplit_once('\n') {
201                Some((before, split)) => (before, split),
202                None => ("", s0),
203            };
204            let (after, split1) = match s1.split_once('\n') {
205                Some((after, split)) => (after, split),
206                None => ("", s1),
207            };
208
209            let lines = [before.lines(), after.lines()];
210            let split_line = Some(split0.to_string() + split1);
211            Lines::new(lines, split_line, fwd_i, rev_i)
212        }
213    }
214
215    /// Returns the two `&str`s in the byte range.
216    #[track_caller]
217    fn strs_inner(&self, range: impl RangeBounds<usize>) -> Option<[&str; 2]> {
218        let (start, end) = crate::utils::get_ends(range, self.len().byte());
219        use std::str::from_utf8_unchecked;
220
221        let (s0, s1) = self.buf.as_slices();
222
223        // Check if the slices match utf8 boundaries.
224        if s0.first().is_some_and(|b| utf8_char_width(*b) == 0)
225            || s1.first().is_some_and(|b| utf8_char_width(*b) == 0)
226            || self.buf.get(end).is_some_and(|b| utf8_char_width(*b) == 0)
227        {
228            return None;
229        }
230
231        Some(unsafe {
232            let r0 = start.min(s0.len())..end.min(s0.len());
233            let r1 = start.saturating_sub(s0.len()).min(s1.len())
234                ..end.saturating_sub(s0.len()).min(s1.len());
235
236            [from_utf8_unchecked(&s0[r0]), from_utf8_unchecked(&s1[r1])]
237        })
238    }
239
240    /// The [`Point`] corresponding to the byte position, 0 indexed
241    ///
242    /// If the byte position would fall in between two characters
243    /// (because the first one comprises more than one byte), the
244    /// first character is chosen as the [`Point`] where the byte is
245    /// located.
246    ///
247    /// # Panics
248    ///
249    /// Will panic if `b` is greater than the length of the text
250    #[inline(always)]
251    #[track_caller]
252    pub fn point_at_byte(&self, b: usize) -> Point {
253        assert!(
254            b <= self.len().byte(),
255            "byte out of bounds: the len is {}, but the byte is {b}",
256            self.len().byte()
257        );
258
259        let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(b, |[b, ..]| b);
260
261        let found = if b >= c_b {
262            let [s0, s1] = self.strs_inner(c_b..).unwrap();
263
264            s0.char_indices()
265                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
266                .enumerate()
267                .map(|(i, (this_b, char))| {
268                    c_l += (char == '\n') as usize;
269                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
270                })
271                .take_while(|&(rhs, ..)| b >= rhs)
272                .last()
273        } else {
274            let mut c_len = 0;
275            self.strs_inner(..c_b)
276                .unwrap()
277                .into_iter()
278                .flat_map(str::chars)
279                .rev()
280                .enumerate()
281                .map(|(i, char)| {
282                    c_l -= (char == '\n') as usize;
283                    c_len += char.len_utf8();
284                    (c_b - c_len, c_c - (i + 1), c_l)
285                })
286                .take_while(|&(rhs, ..)| b <= rhs)
287                .last()
288        };
289
290        found
291            .map(|(b, c, l)| Point::from_raw(b, c, l))
292            .unwrap_or(self.len())
293    }
294
295    /// The [`Point`] associated with the `c`th char
296    ///
297    /// # Panics
298    ///
299    /// Will panic if `c` is greater than the number of chars in the
300    /// text.
301    #[inline(always)]
302    #[track_caller]
303    pub fn point_at_char(&self, c: usize) -> Point {
304        assert!(
305            c <= self.len().char(),
306            "char out of bounds: the len is {}, but the char is {c}",
307            self.len().char()
308        );
309
310        let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(c, |[_, c, _]| c);
311
312        let found = if c >= c_c {
313            let [s0, s1] = self.strs_inner(c_b..).unwrap();
314
315            s0.char_indices()
316                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
317                .enumerate()
318                .map(|(i, (this_b, char))| {
319                    c_l += (char == '\n') as usize;
320                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
321                })
322                .take_while(|&(_, rhs, _)| c >= rhs)
323                .last()
324        } else {
325            let mut c_len = 0;
326            self.strs_inner(..c_b)
327                .unwrap()
328                .into_iter()
329                .flat_map(str::chars)
330                .rev()
331                .enumerate()
332                .map(|(i, char)| {
333                    c_l -= (char == '\n') as usize;
334                    c_len += char.len_utf8();
335                    (c_b - c_len, c_c - (i + 1), c_l)
336                })
337                .take_while(|&(_, rhs, _)| c <= rhs)
338                .last()
339        };
340
341        found
342            .map(|(b, c, l)| Point::from_raw(b, c, l))
343            .unwrap_or(self.len())
344    }
345
346    /// The [`Point`] where the `l`th line starts, 0 indexed
347    ///
348    /// If `l == number_of_lines`, returns the last point of the
349    /// text.
350    ///
351    /// # Panics
352    ///
353    /// Will panic if the number `l` is greater than the number of
354    /// lines on the text
355    #[inline(always)]
356    #[track_caller]
357    pub fn point_at_line(&self, l: usize) -> Point {
358        assert!(
359            l <= self.len().line(),
360            "line out of bounds: the len is {}, but the line is {l}",
361            self.len().line()
362        );
363
364        let (c_b, c_c, mut c_l) = {
365            let [b, c, l] = self.records.closest_to_by_key(l, |[.., l]| l);
366            let (b, c) = self
367                .strs_inner(..b)
368                .unwrap()
369                .into_iter()
370                .flat_map(str::chars)
371                .rev()
372                .take_while(|c| *c != '\n')
373                .fold((b, c), |(b, c), char| (b - char.len_utf8(), c - 1));
374            (b, c, l)
375        };
376
377        let found = if l >= c_l {
378            let [s0, s1] = self.strs_inner(c_b..).unwrap();
379
380            s0.char_indices()
381                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
382                .enumerate()
383                .map(|(i, (this_b, char))| {
384                    c_l += (char == '\n') as usize;
385                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
386                })
387                .find(|&(.., rhs)| l == rhs)
388        } else {
389            let mut c_len = 0;
390            self.strs_inner(..c_b)
391                .unwrap()
392                .into_iter()
393                .flat_map(str::chars)
394                .rev()
395                .enumerate()
396                .map(|(i, char)| {
397                    c_l -= (char == '\n') as usize;
398                    c_len += char.len_utf8();
399                    (c_b - c_len, c_c - (i + 1), c_l)
400                })
401                .take_while(|&(.., rhs)| l <= rhs)
402                .last()
403        };
404
405        found
406            .map(|(b, c, l)| Point::from_raw(b, c, l))
407            .unwrap_or(self.len())
408    }
409
410    /// The start and end [`Point`]s for the `l`th line
411    ///
412    /// If `l == number_of_lines`, these points will be the same.
413    ///
414    /// The second number _includes_ the `\n` at the end of the line.
415    ///
416    /// # Panics
417    ///
418    /// Will panic if the number `l` is greater than the number of
419    /// lines on the text
420    #[inline(always)]
421    #[track_caller]
422    pub fn line_range(&self, l: usize) -> Range<Point> {
423        assert!(
424            l <= self.len().line(),
425            "line out of bounds: the len is {}, but the line is {l}",
426            self.len().line()
427        );
428
429        let start = self.point_at_line(l);
430        let end = self
431            .chars_fwd(start..)
432            .unwrap()
433            .find_map(|(p, _)| (p.line() > start.line()).then_some(p))
434            .unwrap_or(self.len());
435        start..end
436    }
437
438    /// The last [`Point`] associated with a `char`
439    ///
440    /// This will give the [`Point`] of the last `char` of the text.
441    /// The difference between this method and [`len`] is that
442    /// it will return a [`Point`] one position earlier than it. If
443    /// the text is completely empty, it will return [`None`].
444    ///
445    /// [`len`]: Self::len
446    pub fn last_point(&self) -> Point {
447        self.len().rev('\n')
448    }
449
450    /// A forward iterator of the [`char`]s of [`Bytes`]
451    ///
452    /// Each [`char`] will be accompanied by a [`Point`], which is the
453    /// position where said character starts, e.g.
454    /// [`Point::default()`] for the first character
455    #[track_caller]
456    pub fn chars_fwd(
457        &self,
458        range: impl TextRange,
459    ) -> Option<impl Iterator<Item = (Point, char)> + '_> {
460        let range = range.to_range(self.len().byte());
461        let p = self.point_at_byte(range.start);
462        Some(self.strs(range)?.chars().scan(p, |p, char| {
463            let old_p = *p;
464            *p = p.fwd(char);
465            Some((old_p, char))
466        }))
467    }
468
469    /// A reverse iterator of the [`char`]s in [`Bytes`]
470    ///
471    /// Each [`char`] will be accompanied by a [`Point`], which is the
472    /// position where said character starts, e.g.
473    /// [`Point::default()`] for the first character
474    #[track_caller]
475    pub fn chars_rev(
476        &self,
477        range: impl TextRange,
478    ) -> Option<impl Iterator<Item = (Point, char)> + '_> {
479        let range = range.to_range(self.len().byte());
480        let p = self.point_at_byte(range.end);
481        Some(self.strs(range)?.chars().rev().scan(p, |p, char| {
482            *p = p.rev(char);
483            Some((*p, char))
484        }))
485    }
486
487    /// Gets the indentation level on the current line
488    pub fn indent(&self, p: Point, opts: PrintOpts) -> usize {
489        let range = self.line_range(p.line());
490        self.chars_fwd(range.start..)
491            .unwrap()
492            .map_while(|(_, c)| match c {
493                ' ' => Some(1),
494                '\t' => Some(opts.tabstop as usize),
495                _ => None,
496            })
497            .sum()
498    }
499
500    ////////// Modification functions
501
502    /// Applies a [`Change`] to the [`GapBuffer`] within
503    ///
504    /// [`Change`]: super::Change
505    pub(crate) fn apply_change(&mut self, change: super::Change<&str>) {
506        let edit = change.added_str();
507        let start = change.start();
508
509        let range = start.byte()..change.taken_end().byte();
510        self.buf.splice(range, edit.bytes());
511
512        let start_rec = [start.byte(), start.char(), start.line()];
513        let old_len = [
514            change.taken_end().byte() - start.byte(),
515            change.taken_end().char() - start.char(),
516            change.taken_end().line() - start.line(),
517        ];
518        let new_len = [
519            change.added_end().byte() - start.byte(),
520            change.added_end().char() - start.char(),
521            change.added_end().line() - start.line(),
522        ];
523
524        self.records.transform(start_rec, old_len, new_len);
525        self.records.insert(start_rec);
526    }
527
528    /// Adds a record in the given position
529    #[track_caller]
530    pub(super) fn add_record(&mut self, [b, c, l]: [usize; 3]) {
531        self.records.insert([b, c, l]);
532    }
533
534    ////////// One str functions
535
536    /// Tries to get a contiguous [`&str`] from the [`Bytes`]
537    ///
538    /// Returns [`None`] if the gap of the inner buffer was within the
539    /// given range *OR*.
540    ///
541    /// [`&str`]: str
542    pub fn get_contiguous(&self, range: impl TextRange) -> Option<&str> {
543        let range = range.to_range(self.len().byte());
544        let [s0, s1] = self.strs_inner(..).unwrap();
545
546        if range.end <= self.buf.gap() {
547            s0.get(range)
548        } else {
549            let gap = self.buf.gap();
550            s1.get(range.start.checked_sub(gap)?..range.end.checked_sub(gap)?)
551        }
552    }
553}
554
555/// A [`Lender`] over the lines on [`Bytes`]
556///
557/// The reason for this being a [`Lender`], rather than a regular
558/// [`Iterator`] is because the [`Bytes`] use a [`GapBuffer`] within,
559/// which means that any line may be split in two. In order to still
560/// return it as an `&str`, a new [`String`] needs to be allocated,
561/// which will be owned by the [`Lines`], hence the [`Lender`] trait.
562pub struct Lines<'a> {
563    lines: [std::str::Lines<'a>; 2],
564    split_line: Option<String>,
565    fwd_i: usize,
566    rev_i: usize,
567    split_line_used: bool,
568}
569
570impl<'a> Lines<'a> {
571    fn new(
572        lines: [std::str::Lines<'a>; 2],
573        split_line: Option<String>,
574        fwd_i: usize,
575        rev_i: usize,
576    ) -> Self {
577        Self {
578            lines,
579            split_line,
580            fwd_i,
581            rev_i,
582            split_line_used: false,
583        }
584    }
585}
586
587impl<'a, 'text> Lending<'a> for Lines<'text> {
588    type Lend = (usize, &'a str);
589}
590
591impl<'a> Lender for Lines<'a> {
592    fn next(&mut self) -> Option<lender::Lend<'_, Self>> {
593        self.lines[0]
594            .next()
595            .or_else(|| {
596                if self.split_line_used {
597                    None
598                } else {
599                    self.split_line_used = true;
600                    self.split_line.as_deref()
601                }
602            })
603            .or_else(|| self.lines[1].next())
604            .map(|line| {
605                self.fwd_i += 1;
606                (self.fwd_i - 1, line)
607            })
608    }
609
610    fn size_hint(&self) -> (usize, Option<usize>) {
611        (self.rev_i - self.fwd_i, Some(self.rev_i - self.fwd_i))
612    }
613}
614
615impl<'a> DoubleEndedLender for Lines<'a> {
616    fn next_back(&mut self) -> Option<lender::Lend<'_, Self>> {
617        self.lines[1]
618            .next_back()
619            .or_else(|| {
620                if self.split_line_used {
621                    None
622                } else {
623                    self.split_line_used = true;
624                    self.split_line.as_deref()
625                }
626            })
627            .or_else(|| self.lines[0].next_back())
628            .map(|line| {
629                self.rev_i -= 1;
630                (self.rev_i, line)
631            })
632    }
633}
634
635impl<'a> ExactSizeLender for Lines<'a> {}
636
637/// An [`Iterator`] over the bytes in a [`Text`]
638///
639/// [`Text`]: super::Text
640#[derive(Clone)]
641pub struct Slices<'a>([std::slice::Iter<'a, u8>; 2]);
642
643impl<'a> Slices<'a> {
644    /// Converts this [`Iterator`] into an array of its two parts
645    pub fn to_array(&self) -> [&'a [u8]; 2] {
646        self.0.clone().map(|iter| iter.as_slice())
647    }
648
649    /// Tries to create a [`String`] out of the two buffers
650    ///
651    /// # Errors
652    ///
653    /// This function will return an error if the bounds of the slices
654    /// don't correspond to utf8 character boundaries, or if the gap
655    /// within these slices doesn't correspond to a utf8 character
656    /// boundary.
657    pub fn try_to_string(self) -> Result<String, Utf8Error> {
658        let [s0, s1] = self.0.map(|arr| arr.as_slice());
659        Ok([str::from_utf8(s0)?, str::from_utf8(s1)?].join(""))
660    }
661
662    /// Treats the inner slices as `&str`s and iterates over their
663    /// characters
664    ///
665    /// You will want to use this function iff you don't want to check
666    /// for character boundaries at the edges (very rarely). Otherwise
667    /// [`bytes.strs({byte_range}).chars()`] instead.
668    ///
669    /// # Safety
670    ///
671    /// You must ensure that the `Slices` were acquired from valid
672    /// byte ranges which coincide with character terminations. If you
673    /// are unsure of that, you should use [`Strs::chars`] instead.
674    ///
675    /// [`bytes.strs({byte_range}).chars()`]: Bytes::strs
676    pub unsafe fn chars_unchecked(self) -> impl Iterator<Item = char> {
677        self.0
678            .into_iter()
679            .flat_map(|iter| unsafe { str::from_utf8_unchecked(iter.as_slice()) }.chars())
680    }
681}
682
683impl<'a> Iterator for Slices<'a> {
684    type Item = u8;
685
686    fn next(&mut self) -> Option<Self::Item> {
687        self.0[0].next().or_else(|| self.0[1].next()).copied()
688    }
689
690    fn size_hint(&self) -> (usize, Option<usize>) {
691        let (l0, u0) = self.0[0].size_hint();
692        let (l1, u1) = self.0[1].size_hint();
693        (l0 + l1, Some(u0.unwrap() + u1.unwrap()))
694    }
695}
696
697impl<'a> ExactSizeIterator for Slices<'a> {}
698
699impl<'a> DoubleEndedIterator for Slices<'a> {
700    fn next_back(&mut self) -> Option<Self::Item> {
701        self.0[1]
702            .next_back()
703            .or_else(|| self.0[0].next_back())
704            .copied()
705    }
706}
707
708/// An [`Iterator`] over the [`&str`]s in a [`Text`]
709///
710/// [`&str`]: str
711/// [`Text`]: super::Text
712#[derive(Clone)]
713pub struct Strs<'a> {
714    bytes: &'a Bytes,
715    range: Range<usize>,
716    arr: [&'a str; 2],
717    fwd: usize,
718    rev: usize,
719}
720
721impl<'a> Strs<'a> {
722    /// Converts this [`Iterator`] into an array of its two parts
723    pub fn to_array(&self) -> [&'a str; 2] {
724        self.arr
725    }
726
727    /// Iterates over the [`char`]s of both [`&str`]s
728    ///
729    /// [`&str`]: str
730    pub fn chars(self) -> impl DoubleEndedIterator<Item = char> + 'a {
731        let [s0, s1] = self.arr;
732        s0.chars().chain(s1.chars())
733    }
734
735    /// Returns `true` if the [`RegexPattern`] can be found in the
736    /// [`Strs`]s
737    pub fn contains<P: RegexPattern>(&self, pat: P) -> Result<bool, Box<regex_syntax::Error>> {
738        self.bytes
739            .search_fwd(pat, self.range.clone())
740            .map(|mut iter| iter.next().is_some())
741    }
742}
743
744impl Strs<'static> {
745    /// An empty `Strs`, useful in some circumstances
746    pub fn empty() -> Self {
747        Self {
748            bytes: &*EMPTY_BYTES,
749            range: 0..0,
750            arr: [""; 2],
751            fwd: 0,
752            rev: 0,
753        }
754    }
755}
756
757impl<'a> Iterator for Strs<'a> {
758    type Item = &'a str;
759
760    fn next(&mut self) -> Option<Self::Item> {
761        match self.fwd {
762            0 | 1 if self.fwd != self.rev => {
763                self.fwd += 1;
764                Some(self.arr[self.fwd - 1])
765            }
766            _ => None,
767        }
768    }
769
770    fn size_hint(&self) -> (usize, Option<usize>) {
771        (self.rev - self.fwd, Some(self.rev - self.fwd))
772    }
773}
774
775impl ExactSizeIterator for Strs<'_> {}
776
777impl DoubleEndedIterator for Strs<'_> {
778    fn next_back(&mut self) -> Option<Self::Item> {
779        match self.rev {
780            1 | 2 if self.fwd != self.rev => {
781                self.rev -= 1;
782                Some(self.arr[self.rev])
783            }
784            _ => None,
785        }
786    }
787}
788
789impl FusedIterator for Strs<'_> {}
790
791impl AsRef<Bytes> for Bytes {
792    fn as_ref(&self) -> &Bytes {
793        self
794    }
795}
796
797impl std::fmt::Display for Strs<'_> {
798    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
799        let [s0, s1] = self.to_array();
800        write!(f, "{s0}{s1}")
801    }
802}
803
804impl std::fmt::Debug for Bytes {
805    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
806        f.debug_struct("Bytes")
807            .field("buf", &self.strs_inner(..))
808            .field("records", &self.records)
809            .finish()
810    }
811}
812
813impl PartialEq for Bytes {
814    fn eq(&self, other: &Self) -> bool {
815        self.buf.as_slices() == other.buf.as_slices()
816    }
817}
818
819impl PartialEq<&str> for Bytes {
820    fn eq(&self, other: &&str) -> bool {
821        let [s0, s1] = self.strs_inner(..).unwrap();
822        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
823    }
824}
825
826impl PartialEq<String> for Bytes {
827    fn eq(&self, other: &String) -> bool {
828        let [s0, s1] = self.strs_inner(..).unwrap();
829        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
830    }
831}
832
833impl PartialEq for Strs<'_> {
834    fn eq(&self, other: &Self) -> bool {
835        self.to_array() == other.to_array()
836    }
837}
838
839impl PartialEq<&str> for Strs<'_> {
840    fn eq(&self, other: &&str) -> bool {
841        let [s0, s1] = self.to_array();
842        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
843    }
844}
845
846impl PartialEq<String> for Strs<'_> {
847    fn eq(&self, other: &String) -> bool {
848        let [s0, s1] = self.to_array();
849        other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
850    }
851}
852
853impl PartialEq<Bytes> for &str {
854    fn eq(&self, other: &Bytes) -> bool {
855        other == self
856    }
857}
858
859impl PartialEq<Bytes> for String {
860    fn eq(&self, other: &Bytes) -> bool {
861        other == self
862    }
863}
864
865impl PartialEq<Strs<'_>> for &str {
866    fn eq(&self, other: &Strs) -> bool {
867        other == self
868    }
869}
870
871impl PartialEq<Strs<'_>> for String {
872    fn eq(&self, other: &Strs) -> bool {
873        other == self
874    }
875}
876/// Given a first byte, determines how many bytes are in this UTF-8
877/// character.
878#[must_use]
879#[inline]
880pub const fn utf8_char_width(b: u8) -> usize {
881    // https://tools.ietf.org/html/rfc3629
882    const UTF8_CHAR_WIDTH: &[u8; 256] = &[
883        // 1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
884        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0
885        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1
886        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2
887        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3
888        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
889        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5
890        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
891        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
892        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8
893        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9
894        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A
895        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B
896        0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C
897        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D
898        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E
899        4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // F
900    ];
901    UTF8_CHAR_WIDTH[b as usize] as usize
902}
duat_core/text/bytes.rs

duat_core/text/
bytes.rs