duat_core/text/bytes/
mod.rs

1use std::{
2    ops::{ControlFlow, Range, RangeBounds},
3    str::Utf8Error,
4};
5
6use gapbuf::GapBuffer;
7use lender::{DoubleEndedLender, ExactSizeLender, Lender, Lending};
8
9pub use crate::text::bytes::strs::Strs;
10use crate::{
11    buffer::Change,
12    opts::PrintOpts,
13    text::{Point, TextIndex, TextRange, records::Records, utils::implPartialEq},
14};
15
16mod strs;
17
/// The bytes of a [`Text`], encoded in UTF-8
///
/// The bytes live in a [`GapBuffer`], so at any moment the content
/// may be physically split into two slices around the gap.
///
/// [`Text`]: super::Text
#[derive(Default, Clone)]
pub struct Bytes {
    /// The raw bytes of the text.
    buf: GapBuffer<u8>,
    /// Known `[byte, char, line]` positions, consulted by the
    /// `point_at_*` methods to find a starting position close to
    /// the one being searched for.
    records: Records,
    /// State value exposed to the parent module; it is initialized
    /// to `0` by [`Bytes::new`] and is not otherwise touched in this
    /// file.
    pub(super) bytes_state: u64,
}
27
28impl Bytes {
29    /// Returns a new instance of [`Bytes`]
30    ///
31    /// Not intended for public use, it is necessary in duat
32    #[doc(hidden)]
33    pub(crate) fn new(string: &str) -> Self {
34        let buf = GapBuffer::from_iter(string.bytes());
35
36        let len = buf.len();
37        let chars = string.chars().count();
38        let lines = buf.iter().filter(|b| **b == b'\n').count();
39        Self {
40            buf,
41            records: Records::new([len, chars, lines]),
42            bytes_state: 0,
43        }
44    }
45
46    ////////// Querying functions
47
48    /// The [`Point`] at the end of the text
49    pub fn len(&self) -> Point {
50        let [b, c, l] = self.records.max();
51        Point::from_raw(b, c, l)
52    }
53
54    /// Whether or not there are any characters in [`Bytes`], besides
55    /// the final `b'\n'`
56    ///
57    /// # Note
58    ///
59    /// This does not check for tags, so with a [`Tag::Ghost`],
60    /// there could actually be a "string" of characters on the
61    /// [`Text`], it just wouldn't be considered real "text". If you
62    /// want to check for the `InnerTags`'b possible emptyness as
63    /// well, see [`Text::is_empty_empty`].
64    ///
65    /// [`Tag::Ghost`]: super::Ghost
66    /// [`Text`]: super::Text
67    /// [`Text::is_empty_empty`]: super::Text::is_empty_empty
68    pub fn is_empty(&self) -> bool {
69        let (s0, s1) = self.buf.as_slices();
70        (s0 == b"\n" && s1 == b"") || (s0 == b"" && s1 == b"\n")
71    }
72
73    /// The `char` at the [`Point`]'b position
74    pub fn char_at(&self, p: impl TextIndex) -> Option<char> {
75        if p.to_byte_index() >= self.len().byte() {
76            return None;
77        }
78
79        let [s0, s1] = self.strs_inner(..).unwrap();
80        Some(if p.to_byte_index() < s0.len() {
81            s0[p.to_byte_index()..].chars().next().unwrap()
82        } else {
83            s1[p.to_byte_index() - s0.len()..]
84                .chars()
85                .next()
86                .unwrap_or_else(|| panic!("{self:#?}"))
87        })
88    }
89
90    /// A subslice of the [`Bytes`]
91    ///
92    /// Note that this `TextRange` is relative to the whole [`Bytes`]
93    /// struct, not just this [`Strs`]. This method also clips the
94    /// ranges so they fit into the range of these `Strs`.
95    ///
96    /// It will return [`None`] if the range does not start or end in
97    /// valid utf8 boundaries.
98    ///
99    /// # Note
100    ///
101    /// The reason why this function returns two strings is that the
102    /// contents of the text are stored in a [`GapBuffer`], which
103    /// works with two strings.
104    ///
105    /// If you want to iterate over them, you can do the following:
106    ///
107    /// ```rust
108    /// # duat_core::doc_duat!(duat);
109    /// # use duat::prelude::*;
110    /// # let (p0, p1) = (Point::default(), Point::default());
111    /// # let text = Text::new();
112    /// let bytes = text.bytes();
113    ///
114    /// for char in bytes.strs(p0..p1).unwrap().chars() {
115    ///     todo!();
116    /// }
117    /// ```
118    ///
119    /// Do note that you should avoid iterators like [`str::lines`],
120    /// as they will separate the line that is partially owned by each
121    /// [`&str`]:
122    ///
123    /// ```rust
124    /// let broken_up_line = [
125    ///     "This is line 1, business as usual.\nThis is line 2, but it",
126    ///     "is broken into two separate strings.\nSo 4 lines would be counted, instead of 3",
127    /// ];
128    /// ```
129    ///
130    /// This is one way that the inner [`GapBuffer`] could be set up,
131    /// where one of the lines is split among the two slices.
132    ///
133    /// If you wish to iterate over the lines, see [`Bytes::lines`].
134    ///
135    /// [`&str`]: str
136    /// [`Text`]: super::Text
137    /// [range]: TextRange
138    /// [`strs`]: Self::strs
139    pub fn strs(&self, range: impl TextRange) -> Option<Strs<'_>> {
140        let range = range.to_range(self.len().byte());
141
142        Some(Strs::new(
143            self,
144            (range.start, range.end),
145            self.strs_inner(range)?,
146        ))
147    }
148
149    /// An [`Iterator`] over the bytes in a given _byte_ range
150    ///
151    /// Unlike [`strs`], this function works with _byte_ ranges, not
152    /// [`TextRange`]s. That'b because [`Strs`] is supposed to return
153    /// valid UTF-8 strings, which need to have valid character
154    /// terminations, so they should be indexed by a character range,
155    /// not a byte range.
156    ///
157    /// Since buffers is based on `[u8]`s, not `str`s, it doesn't have
158    /// the same restrictions, so a byte range can be used instead.
159    ///
160    /// If the range is fully or partially out of bounds, one or both
161    /// of the slices might be empty.
162    ///
163    /// [`strs`]: Self::strs
164    #[track_caller]
165    pub fn slices(&self, range: impl TextRange) -> Slices<'_> {
166        let (s0, s1) = self
167            .buf
168            .range(range.to_range(self.len().byte()))
169            .as_slices();
170        Slices([s0.iter(), s1.iter()])
171    }
172
173    /// Returns an iterator over the lines in a given range
174    ///
175    /// The lines are inclusive, that is, it will iterate over the
176    /// whole line, not just the parts within the range.
177    ///
178    /// [range]: TextRange
179    #[track_caller]
180    pub fn lines(&self, range: impl TextRange) -> Lines<'_> {
181        let range = range.to_range(self.len().byte());
182        let start = self.point_at_line(self.point_at_byte(range.start).line());
183        let end = {
184            let end = self.point_at_byte(range.end);
185            let line_start = self.point_at_line(end.line());
186            if line_start == end {
187                end
188            } else {
189                self.point_at_line((end.line() + 1).min(self.len().line()))
190            }
191        };
192
193        // If the gap is outside of the range, we can just iterate through it
194        // regularly
195        let (fwd_i, rev_i) = (start.line(), end.line());
196        if let Some(str) = self.get_contiguous(start..end) {
197            let lines = [str.lines(), "".lines()];
198            Lines::new(lines, None, fwd_i, rev_i)
199        // If the gap is within the range, but on a line split, we
200        // can just iterate through two sets of lines.
201        } else if end.byte() > start.byte() && self.buf[self.buf.gap() - 1] == b'\n' {
202            let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
203            let lines = [s0.lines(), s1.lines()];
204            Lines::new(lines, None, fwd_i, rev_i)
205            // Otherwise, the line that was split will need to be
206            // allocated and returned separately.
207        } else {
208            let [s0, s1] = self.strs_inner(start.byte()..end.byte()).unwrap();
209
210            let (before, split0) = match s0.rsplit_once('\n') {
211                Some((before, split)) => (before, split),
212                None => ("", s0),
213            };
214            let (after, split1) = match s1.split_once('\n') {
215                Some((split, after)) => (after, split),
216                None => ("", s1),
217            };
218
219            let lines = [before.lines(), after.lines()];
220            let split_line = Some(split0.to_string() + split1);
221            Lines::new(lines, split_line, fwd_i, rev_i)
222        }
223    }
224
225    /// Returns the two `&str`s in the byte range.
226    #[track_caller]
227    fn strs_inner(&self, range: impl RangeBounds<usize>) -> Option<[&str; 2]> {
228        let range = crate::utils::get_range(range, self.len().byte());
229        use std::str::from_utf8_unchecked;
230
231        let (s0, s1) = self.buf.as_slices();
232
233        // Check if the slices match utf8 boundaries.
234        if s0.first().is_some_and(|b| utf8_char_width(*b) == 0)
235            || s1.first().is_some_and(|b| utf8_char_width(*b) == 0)
236            || self
237                .buf
238                .get(range.end)
239                .is_some_and(|b| utf8_char_width(*b) == 0)
240        {
241            return None;
242        }
243
244        Some(unsafe {
245            let r0 = range.start.min(s0.len())..range.end.min(s0.len());
246            let r1 = range.start.saturating_sub(s0.len()).min(s1.len())
247                ..range.end.saturating_sub(s0.len()).min(s1.len());
248
249            [from_utf8_unchecked(&s0[r0]), from_utf8_unchecked(&s1[r1])]
250        })
251    }
252
253    /// The [`Point`] corresponding to the byte position, 0 indexed
254    ///
255    /// If the byte position would fall in between two characters
256    /// (because the first one comprises more than one byte), the
257    /// first character is chosen as the [`Point`] where the byte is
258    /// located.
259    ///
260    /// # Panics
261    ///
262    /// Will panic if `b` is greater than the length of the text
263    #[inline(always)]
264    #[track_caller]
265    pub fn point_at_byte(&self, b: usize) -> Point {
266        assert!(
267            b <= self.len().byte(),
268            "byte out of bounds: the len is {}, but the byte is {b}",
269            self.len().byte()
270        );
271
272        let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(b, |[b, ..]| b);
273
274        let found = if b >= c_b {
275            let [s0, s1] = self.strs_inner(c_b..).unwrap();
276
277            s0.char_indices()
278                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
279                .enumerate()
280                .map(|(i, (this_b, char))| {
281                    c_l += (char == '\n') as usize;
282                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
283                })
284                .take_while(|&(rhs, ..)| b >= rhs)
285                .last()
286        } else {
287            let mut c_len = 0;
288            self.strs_inner(..c_b)
289                .unwrap()
290                .into_iter()
291                .flat_map(str::chars)
292                .rev()
293                .enumerate()
294                .map(|(i, char)| {
295                    c_l -= (char == '\n') as usize;
296                    c_len += char.len_utf8();
297                    (c_b - c_len, c_c - (i + 1), c_l)
298                })
299                .take_while(|&(rhs, ..)| b <= rhs)
300                .last()
301        };
302
303        found
304            .map(|(b, c, l)| Point::from_raw(b, c, l))
305            .unwrap_or(self.len())
306    }
307
308    /// The [`Point`] associated with the `c`th char
309    ///
310    /// # Panics
311    ///
312    /// Will panic if `c` is greater than the number of chars in the
313    /// text.
314    #[inline(always)]
315    #[track_caller]
316    pub fn point_at_char(&self, c: usize) -> Point {
317        assert!(
318            c <= self.len().char(),
319            "char out of bounds: the len is {}, but the char is {c}",
320            self.len().char()
321        );
322
323        let [c_b, c_c, mut c_l] = self.records.closest_to_by_key(c, |[_, c, _]| c);
324
325        let found = if c >= c_c {
326            let [s0, s1] = self.strs_inner(c_b..).unwrap();
327
328            s0.char_indices()
329                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
330                .enumerate()
331                .map(|(i, (this_b, char))| {
332                    c_l += (char == '\n') as usize;
333                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
334                })
335                .take_while(|&(_, rhs, _)| c >= rhs)
336                .last()
337        } else {
338            let mut c_len = 0;
339            self.strs_inner(..c_b)
340                .unwrap()
341                .into_iter()
342                .flat_map(str::chars)
343                .rev()
344                .enumerate()
345                .map(|(i, char)| {
346                    c_l -= (char == '\n') as usize;
347                    c_len += char.len_utf8();
348                    (c_b - c_len, c_c - (i + 1), c_l)
349                })
350                .take_while(|&(_, rhs, _)| c <= rhs)
351                .last()
352        };
353
354        found
355            .map(|(b, c, l)| Point::from_raw(b, c, l))
356            .unwrap_or(self.len())
357    }
358
359    /// The [`Point`] where the `l`th line starts, 0 indexed
360    ///
361    /// If `l == number_of_lines`, returns the last point of the
362    /// text.
363    ///
364    /// # Panics
365    ///
366    /// Will panic if the number `l` is greater than the number of
367    /// lines on the text
368    #[inline(always)]
369    #[track_caller]
370    pub fn point_at_line(&self, l: usize) -> Point {
371        assert!(
372            l <= self.len().line(),
373            "line out of bounds: the len is {}, but the line is {l}",
374            self.len().line()
375        );
376
377        let (c_b, c_c, mut c_l) = {
378            let [b, c, l] = self.records.closest_to_by_key(l, |[.., l]| l);
379            let (b, c) = self
380                .strs_inner(..b)
381                .unwrap()
382                .into_iter()
383                .flat_map(str::chars)
384                .rev()
385                .take_while(|c| *c != '\n')
386                .fold((b, c), |(b, c), char| (b - char.len_utf8(), c - 1));
387            (b, c, l)
388        };
389
390        let found = if l >= c_l {
391            let [s0, s1] = self.strs_inner(c_b..).unwrap();
392
393            s0.char_indices()
394                .chain(s1.char_indices().map(|(b, char)| (b + s0.len(), char)))
395                .enumerate()
396                .map(|(i, (this_b, char))| {
397                    c_l += (char == '\n') as usize;
398                    (c_b + this_b, c_c + i, c_l - (char == '\n') as usize)
399                })
400                .find(|&(.., rhs)| l == rhs)
401        } else {
402            let mut c_len = 0;
403            self.strs_inner(..c_b)
404                .unwrap()
405                .into_iter()
406                .flat_map(str::chars)
407                .rev()
408                .enumerate()
409                .map(|(i, char)| {
410                    c_l -= (char == '\n') as usize;
411                    c_len += char.len_utf8();
412                    (c_b - c_len, c_c - (i + 1), c_l)
413                })
414                .take_while(|&(.., rhs)| l <= rhs)
415                .last()
416        };
417
418        found
419            .map(|(b, c, l)| Point::from_raw(b, c, l))
420            .unwrap_or(self.len())
421    }
422
423    /// The start and end [`Point`]s for the `l`th line
424    ///
425    /// If `l == number_of_lines`, these points will be the same.
426    ///
427    /// The second number _includes_ the `\n` at the end of the line.
428    ///
429    /// # Panics
430    ///
431    /// Will panic if the number `l` is greater than the number of
432    /// lines on the text
433    #[inline(always)]
434    #[track_caller]
435    pub fn line_range(&self, l: usize) -> Range<Point> {
436        assert!(
437            l <= self.len().line(),
438            "line out of bounds: the len is {}, but the line is {l}",
439            self.len().line()
440        );
441
442        let start = self.point_at_line(l);
443        let (ControlFlow::Continue(end) | ControlFlow::Break(end)) = self
444            .chars_fwd(start..)
445            .unwrap()
446            .try_fold(start, |end, (_, char)| match end.line() == start.line() {
447                true => ControlFlow::Continue(end.fwd(char)),
448                false => ControlFlow::Break(end),
449            });
450
451        start..end
452    }
453
454    /// The last [`Point`] associated with a `char`
455    ///
456    /// This will give the [`Point`] of the last `char` of the text.
457    /// The difference between this method and [`len`] is that
458    /// it will return a [`Point`] one position earlier than it. If
459    /// the text is completely empty, it will return [`None`].
460    ///
461    /// [`len`]: Self::len
462    pub fn last_point(&self) -> Point {
463        self.len().rev('\n')
464    }
465
466    /// A forward iterator of the [`char`]s of [`Bytes`]
467    ///
468    /// Each [`char`] will be accompanied by a byte index, which is
469    /// the position where said character starts, e.g. `0` for the
470    /// first character.
471    #[track_caller]
472    pub fn chars_fwd(
473        &self,
474        range: impl TextRange,
475    ) -> Option<impl Iterator<Item = (usize, char)> + '_> {
476        let mut range = range.to_range(self.len().byte());
477        Some(self.strs(range.clone())?.chars().map(move |char| {
478            let byte = range.start;
479            range.start += char.len_utf8();
480            (byte, char)
481        }))
482    }
483
484    /// A reverse iterator of the [`char`]s in [`Bytes`]
485    ///
486    /// Each [`char`] will be accompanied by a byte index, which is
487    /// the position where said character starts, e.g. `0` for the
488    /// first character.
489    #[track_caller]
490    pub fn chars_rev(
491        &self,
492        range: impl TextRange,
493    ) -> Option<impl Iterator<Item = (usize, char)> + '_> {
494        let mut range = range.to_range(self.len().byte());
495        Some(self.strs(range.clone())?.chars().rev().map(move |char| {
496            range.end -= char.len_utf8();
497            (range.end, char)
498        }))
499    }
500
501    /// Gets the indentation level on the current line
502    pub fn indent(&self, p: Point, opts: PrintOpts) -> usize {
503        let range = self.line_range(p.line());
504        self.chars_fwd(range.start..)
505            .unwrap()
506            .map_while(|(_, c)| match c {
507                ' ' => Some(1),
508                '\t' => Some(opts.tabstop as usize),
509                _ => None,
510            })
511            .sum()
512    }
513
514    ////////// Modification functions
515
516    /// Replaces a [`TextRange`] with a `&str`
517    ///
518    /// If you want to apply a [`Change`] to the `Bytes` this way, you
519    /// can use [`Change::taken_range`] as the `TextRange`, and
520    /// [`Change::added_str`] as the replacement text.
521    pub fn replace_range(&mut self, range: impl TextRange, new: impl AsRef<str>) {
522        let edit = new.as_ref();
523        let range = range.to_range(self.len().byte());
524
525        let start = self.point_at_byte(range.start);
526        let taken_len = self.point_at_byte(range.end) - start;
527        let added_len = Point::len_of(edit);
528
529        self.buf.splice(range, edit.bytes());
530
531        let start_rec = [start.byte(), start.char(), start.line()];
532        let old_len = [taken_len.byte(), taken_len.char(), taken_len.line()];
533        let new_len = [added_len.byte(), added_len.char(), added_len.line()];
534
535        self.records.transform(start_rec, old_len, new_len);
536        self.records.insert(start_rec);
537    }
538
539    /// Applies a [`Change`] to the [`GapBuffer`] within
540    pub(crate) fn apply_change(&mut self, change: Change<&str>) {
541        let edit = change.added_str();
542        let start = change.start();
543
544        let range = start.byte()..change.taken_end().byte();
545        self.buf.splice(range, edit.bytes());
546
547        let start_rec = [start.byte(), start.char(), start.line()];
548        let old_len = [
549            change.taken_end().byte() - start.byte(),
550            change.taken_end().char() - start.char(),
551            change.taken_end().line() - start.line(),
552        ];
553        let new_len = [
554            change.added_end().byte() - start.byte(),
555            change.added_end().char() - start.char(),
556            change.added_end().line() - start.line(),
557        ];
558
559        self.records.transform(start_rec, old_len, new_len);
560        self.records.insert(start_rec);
561    }
562
563    /// Adds a record in the given position
564    #[track_caller]
565    pub(crate) fn add_record(&mut self, [b, c, l]: [usize; 3]) {
566        self.records.insert([b, c, l]);
567    }
568
569    ////////// One str functions
570
571    /// Tries to get a contiguous [`&str`] from the [`Bytes`]
572    ///
573    /// Returns [`None`] if the gap of the inner buffer was within the
574    /// given range *OR*.
575    ///
576    /// [`&str`]: str
577    pub fn get_contiguous(&self, range: impl TextRange) -> Option<&str> {
578        let range = range.to_range(self.len().byte());
579        let [s0, s1] = self.strs_inner(..).unwrap();
580
581        if range.end <= self.buf.gap() {
582            s0.get(range)
583        } else {
584            let gap = self.buf.gap();
585            s1.get(range.start.checked_sub(gap)?..range.end.checked_sub(gap)?)
586        }
587    }
588}
589
/// A [`Lender`] over the lines on [`Bytes`]
///
/// The reason for this being a [`Lender`], rather than a regular
/// [`Iterator`] is because the [`Bytes`] use a [`GapBuffer`] within,
/// which means that any line may be split in two. In order to still
/// return it as an `&str`, a new [`String`] needs to be allocated,
/// which will be owned by the [`Lines`], hence the [`Lender`] trait.
pub struct Lines<'b> {
    /// The lines that come before and after the split line,
    /// respectively.
    lines: [std::str::Lines<'b>; 2],
    /// The line that sits in between the two sets of `lines`, if
    /// one had to be allocated.
    split_line: Option<String>,
    /// Index of the next line to be lent from the front.
    fwd_i: usize,
    /// One past the index of the next line to be lent from the back.
    rev_i: usize,
    /// Whether `split_line` was already lent (or found to be
    /// missing), from either end.
    split_line_used: bool,
}
604
605impl<'b> Lines<'b> {
606    fn new(
607        lines: [std::str::Lines<'b>; 2],
608        split_line: Option<String>,
609        fwd_i: usize,
610        rev_i: usize,
611    ) -> Self {
612        Self {
613            lines,
614            split_line,
615            fwd_i,
616            rev_i,
617            split_line_used: false,
618        }
619    }
620}
621
622impl<'b, 'text> Lending<'b> for Lines<'text> {
623    type Lend = (usize, &'b str);
624}
625
626impl<'b> Lender for Lines<'b> {
627    fn next(&mut self) -> Option<lender::Lend<'_, Self>> {
628        self.lines[0]
629            .next()
630            .or_else(|| {
631                if self.split_line_used {
632                    None
633                } else {
634                    self.split_line_used = true;
635                    self.split_line.as_deref()
636                }
637            })
638            .or_else(|| self.lines[1].next())
639            .map(|line| {
640                self.fwd_i += 1;
641                (self.fwd_i - 1, line)
642            })
643    }
644
645    fn size_hint(&self) -> (usize, Option<usize>) {
646        (self.rev_i - self.fwd_i, Some(self.rev_i - self.fwd_i))
647    }
648}
649
650impl<'b> DoubleEndedLender for Lines<'b> {
651    fn next_back(&mut self) -> Option<lender::Lend<'_, Self>> {
652        self.lines[1]
653            .next_back()
654            .or_else(|| {
655                if self.split_line_used {
656                    None
657                } else {
658                    self.split_line_used = true;
659                    self.split_line.as_deref()
660                }
661            })
662            .or_else(|| self.lines[0].next_back())
663            .map(|line| {
664                self.rev_i -= 1;
665                (self.rev_i, line)
666            })
667    }
668}
669
670impl<'b> ExactSizeLender for Lines<'b> {}
671
/// An [`Iterator`] over the bytes in a [`Text`]
///
/// It wraps two slice iterators, which are drained in order from the
/// front, and in reverse order from the back.
///
/// [`Text`]: super::Text
#[derive(Clone)]
pub struct Slices<'b>(pub(super) [std::slice::Iter<'b, u8>; 2]);
677
678impl<'b> Slices<'b> {
679    /// Converts this [`Iterator`] into an array of its two parts
680    pub fn to_array(&self) -> [&'b [u8]; 2] {
681        self.0.clone().map(|iter| iter.as_slice())
682    }
683
684    /// Tries to create a [`String`] out of the two buffers
685    ///
686    /// # Errors
687    ///
688    /// This function will return an error if the bounds of the slices
689    /// don't correspond to utf8 character boundaries, or if the gap
690    /// within these slices doesn't correspond to a utf8 character
691    /// boundary.
692    pub fn try_to_string(self) -> Result<String, Utf8Error> {
693        let [s0, s1] = self.0.map(|arr| arr.as_slice());
694        Ok([str::from_utf8(s0)?, str::from_utf8(s1)?].join(""))
695    }
696
697    /// Treats the inner slices as `&str`s and iterates over their
698    /// characters
699    ///
700    /// You will want to use this function iff you don't want to check
701    /// for character boundaries at the edges (very rarely). Otherwise
702    /// [`bytes.strs({byte_range}).chars()`] instead.
703    ///
704    /// # Safety
705    ///
706    /// You must ensure that the `Slices` were acquired from valid
707    /// byte ranges which coincide with character terminations. If you
708    /// are unsure of that, you should use [`Strs::chars`] instead.
709    ///
710    /// [`bytes.strs({byte_range}).chars()`]: Bytes::strs
711    pub unsafe fn chars_unchecked(self) -> impl Iterator<Item = char> {
712        self.0
713            .into_iter()
714            .flat_map(|iter| unsafe { str::from_utf8_unchecked(iter.as_slice()) }.chars())
715    }
716}
717
718impl<'b> Iterator for Slices<'b> {
719    type Item = u8;
720
721    fn next(&mut self) -> Option<Self::Item> {
722        self.0[0].next().or_else(|| self.0[1].next()).copied()
723    }
724
725    fn size_hint(&self) -> (usize, Option<usize>) {
726        let (l0, u0) = self.0[0].size_hint();
727        let (l1, u1) = self.0[1].size_hint();
728        (l0 + l1, Some(u0.unwrap() + u1.unwrap()))
729    }
730}
731
732impl<'b> ExactSizeIterator for Slices<'b> {}
733
734impl<'b> DoubleEndedIterator for Slices<'b> {
735    fn next_back(&mut self) -> Option<Self::Item> {
736        self.0[1]
737            .next_back()
738            .or_else(|| self.0[0].next_back())
739            .copied()
740    }
741}
742
/// Given a first byte, determines how many bytes are in this UTF-8
/// character.
///
/// Returns `0` for bytes that cannot start a character, that is,
/// continuation bytes (`0x80..=0xBF`) and bytes that never show up
/// in valid UTF-8 (`0xC0`, `0xC1` and `0xF5..=0xFF`).
#[must_use]
#[inline]
pub const fn utf8_char_width(b: u8) -> usize {
    // https://tools.ietf.org/html/rfc3629
    match b {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => 0,
    }
}
770
771impl Eq for Bytes {}
772implPartialEq!(bytes: Bytes, other: Bytes, {
773    let (l_s0, l_s1) = bytes.buf.as_slices();
774    let (r_s0, r_s1) = other.buf.as_slices();
775    (l_s0.len() + l_s1.len() == r_s0.len() + r_s1.len()) && l_s0.iter().chain(l_s1).eq(r_s0.iter().chain(r_s1))
776});
777implPartialEq!(bytes: Bytes, other: &str, {
778    let [s0, s1] = bytes.strs_inner(..).unwrap();
779    other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
780});
781implPartialEq!(bytes: Bytes, other: String, bytes == &&other.as_str());
782implPartialEq!(str: &str, other: Bytes, other == *str);
783implPartialEq!(string: String, other: Bytes, other == *string);
784
785impl Eq for Strs<'_> {}
786implPartialEq!(strs: Strs<'_>, other: Strs<'_>, {
787    let [l_s0, l_s1] = strs.to_array();
788    let [r_s0, r_s1] = other.to_array();
789    (l_s0.len() + l_s1.len() == r_s0.len() + r_s1.len()) && l_s0.bytes().chain(l_s1.bytes()).eq(r_s0.bytes().chain(r_s1.bytes()))
790});
791implPartialEq!(strs: Strs<'_>, other: &str, {
792    let [s0, s1] = strs.to_array();
793    other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
794});
795implPartialEq!(strs: Strs<'_>, other: String, strs == &&other.as_str());
796implPartialEq!(str: &str, other: Strs<'_>, other == *str);
797implPartialEq!(string: String, other: Strs<'_>, other == *string);
798
799/// Implements [`From<$T>`] for [`Bytes`] where `$T: ToString`
800macro_rules! implFromToString {
801    ($T:ty) => {
802        impl From<$T> for Bytes {
803            fn from(value: $T) -> Self {
804                let string = <$T as ToString>::to_string(&value);
805                Bytes::new(&string)
806            }
807        }
808    };
809}
810
811implFromToString!(u8);
812implFromToString!(u16);
813implFromToString!(u32);
814implFromToString!(u64);
815implFromToString!(u128);
816implFromToString!(usize);
817implFromToString!(i8);
818implFromToString!(i16);
819implFromToString!(i32);
820implFromToString!(i64);
821implFromToString!(i128);
822implFromToString!(isize);
823implFromToString!(f32);
824implFromToString!(f64);
825implFromToString!(char);
826implFromToString!(&str);
827implFromToString!(String);
828implFromToString!(Box<str>);
829implFromToString!(std::rc::Rc<str>);
830implFromToString!(std::sync::Arc<str>);
831implFromToString!(std::borrow::Cow<'_, str>);
832implFromToString!(std::io::Error);
833implFromToString!(Box<dyn std::error::Error>);
834
835impl From<std::path::PathBuf> for Bytes {
836    fn from(value: std::path::PathBuf) -> Self {
837        let value = value.to_string_lossy();
838        Self::from(value)
839    }
840}
841
842impl From<&std::path::Path> for Bytes {
843    fn from(value: &std::path::Path) -> Self {
844        let value = value.to_string_lossy();
845        Self::from(value)
846    }
847}
848
849impl std::fmt::Debug for Bytes {
850    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
851        f.debug_struct("Bytes")
852            .field("buf", &self.strs_inner(..))
853            .field("records", &self.records)
854            .finish()
855    }
856}
duat_core/text/bytes/mod.rs

duat_core/text/bytes/
mod.rs