Skip to main content

duat_core/text/strs/
buf.rs

1//! The equivalent of [`String`] for Duat.
2//!
3//! You can't access this struct directly from Duat's API. This is
4//! because it should only be modified alongside a [`Text`], which
5//! would prevent the desynchronization of state with the
6//! [`InnerTags`].
7//!
//! Instead, you're only ever meant to access this through the
//! [`Strs`] struct, which is Duat's equivalent of [`str`].
10//!
11//! [`Text`]: crate::text::Text
12//! [`InnerTags`]: crate::text::InnerTags
13use gap_buf::GapBuffer;
14
15use crate::{
16    buffer::Change,
17    text::{
18        strs::{Strs, line_ranges::LineRanges},
19        utils::implPartialEq,
20    },
21};
22
/// The bytes of a [`Text`], encoded in UTF-8
///
/// The bytes are stored in a [`GapBuffer`], next to the
/// [`LineRanges`] derived from them and a version counter.
///
/// [`Text`]: crate::text::Text
#[derive(Default, Clone, bincode::Decode, bincode::Encode)]
pub struct StrsBuf {
    /// The UTF-8 bytes themselves.
    pub(super) gapbuf: GapBuffer<u8>,
    /// Line records for `gapbuf`: built in `StrsBuf::new` and
    /// transformed by every `StrsBuf::apply_change`.
    pub(super) line_ranges: LineRanges,
    /// Starts at 0 and is bumped by `StrsBuf::increment_version`.
    version: u64,
}
32
33impl StrsBuf {
34    /// Returns a new instance of [`StrsBuf`]
35    ///
36    /// Not intended for public use, it is necessary in duat
37    #[doc(hidden)]
38    #[track_caller]
39    pub(crate) fn new(string: String) -> Self {
40        assert!(
41            string.len() <= u32::MAX as usize,
42            "For now, you can't have a Text larger than u32::MAX"
43        );
44        let buf = GapBuffer::from(string.into_bytes());
45
46        let slices = unsafe {
47            let (s0, s1) = buf.as_slices();
48            [str::from_utf8_unchecked(s0), str::from_utf8_unchecked(s1)]
49        };
50
51        let records = LineRanges::new(slices);
52
53        let mut buf = Self {
54            gapbuf: buf,
55            line_ranges: records,
56            version: 0,
57        };
58
59        if buf.bytes().next_back().is_none_or(|b| b != b'\n') {
60            let end = buf.end_point();
61            buf.apply_change(Change::str_insert("\n", end));
62        }
63
64        buf
65    }
66
67    ////////// Modification functions
68
69    /// Applies a [`Change`] to the [`GapBuffer`] within
70    #[track_caller]
71    pub(crate) fn apply_change(&mut self, change: Change<&str>) {
72        assert!(
73            self.len() + change.added_str().len() - change.taken_str().len() <= u32::MAX as usize,
74            "For now, you can't have a Text larger than u32::MAX"
75        );
76
77        assert_utf8_boundary(self, change.start().byte());
78        assert_utf8_boundary(self, change.taken_end().byte());
79
80        let edit = change.added_str();
81        let start = change.start();
82
83        let range = start.byte()..change.taken_end().byte();
84        self.gapbuf.splice(range, edit.bytes());
85
86        let start_rec = [start.byte(), start.char(), start.line()];
87        let old_len = [
88            change.taken_end().byte() - start.byte(),
89            change.taken_end().char() - start.char(),
90            change.taken_end().line() - start.line(),
91        ];
92        let new_len = [
93            change.added_end().byte() - start.byte(),
94            change.added_end().char() - start.char(),
95            change.added_end().line() - start.line(),
96        ];
97
98        let array = unsafe {
99            let (s0, s1) = self.gapbuf.as_slices();
100            [str::from_utf8_unchecked(s0), str::from_utf8_unchecked(s1)]
101        };
102
103        self.line_ranges
104            .transform(start_rec, old_len, new_len, array);
105    }
106
107    /// Increment the version of the `StrsBuf` by 1
108    pub fn increment_version(&mut self) {
109        self.version += 1;
110    }
111
112    /// Get the current version of the `StrsBuf`
113    pub fn version(&self) -> u64 {
114        self.version
115    }
116}
117
118impl std::ops::Deref for StrsBuf {
119    type Target = Strs;
120
121    fn deref(&self) -> &Self::Target {
122        Strs::new(self, 0, self.gapbuf.len() as u32)
123    }
124}
125
/// Given a first byte, determines how many bytes are in this UTF-8
/// character.
///
/// Returns `0` for any byte that cannot start a character.
#[must_use]
#[inline]
pub const fn utf8_char_width(b: u8) -> usize {
    // https://tools.ietf.org/html/rfc3629
    match b {
        // ASCII
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // Continuation bytes (0x80..=0xBF), leads that could only
        // start overlong encodings (0xC0, 0xC1) and leads past
        // U+10FFFF (0xF5..=0xFF).
        _ => 0,
    }
}
153
154impl Eq for StrsBuf {}
155implPartialEq!(bytes: StrsBuf, other: StrsBuf, {
156    let (l_s0, l_s1) = bytes.gapbuf.as_slices();
157    let (r_s0, r_s1) = other.gapbuf.as_slices();
158    (l_s0.len() + l_s1.len() == r_s0.len() + r_s1.len()) && l_s0.iter().chain(l_s1).eq(r_s0.iter().chain(r_s1))
159});
160implPartialEq!(bytes: StrsBuf, other: &str, {
161    let [s0, s1] = bytes.to_array();
162    other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
163});
164implPartialEq!(bytes: StrsBuf, other: String, bytes == &&other.as_str());
165implPartialEq!(str: &str, other: StrsBuf, other == *str);
166implPartialEq!(string: String, other: StrsBuf, other == *string);
167
168impl Eq for &Strs {}
169implPartialEq!(strs: &Strs, other: &Strs, {
170    let [l_s0, l_s1] = strs.to_array();
171    let [r_s0, r_s1] = other.to_array();
172    (l_s0.len() + l_s1.len() == r_s0.len() + r_s1.len()) && l_s0.bytes().chain(l_s1.bytes()).eq(r_s0.bytes().chain(r_s1.bytes()))
173});
174implPartialEq!(strs: &Strs, other: &str, {
175    let [s0, s1] = strs.to_array();
176    other.len() == s0.len() + s1.len() && &other[..s0.len()] == s0 && &other[s0.len()..] == s1
177});
178implPartialEq!(strs: &Strs, other: String, strs == &&other.as_str());
179implPartialEq!(str: &str, other: &Strs, other == *str);
180implPartialEq!(string: String, other: &Strs, other == *string);
181
/// Implements [`From<$T>`] for [`StrsBuf`] by turning the value into
/// a [`String`] first, which requires `$T: ToString`
macro_rules! implFromToString {
    ($T:ty) => {
        impl From<$T> for StrsBuf {
            fn from(source: $T) -> Self {
                let string = ToString::to_string(&source);
                StrsBuf::new(string)
            }
        }
    };
}
192
193implFromToString!(u8);
194implFromToString!(u16);
195implFromToString!(u32);
196implFromToString!(u64);
197implFromToString!(u128);
198implFromToString!(usize);
199implFromToString!(i8);
200implFromToString!(i16);
201implFromToString!(i32);
202implFromToString!(i64);
203implFromToString!(i128);
204implFromToString!(isize);
205implFromToString!(f32);
206implFromToString!(f64);
207implFromToString!(char);
208implFromToString!(&str);
209implFromToString!(String);
210implFromToString!(Box<str>);
211implFromToString!(std::rc::Rc<str>);
212implFromToString!(std::sync::Arc<str>);
213implFromToString!(std::borrow::Cow<'_, str>);
214implFromToString!(std::io::Error);
215implFromToString!(Box<dyn std::error::Error>);
216
217impl From<std::path::PathBuf> for StrsBuf {
218    fn from(value: std::path::PathBuf) -> Self {
219        let value = value.to_string_lossy();
220        Self::from(value)
221    }
222}
223
224impl From<&std::path::Path> for StrsBuf {
225    fn from(value: &std::path::Path) -> Self {
226        let value = value.to_string_lossy();
227        Self::from(value)
228    }
229}
230
231impl std::fmt::Debug for StrsBuf {
232    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233        f.debug_struct("StrsBuf")
234            .field("buf", &self[..].to_array())
235            .field("records", &self.line_ranges)
236            .finish()
237    }
238}
239
240#[track_caller]
241pub fn assert_utf8_boundary(bytes: &StrsBuf, idx: usize) {
242    assert!(
243        bytes
244            .gapbuf
245            .get(idx)
246            .is_none_or(|b| utf8_char_width(*b) != 0),
247        "byte index {} is not a valid char boundary; it is inside '{}'",
248        idx,
249        {
250            let (n, len) = bytes
251                .gapbuf
252                .range(..idx)
253                .iter()
254                .rev()
255                .enumerate()
256                .find_map(|(i, &b)| (utf8_char_width(b) != 0).then_some((i, utf8_char_width(b))))
257                .unwrap();
258
259            String::from_utf8(
260                bytes
261                    .gapbuf
262                    .range(idx - (n + 1)..idx - (n + 1) + len)
263                    .iter()
264                    .copied()
265                    .collect(),
266            )
267            .unwrap()
268        }
269    );
270}