duat_core/text/
ops.rs

1//! Convenience operations for the [`Text`]
2//!
//! These include the [`Point`] struct, the [`TwoPoints`] struct, which
//! pairs a real [`Point`] with an optional ghost position in the
//! [`Text`], and traits that are meant to take many kinds of inputs,
//! like [`TextIndex`] and [`TextRange`].
7//!
8//! [`Text`]: super::Text
9use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
10
11use bincode::{Decode, Encode};
12
/// A position in [`Text`]
///
/// A `Point` stores three zero-based offsets from the start of the
/// text: a byte index, a char index and a line index, each one kept
/// as a `u32`.
///
/// The derived ordering compares the fields in declaration order:
/// `byte` first, then `char`, then `line`.
///
/// [`Text`]: super::Text
#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encode, Decode)]
pub struct Point {
    // Byte offset from the start of the text
    byte: u32,
    // Number of chars before this position
    char: u32,
    // Number of '\n' characters before this position
    line: u32,
}
22
23impl Point {
24    /// Returns a new [`Point`], at the first byte
25    pub const fn new() -> Self {
26        Point { byte: 0, char: 0, line: 0 }
27    }
28
29    /// Internal function to create [`Point`]s
30    pub(super) const fn from_raw(b: usize, c: usize, l: usize) -> Self {
31        let (b, c, l) = (b as u32, c as u32, l as u32);
32        Self { byte: b, char: c, line: l }
33    }
34
35    /// Returns a new [`TwoPoints`] that includes the [`Ghost`]s in
36    /// the same byte, if there is one
37    ///
38    /// [`Ghost`]: super::Ghost
39    pub const fn to_two_points_before(self) -> TwoPoints {
40        TwoPoints::new_before_ghost(self)
41    }
42
43    /// Returns a new [`TwoPoints`] that skips the [`Ghost`]s in the
44    /// same byte, if there is one
45    ///
46    /// [`Ghost`]: super::Ghost
47    pub const fn to_two_points_after(self) -> TwoPoints {
48        TwoPoints::new_after_ghost(self)
49    }
50
51    ////////// Querying functions
52
53    /// The len [`Point`] of a [`&str`]
54    ///
55    /// This is the equivalent of [`Text::len`], but for types
56    /// other than [`Text`]
57    ///
58    /// [`&str`]: str
59    /// [`Text::len`]: super::Bytes::len
60    /// [`Text`]: super::Text
61    pub fn len_of(str: impl AsRef<str>) -> Self {
62        let str = str.as_ref();
63        Self {
64            byte: str.len() as u32,
65            char: str.chars().count() as u32,
66            line: str.bytes().filter(|c| *c == b'\n').count() as u32,
67        }
68    }
69
70    /// Returns the byte (relative to the beginning of the buffer)
71    /// of self. Indexed at 0
72    ///
73    /// You can use byte indices to index the [`Text`] or [`Bytes`]
74    /// with the [`Bytes::point_at_byte`] function.
75    ///
76    /// [`Text`]: super::Text
77    /// [`Bytes`]: super::Bytes
78    /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
79    pub const fn byte(&self) -> usize {
80        self.byte as usize
81    }
82
83    /// Returns the char index (relative to the beginning of the
84    /// buffer). Indexed at 0
85    ///
86    /// This is the primary value used when indexing the [`Text`] and
87    /// [`Bytes`]. That is, the [`Bytes::point_at_byte`],
88    /// [`Bytes::strs`], and most other [`Bytes`] functions rely
89    /// on a character indices (or [`Point`]s) for indexing a
90    /// [`Text`].
91    ///
92    /// [`Text`]: super::Text
93    /// [`Bytes`]: super::Bytes
94    /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
95    /// [`Bytes::strs`]: super::Bytes::strs
96    pub const fn char(&self) -> usize {
97        self.char as usize
98    }
99
100    /// Returns the line. Indexed at 0
101    ///
102    /// You can use byte indices to index the [`Text`] or [`Bytes`]
103    /// with the [`Bytes::point_at_line`] function.
104    ///
105    /// [`Text`]: super::Text
106    /// [`Bytes`]: super::Bytes
107    /// [`Bytes::point_at_line`]: super::Bytes::point_at_line
108    pub const fn line(&self) -> usize {
109        self.line as usize
110    }
111
112    /// Checked [`Point`] subtraction
113    pub fn checked_sub(self, rhs: Point) -> Option<Point> {
114        Some(Self {
115            byte: self.byte.checked_sub(rhs.byte)?,
116            char: self.char.checked_sub(rhs.char)?,
117            line: self.line.checked_sub(rhs.line)?,
118        })
119    }
120
121    ////////// Shifting functions
122
123    /// Moves a [`Point`] forward by one character
124    #[inline(always)]
125    pub(crate) const fn fwd(self, char: char) -> Self {
126        Self {
127            byte: self.byte + char.len_utf8() as u32,
128            char: self.char + 1,
129            line: self.line + (char == '\n') as u32,
130        }
131    }
132
133    /// Moves a [`Point`] in reverse by one character
134    #[inline(always)]
135    pub(crate) const fn rev(self, char: char) -> Self {
136        Self {
137            byte: self.byte - char.len_utf8() as u32,
138            char: self.char - 1,
139            line: self.line - (char == '\n') as u32,
140        }
141    }
142
143    /// Shifts the [`Point`] by a "signed point"
144    ///
145    /// This assumes that no overflow is going to happen
146    pub(crate) const fn shift_by(self, [b, c, l]: [i32; 3]) -> Self {
147        Self {
148            byte: (self.byte as i32 + b) as u32,
149            char: (self.char as i32 + c) as u32,
150            line: (self.line as i32 + l) as u32,
151        }
152    }
153
154    /// Returns a signed representation of this [`Point`]
155    ///
156    /// In this representation, the indices 0, 1 and 2 are the byte,
157    /// char and line, respectively.
158    pub(crate) const fn as_signed(self) -> [i32; 3] {
159        [self.byte as i32, self.char as i32, self.line as i32]
160    }
161}
162
163impl std::fmt::Debug for Point {
164    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
165        write!(
166            f,
167            "Point {{ b: {}, c: {}, l: {} }}",
168            self.byte, self.char, self.line
169        )
170    }
171}
172
173impl std::fmt::Display for Point {
174    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
175        write!(f, "{}, {}, {}", self.byte, self.char, self.line)
176    }
177}
178
179impl std::ops::Add for Point {
180    type Output = Self;
181
182    fn add(self, rhs: Self) -> Self::Output {
183        Self {
184            byte: self.byte + rhs.byte,
185            char: self.char + rhs.char,
186            line: self.line + rhs.line,
187        }
188    }
189}
190
191impl std::ops::AddAssign for Point {
192    fn add_assign(&mut self, rhs: Self) {
193        *self = *self + rhs;
194    }
195}
196
197impl std::ops::Sub for Point {
198    type Output = Self;
199
200    fn sub(self, rhs: Self) -> Self::Output {
201        Self {
202            byte: self.byte - rhs.byte,
203            char: self.char - rhs.char,
204            line: self.line - rhs.line,
205        }
206    }
207}
208
209impl std::ops::SubAssign for Point {
210    fn sub_assign(&mut self, rhs: Self) {
211        *self = *self - rhs;
212    }
213}
214
/// A [`Point`] or a `usize`, representing a byte index
///
/// In Duat, [`Point`]s are _usually_ just "thin wrappers" around a
/// byte index, useful for getting other information about a place in
/// the [`Text`], but that extra information is normally ignored when
/// doing internal calculations.
///
/// For that reason, Duat allows users to use either [`Point`]s _or_
/// byte indices in order to index the [`Text`], for convenience's
/// sake.
///
/// [`Text`]: super::Text
pub trait TextIndex: Clone + Copy + std::fmt::Debug {
    /// Converts this type into a byte index.
    ///
    /// For a [`Point`], this is its byte component, and for a
    /// `usize`, it is the value itself.
    fn to_byte_index(self) -> usize;
}
231
232impl TextIndex for Point {
233    fn to_byte_index(self) -> usize {
234        self.byte()
235    }
236}
237
impl TextIndex for usize {
    /// A `usize` already is a byte index, so it is returned as is
    fn to_byte_index(self) -> usize {
        self
    }
}
243
/// Ranges that can be used to index the [`Text`]
///
/// All of the [ranges] in [`std`] that implement either
/// [`RangeBounds<usize>`] or [`RangeBounds<Point>`] should work as an
/// argument. If it implements [`RangeBounds<usize>`], then the
/// `usize` represents a byte index in the [`Text`].
///
/// [`Text`]: super::Text
/// [ranges]: std::range
/// [`RangeBounds<usize>`]: std::ops::RangeBounds
/// [`RangeBounds<Point>`]: std::ops::RangeBounds
pub trait TextRange: Clone {
    /// A "forward facing range"
    ///
    /// If given a single [`usize`]/[`Point`], acts like [`RangeFrom`]
    ///
    /// The `max` argument is the upper limit for the result: the
    /// implementations in this file clamp both the start and the end
    /// of the returned [`Range`] to it.
    // NOTE(review): no implementation for a bare `usize` or `Point` is
    // visible in this file, only for the `std::ops` range types, so the
    // sentence about single indices may be out of date. Confirm.
    fn to_range(self, max: usize) -> Range<usize>;
}
261
262implTextRange!(Range, r, r.start, r.end, r.start.byte(), r.end.byte());
263implTextRange!(
264    RangeInclusive,
265    r,
266    *r.start(),
267    r.end() + 1,
268    r.start().byte(),
269    r.end().byte() + 1
270);
271implTextRange!(RangeTo, r, 0, r.end, 0, r.end.byte());
272implTextRange!(RangeToInclusive, r, 0, r.end, 0, r.end.byte());
273implTextRange!(RangeFrom, r, r.start, MAX, r.start.byte(), MAX);
274
275impl TextRange for RangeFull {
276    fn to_range(self, max: usize) -> Range<usize> {
277        0..max
278    }
279}
280
/// Either a [`TextRange`], a [`usize`] or a [`Point`]
///
/// In all cases, they represent a byte index from the start of the
/// [`Text`]
///
/// This trait's purpose is to be used for [`Tag`] removal in the
/// [`Tags::remove`] and [`Text::remove_tags`] functions. This is
/// useful in order to reduce the number of functions exposed to API
/// users.
///
/// [`Tag`]: super::Tag
/// [`Tags::remove`]: super::Tags::remove
/// [`Text::remove_tags`]: super::Text::remove_tags
/// [`Text`]: super::Text
pub trait TextRangeOrIndex {
    /// Transforms `self` into a [`Range<usize>`]
    ///
    /// In the implementations in this file, a single index `i`
    /// becomes the one byte range `i..i + 1`, ranges are forwarded to
    /// [`TextRange::to_range`], and both ends of the result are
    /// clamped to `max`.
    fn to_range(self, max: usize) -> Range<usize>;
}
299
300impl TextRangeOrIndex for usize {
301    fn to_range(self, max: usize) -> Range<usize> {
302        max.min(self)..max.min(self + 1)
303    }
304}
305
306impl TextRangeOrIndex for Point {
307    fn to_range(self, max: usize) -> Range<usize> {
308        max.min(self.byte())..max.min(self.byte() + 1)
309    }
310}
311
312impl TextRangeOrIndex for RangeFull {
313    fn to_range(self, max: usize) -> Range<usize> {
314        TextRange::to_range(self, max)
315    }
316}
317
// Each invocation implements `TextRangeOrIndex` for both the `usize`
// and the `Point` versions of the given range type, by forwarding to
// their `TextRange` implementations.
implTextRangeOrIndex!(Range);
implTextRangeOrIndex!(RangeInclusive);
implTextRangeOrIndex!(RangeTo);
implTextRangeOrIndex!(RangeToInclusive);
implTextRangeOrIndex!(RangeFrom);
323
/// A struct used to exactly pinpoint a position in [`Text`], used
/// when printing
///
/// This struct has two inner components, a `real` [`Point`], and a
/// `ghost` [`Option<Point>`]. The second component is used whenever
/// you want to print a [`Ghost`] `Text`, either fully or partially.
///
/// The `ghost` component represents the "sum position" of all
/// `Ghost`s in that same byte. For example if there are two ghosts in
/// a single byte, if you pass `ghost == ghost1.len()`, then only the
/// second ghost will be included in this iteration.
///
/// [`TwoPoints::default`] will include the first [`Ghost`].
///
/// [`Text`]: super::Text
/// [`Ghost`]: super::Ghost
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Encode, Decode)]
pub struct TwoPoints {
    /// The real `Point` in the [`Text`]
    ///
    /// Defaults to [`Point::new`], the first byte.
    ///
    /// [`Text`]: super::Text
    pub real: Point = Point::new(),
    /// A possible point in a [`Ghost`]
    ///
    /// `None` is what [`TwoPoints::new_after_ghost`] uses in order to
    /// exclude the [`Ghost`]. Defaults to `Some(Point::new())`, the
    /// same value used by [`TwoPoints::new_before_ghost`].
    ///
    /// [`Ghost`]: super::Ghost
    pub ghost: Option<Point> = Some(Point::new()),
}
351
352impl TwoPoints {
353    /// Returns a fully qualified `TwoPoints`
354    ///
355    /// This will include a precise `real` [`Point`] as well as a
356    /// precise `ghost` [`Point`].
357    ///
358    /// If you don't want to deal with ghosts, see
359    /// [`TwoPoints::new_before_ghost`] and
360    /// [`TwoPoints::new_after_ghost`].
361    pub const fn new(real: Point, ghost: Point) -> Self {
362        Self { real, ghost: Some(ghost) }
363    }
364
365    /// Returns a new `TwoPoints` that will include the [`Ghost`]
366    /// before the real [`Point`]
367    ///
368    /// [`Ghost`]: super::Ghost
369    pub const fn new_before_ghost(real: Point) -> Self {
370        Self { real, ghost: Some(Point::new()) }
371    }
372
373    /// Returns a new `TwoPoints` that will exclude the [`Ghost`]
374    /// before the real [`Point`]
375    ///
376    /// [`Ghost`]: super::Ghost
377    pub const fn new_after_ghost(real: Point) -> Self {
378        Self { real, ghost: None }
379    }
380}
381
382impl std::cmp::PartialOrd for TwoPoints {
383    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
384        Some(self.cmp(other))
385    }
386}
387
388impl Ord for TwoPoints {
389    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
390        match self.real.cmp(&other.real) {
391            core::cmp::Ordering::Equal => {}
392            ord => return ord,
393        }
394        match (&self.ghost, &other.ghost) {
395            (Some(l), Some(r)) => l.cmp(r),
396            (Some(_), None) => std::cmp::Ordering::Less,
397            (None, Some(_)) => std::cmp::Ordering::Greater,
398            (None, None) => std::cmp::Ordering::Equal,
399        }
400    }
401}
402
// Open ended upper bound, used by the `RangeFrom` implementations of
// `TextRange`, where it is then clamped by the `max` argument.
const MAX: usize = usize::MAX;
404
// Implements `TextRange` for `$range<usize>` and `$range<Point>`.
//
// - `$range`: The range type, like `Range` or `RangeFrom`
// - `$r`: The identifier that `self` gets bound to, so that the
//   expressions below can refer to the range
// - `$sb`, `$eb`: Start and end byte of the `usize` version
// - `$sp`, `$ep`: Start and end byte of the `Point` version
//
// Both ends are clamped to `max`, and `$eb`/`$ep` are exclusive ends,
// since the result is a half-open `Range<usize>`.
macro implTextRange($range:ident, $r:ident, $sb:expr, $eb:expr, $sp:expr, $ep:expr) {
    impl TextRange for $range<usize> {
        fn to_range(self, max: usize) -> Range<usize> {
            let $r = self;
            max.min($sb)..max.min($eb)
        }
    }

    impl TextRange for $range<Point> {
        fn to_range(self, max: usize) -> Range<usize> {
            let $r = self;
            max.min($sp)..max.min($ep)
        }
    }
}
420
// Implements `TextRangeOrIndex` for `$range<usize>` and
// `$range<Point>`, by forwarding to their `TextRange` implementations.
//
// The fully qualified `TextRange::to_range` call is needed because both
// traits have a method called `to_range`.
macro implTextRangeOrIndex($range:ident) {
    impl TextRangeOrIndex for $range<usize> {
        fn to_range(self, max: usize) -> Range<usize> {
            TextRange::to_range(self, max)
        }
    }

    impl TextRangeOrIndex for $range<Point> {
        fn to_range(self, max: usize) -> Range<usize> {
            TextRange::to_range(self, max)
        }
    }
}
434
/// Given a first byte, determines how many bytes are in this
/// UTF-8 character
///
/// Returns 0 if `b` can't be the first byte of a UTF-8 character.
/// That is the case for continuation bytes (`0x80..=0xBF`), for
/// `0xC0` and `0xC1`, and for every byte above `0xF4`.
#[inline]
pub const fn utf8_char_width(b: u8) -> u32 {
    // https://tools.ietf.org/html/rfc3629
    match b {
        // ASCII
        0x00..=0x7F => 1,
        // Lead bytes of 2, 3 and 4 byte sequences
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // Continuation bytes and bytes that never start a character
        _ => 0,
    }
}