Skip to main content

duat_core/text/
utils.rs

1//! Convenience operations for the [`Text`].
2//!
3//! These include the [`Point`] struct and traits that are meant to
4//! take many kinds of inputs, like the [`TwoPoints`], which is meant
5//! to interpret up to 2 [`Point`]s as a real and ghost position in
6//! the [`Text`].
7//!
8//! [`Text`]: super::Text
9use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
10
11use bincode::{Decode, Encode};
12
/// Generates an `impl PartialEq<Rhs> for Lhs` from a comparison body.
///
/// Invocation shape: `implPartialEq!(a: Lhs, b: Rhs, <body>)`. Inside
/// `<body>`, `a` is bound to a reference to `&self` and `b` to the
/// `&Rhs` argument; the body must evaluate to a `bool`.
macro_rules! implPartialEq {
    ($lhs:ident: $Lhs:ty, $rhs:ident: $Rhs:ty, $($body:tt)+) => {
        impl PartialEq<$Rhs> for $Lhs {
            fn eq(&self, rhs: &$Rhs) -> bool {
                // Rebind under the caller-chosen names so that the
                // body tokens can refer to both operands.
                let $lhs = &self;
                let $rhs = rhs;
                $($body)+
            }
        }
    }
}
23
24pub(super) use implPartialEq;
25
/// Implements `TextRange` for `$range<usize>` and `$range<Point>`.
///
/// Arguments, in order:
/// - `$range`: the range type constructor (e.g. `Range`).
/// - `$r`: identifier that `self` is bound to inside the expressions.
/// - `$max`: identifier that the `max` argument is bound to.
/// - `$sb`, `$eb`: start and end byte expressions for the `usize` impl.
/// - `$sp`, `$ep`: start and end byte expressions for the `Point` impl.
macro_rules! implTextRange {
    ($range:ident, $r:ident, $max:ident, $sb:expr, $eb:expr, $sp:expr, $ep:expr) => {
        impl TextRange for $range<usize> {
            #[track_caller]
            fn to_range(self, max: usize) -> Range<usize> {
                let ($r, $max) = (self, max);
                let bytes = $sb..$eb;
                $crate::utils::get_range(bytes, max)
            }

            fn try_to_range(self, max: usize) -> Option<Range<usize>> {
                let ($r, $max) = (self, max);
                let bytes = $sb..$eb;
                $crate::utils::try_get_range(bytes, max)
            }
        }

        impl TextRange for $range<Point> {
            #[track_caller]
            fn to_range(self, max: usize) -> Range<usize> {
                let ($r, $max) = (self, max);
                let bytes = $sp..$ep;
                $crate::utils::get_range(bytes, max)
            }

            fn try_to_range(self, max: usize) -> Option<Range<usize>> {
                let ($r, $max) = (self, max);
                let bytes = $sp..$ep;
                $crate::utils::try_get_range(bytes, max)
            }
        }
    };
}
59
/// Implements `TextRangeOrIndex` for `$range<usize>` and
/// `$range<Point>` by forwarding to their `TextRange` impls.
macro_rules! implTextRangeOrIndex {
    ($range:ident) => {
        impl TextRangeOrIndex for $range<usize> {
            #[track_caller]
            fn to_range(self, max: usize) -> Range<usize> {
                // Both traits have a `to_range`, so name the one meant.
                <Self as TextRange>::to_range(self, max)
            }
        }

        impl TextRangeOrIndex for $range<Point> {
            #[track_caller]
            fn to_range(self, max: usize) -> Range<usize> {
                <Self as TextRange>::to_range(self, max)
            }
        }
    };
}
77
/// A position in [`Text`].
///
/// This position is composed of a byte index, a character index, and
/// a line index, all from the start of the `Text`.
///
/// Each index is stored as a `u32`. Because the ordering is derived,
/// two `Point`s are compared by `byte` first, then `char`, then
/// `line`.
///
/// [`Text`]: super::Text
#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encode, Decode)]
pub struct Point {
    // Byte index from the start of the `Text`.
    byte: u32,
    // Character index from the start of the `Text`.
    char: u32,
    // Line index from the start of the `Text`.
    line: u32,
}
90
91impl Point {
92    /// Returns a new `Point`, at the first byte.
93    pub const fn new() -> Self {
94        Point { byte: 0, char: 0, line: 0 }
95    }
96
97    /// A `Point` from raw indices.
98    pub const fn from_raw(b: usize, c: usize, l: usize) -> Self {
99        let (b, c, l) = (b as u32, c as u32, l as u32);
100        Self { byte: b, char: c, line: l }
101    }
102
103    /// Returns a new [`TwoPoints`] that includes the [`Ghost`]s in
104    /// the same byte, if there is one.
105    ///
106    /// [`Ghost`]: super::Ghost
107    pub const fn to_two_points_before(self) -> TwoPoints {
108        TwoPoints::new_before_ghost(self)
109    }
110
111    /// Returns a new [`TwoPoints`] that skips the [`Ghost`]s in the
112    /// same byte, if there is one.
113    ///
114    /// [`Ghost`]: super::Ghost
115    pub const fn to_two_points_after(self) -> TwoPoints {
116        TwoPoints::new_after_ghost(self)
117    }
118
119    ////////// Querying functions
120
121    /// The len [`Point`] of a [`&str`].
122    ///
123    /// This is the equivalent of [`Strs::end_point`], but for types
124    /// other than [`Text`]
125    ///
126    /// [`&str`]: str
127    /// [`Strs::end_point`]: super::Strs::end_point
128    /// [`Text`]: super::Text
129    pub fn end_point_of(str: impl AsRef<str>) -> Self {
130        let str = str.as_ref();
131        Self {
132            byte: str.len() as u32,
133            char: str.chars().count() as u32,
134            line: str.bytes().filter(|c| *c == b'\n').count() as u32,
135        }
136    }
137
138    /// Returns the byte (relative to the beginning of the buffer)
139    /// of self. Indexed at 0.
140    ///
141    /// You can use byte indices to index the [`Text`], [`Strs`], or
142    /// [`Tags`] with the [`Strs::point_at_byte`] function.
143    ///
144    /// [`Text`]: super::Text
145    /// [`Strs`]: super::Strs
146    /// [`Tags`]: super::Tags
147    /// [`Strs::point_at_byte`]: super::Strs::point_at_byte
148    pub const fn byte(&self) -> usize {
149        self.byte as usize
150    }
151
152    /// Returns the char index (relative to the beginning of the
153    /// buffer). Indexed at 0.
154    pub const fn char(&self) -> usize {
155        self.char as usize
156    }
157
158    /// Returns the line. Indexed at 0.
159    pub const fn line(&self) -> usize {
160        self.line as usize
161    }
162
163    /// Checked [`Point`] subtraction.
164    pub fn checked_sub(self, rhs: Point) -> Option<Point> {
165        Some(Self {
166            byte: self.byte.checked_sub(rhs.byte)?,
167            char: self.char.checked_sub(rhs.char)?,
168            line: self.line.checked_sub(rhs.line)?,
169        })
170    }
171
172    ////////// Shifting functions
173
174    /// Moves a [`Point`] forward by one character.
175    #[inline(always)]
176    pub(crate) const fn fwd(self, char: char) -> Self {
177        Self {
178            byte: self.byte + char.len_utf8() as u32,
179            char: self.char + 1,
180            line: self.line + (char == '\n') as u32,
181        }
182    }
183
184    /// Moves a [`Point`] in reverse by one character.
185    #[inline(always)]
186    pub(crate) const fn rev(self, char: char) -> Self {
187        Self {
188            byte: self.byte - char.len_utf8() as u32,
189            char: self.char - 1,
190            line: self.line - (char == '\n') as u32,
191        }
192    }
193
194    /// Shifts the [`Point`] by a "signed point".
195    ///
196    /// This assumes that no overflow is going to happen
197    pub(crate) const fn shift_by(self, [b, c, l]: [i32; 3]) -> Self {
198        Self {
199            byte: (self.byte as i32 + b) as u32,
200            char: (self.char as i32 + c) as u32,
201            line: (self.line as i32 + l) as u32,
202        }
203    }
204
205    /// Returns a signed representation of this [`Point`].
206    ///
207    /// In this representation, the indices 0, 1 and 2 are the byte,
208    /// char and line, respectively.
209    pub(crate) const fn as_signed(self) -> [i32; 3] {
210        [self.byte as i32, self.char as i32, self.line as i32]
211    }
212}
213
214impl std::fmt::Debug for Point {
215    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216        write!(
217            f,
218            "Point {{ b: {}, c: {}, l: {} }}",
219            self.byte, self.char, self.line
220        )
221    }
222}
223
224impl std::fmt::Display for Point {
225    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
226        write!(f, "{}, {}, {}", self.byte, self.char, self.line)
227    }
228}
229
230impl std::ops::Add for Point {
231    type Output = Self;
232
233    fn add(self, rhs: Self) -> Self::Output {
234        Self {
235            byte: self.byte + rhs.byte,
236            char: self.char + rhs.char,
237            line: self.line + rhs.line,
238        }
239    }
240}
241
242impl std::ops::AddAssign for Point {
243    fn add_assign(&mut self, rhs: Self) {
244        *self = *self + rhs;
245    }
246}
247
248impl std::ops::Sub for Point {
249    type Output = Self;
250
251    fn sub(self, rhs: Self) -> Self::Output {
252        Self {
253            byte: self.byte - rhs.byte,
254            char: self.char - rhs.char,
255            line: self.line - rhs.line,
256        }
257    }
258}
259
260impl std::ops::SubAssign for Point {
261    fn sub_assign(&mut self, rhs: Self) {
262        *self = *self - rhs;
263    }
264}
265
/// A [`Point`] or a `usize`, representing a byte index.
///
/// In Duat, [`Point`]s are _usually_ just "thin wrappers" around a
/// byte index, useful for getting other information about a place in
/// the [`Text`], but that extra information is normally ignored when
/// doing internal calculations.
///
/// For that reason, Duat allows users to use either [`Point`]s _or_
/// byte indices in order to index the [`Text`], for convenience's
/// sake.
///
/// [`Text`]: super::Text
pub trait TextIndex: Clone + Copy + std::fmt::Debug {
    /// Converts this type into a byte index.
    ///
    /// For a [`Point`] this is its byte component; for a `usize` it
    /// is the value itself.
    fn to_byte_index(self) -> usize;
}
282
283impl TextIndex for Point {
284    fn to_byte_index(self) -> usize {
285        self.byte()
286    }
287}
288
impl TextIndex for usize {
    /// A `usize` is already a byte index, so it is returned unchanged.
    fn to_byte_index(self) -> usize {
        self
    }
}
294
/// Ranges that can be used to index the [`Text`].
///
/// All of the [ranges] in [`std`] that implement either
/// [`RangeBounds<usize>`] or [`RangeBounds<Point>`] should work as an
/// argument. If it implements [`RangeBounds<usize>`], then the
/// `usize` represents a byte index in the [`Text`].
///
/// [`Text`]: super::Text
/// [ranges]: std::range
/// [`RangeBounds<usize>`]: std::ops::RangeBounds
/// [`RangeBounds<Point>`]: std::ops::RangeBounds
#[doc(hidden)]
pub trait TextRange: Clone + std::fmt::Debug {
    /// A "forward facing range".
    ///
    /// `max` is the byte index used as the end of ranges that have no
    /// upper bound of their own (e.g. [`RangeFrom`] and `RangeFull`).
    ///
    /// If given a single [`usize`]/[`Point`], acts like [`RangeFrom`]
    fn to_range(self, max: usize) -> Range<usize>;

    /// Tries to get a "forward facing range".
    ///
    /// `max` has the same meaning as in [`TextRange::to_range`].
    ///
    /// NOTE(review): presumably returns [`None`] for ranges that
    /// `to_range` would reject - confirm against
    /// `crate::utils::try_get_range`.
    ///
    /// If given a single [`usize`]/[`Point`], acts like [`RangeFrom`]
    fn try_to_range(self, max: usize) -> Option<Range<usize>>;
}
318
319implTextRange!(Range, r, _max, r.start, r.end, r.start.byte(), r.end.byte());
320implTextRange!(
321    RangeInclusive,
322    r,
323    _max,
324    *r.start(),
325    r.end() + 1,
326    r.start().byte(),
327    r.end().byte() + 1
328);
329implTextRange!(RangeTo, r, _max, 0, r.end, 0, r.end.byte());
330implTextRange!(RangeToInclusive, r, _max, 0, r.end, 0, r.end.byte());
331implTextRange!(RangeFrom, r, max, r.start, max, r.start.byte(), max);
332
333impl TextRange for RangeFull {
334    fn to_range(self, max: usize) -> Range<usize> {
335        0..max
336    }
337
338    fn try_to_range(self, max: usize) -> Option<Range<usize>> {
339        Some(0..max)
340    }
341}
342
/// Either a [`TextRange`], a [`usize`] or a [`Point`].
///
/// In all cases, they represent a byte index from the start of the
/// [`Text`].
///
/// This trait's purpose is to be used for [`Tag`] removal in the
/// [`Tags::remove`] and [`Text::remove_tags`] functions. This is
/// useful in order to reduce the number of functions exposed to API
/// users.
///
/// [`Tag`]: super::Tag
/// [`Tags::remove`]: super::Tags::remove
/// [`Text::remove_tags`]: super::Text::remove_tags
/// [`Text`]: super::Text
pub trait TextRangeOrIndex {
    /// Transforms `self` into a [`Range<usize>`]
    ///
    /// A single [`usize`] or [`Point`] becomes the one-byte range
    /// starting at that byte, while ranges without an upper bound end
    /// at `max`.
    fn to_range(self, max: usize) -> Range<usize>;
}
361
362impl TextRangeOrIndex for usize {
363    #[track_caller]
364    fn to_range(self, max: usize) -> Range<usize> {
365        crate::utils::get_range(self..self + 1, max)
366    }
367}
368
369impl TextRangeOrIndex for Point {
370    #[track_caller]
371    fn to_range(self, max: usize) -> Range<usize> {
372        crate::utils::get_range(self.byte()..self.byte() + 1, max)
373    }
374}
375
376impl TextRangeOrIndex for RangeFull {
377    fn to_range(self, max: usize) -> Range<usize> {
378        0..max
379    }
380}
381
// Implement `TextRangeOrIndex` for each of these range types, over
// both `usize` and `Point`; every generated impl forwards to the
// type's `TextRange::to_range`.
implTextRangeOrIndex!(Range);
implTextRangeOrIndex!(RangeInclusive);
implTextRangeOrIndex!(RangeTo);
implTextRangeOrIndex!(RangeToInclusive);
implTextRangeOrIndex!(RangeFrom);
387
/// A struct used to exactly pinpoint a position in [`Text`], used
/// when printing.
///
/// This struct has two inner components, a `real` [`Point`], and a
/// `ghost` [`Option<Point>`]. The second component is used whenever
/// you want to print a [`Ghost`] `Text`, either fully or partially.
///
/// The `ghost` component represents the "sum position" of all
/// `Ghost`s in that same byte. For example if there are two ghosts in
/// a single byte, if you pass `ghost == ghost1.len()`, then only the
/// second ghost will be included in this iteration.
///
/// [`TwoPoints::default`] will include the first [`Ghost`].
///
/// [`Text`]: super::Text
/// [`Ghost`]: super::Ghost
// NOTE(review): `Default` is derived, so `TwoPoints::default()` has
// `ghost: None`, which is the same value `new_after_ghost` produces
// (documented as excluding the ghost). The sentence above about
// `TwoPoints::default` may be outdated - confirm the intended behavior.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Encode, Decode, Hash)]
pub struct TwoPoints {
    /// The real `Point` in the [`Text`].
    ///
    /// [`Text`]: super::Text
    pub real: Point,
    /// A possible point in a [`Ghost`].
    ///
    /// A value of [`None`] means that this is either at the end of
    /// the ghosts at a byte (i.e. this `TwoPoints` represents a real
    /// character), or this byte index doesn't have any ghosts at all.
    ///
    /// A value of [`Some`] means that this `TwoPoints` does _not_
    /// represent a real character, so it points to a character
    /// belonging to a [`Ghost`]
    ///
    /// If you don't know how to set this value, you should try to use
    /// the [`new`], [`new_before_ghost`] or [`new_after_ghost`]
    /// functions.
    ///
    /// [`new`]: Self::new
    /// [`new_before_ghost`]: Self::new_before_ghost
    /// [`new_after_ghost`]: Self::new_after_ghost
    /// [`Ghost`]: super::Ghost
    pub ghost: Option<Point>,
}
430
431impl TwoPoints {
432    /// Returns a fully qualified `TwoPoints`.
433    ///
434    /// This will include a precise `real` [`Point`] as well as a
435    /// precise `ghost` [`Point`].
436    ///
437    /// If you don't want to deal with ghosts, see
438    /// [`TwoPoints::new_before_ghost`] and
439    /// [`TwoPoints::new_after_ghost`].
440    pub const fn new(real: Point, ghost: Point) -> Self {
441        Self { real, ghost: Some(ghost) }
442    }
443
444    /// Returns a new `TwoPoints` that will include the [`Ghost`]
445    /// before the real [`Point`].
446    ///
447    /// [`Ghost`]: super::Ghost
448    pub const fn new_before_ghost(real: Point) -> Self {
449        Self { real, ghost: Some(Point::new()) }
450    }
451
452    /// Returns a new `TwoPoints` that will exclude the [`Ghost`]
453    /// before the real [`Point`].
454    ///
455    /// [`Ghost`]: super::Ghost
456    pub const fn new_after_ghost(real: Point) -> Self {
457        Self { real, ghost: None }
458    }
459}
460
461impl std::cmp::PartialOrd for TwoPoints {
462    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
463        Some(self.cmp(other))
464    }
465}
466
467impl Ord for TwoPoints {
468    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
469        match self.real.cmp(&other.real) {
470            core::cmp::Ordering::Equal => {}
471            ord => return ord,
472        }
473        match (&self.ghost, &other.ghost) {
474            (Some(l), Some(r)) => l.cmp(r),
475            (Some(_), None) => std::cmp::Ordering::Less,
476            (None, Some(_)) => std::cmp::Ordering::Greater,
477            (None, None) => std::cmp::Ordering::Equal,
478        }
479    }
480}
481
/// Given a first byte, determines how many bytes are in this
/// UTF-8 character.
///
/// Returns 0 for bytes that cannot start a UTF-8 sequence:
/// continuation bytes (`0x80..=0xBF`), `0xC0`, `0xC1` and
/// `0xF5..=0xFF`.
#[inline]
pub const fn utf8_char_width(b: u8) -> u32 {
    // https://tools.ietf.org/html/rfc3629
    match b {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => 0,
    }
}