Skip to main content

duat_core/text/
utils.rs

1//! Convenience operations for the [`Text`].
2//!
3//! These include the [`Point`] struct and traits that are meant to
4//! take many kinds of inputs, like the [`TwoPoints`], which is meant
5//! to interpret up to 2 [`Point`]s as a real and ghost position in
6//! the [`Text`].
7//!
8//! [`Text`]: super::Text
9use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
10
11use bincode::{Decode, Encode};
12
/// Generates an `impl PartialEq<$Other> for $Self` from a comparison
/// body.
///
/// The two identifiers name the operands inside the body: the first
/// is bound to a reference to the `&self` receiver, the second to
/// the `other` argument exactly as `eq` receives it. The remaining
/// tokens are the body of `eq` and must evaluate to a `bool`.
macro_rules! implPartialEq {
    ($self:ident: $Self:ty, $other:ident: $Other:ty, $($body:tt)+) => {
        impl PartialEq<$Other> for $Self {
            fn eq(&self, other: &$Other) -> bool {
                let $self = &self;
                let $other = other;
                $($body)+
            }
        }
    }
}
23
24pub(super) use implPartialEq;
25
26use crate::text::Strs;
27
/// Implements [`TextRange`] for a std range type, once over `usize`
/// and once over [`Point`].
///
/// The arguments are, in order: the range type, the identifier the
/// range value is bound to, the identifier `max` is bound to, then
/// the start and end expressions for the `usize` version, followed
/// by the start and end expressions for the [`Point`] version.
///
/// `to_range` hands the resulting `start..end` to
/// `crate::utils::get_range`, while `try_to_range` hands it to
/// `crate::utils::try_get_range`.
macro_rules! implTextRange {
    (
        $Range:ident,
        $this:ident,
        $len:ident,
        $usize_start:expr,
        $usize_end:expr,
        $point_start:expr,
        $point_end:expr
    ) => {
        impl TextRange for $Range<usize> {
            #[track_caller]
            fn to_range(self, max: usize) -> Range<usize> {
                let ($len, $this) = (max, self);
                $crate::utils::get_range($usize_start..$usize_end, max)
            }

            fn try_to_range(self, max: usize) -> Option<Range<usize>> {
                let ($len, $this) = (max, self);
                $crate::utils::try_get_range($usize_start..$usize_end, max)
            }
        }

        impl TextRange for $Range<Point> {
            #[track_caller]
            fn to_range(self, max: usize) -> Range<usize> {
                let ($len, $this) = (max, self);
                $crate::utils::get_range($point_start..$point_end, max)
            }

            fn try_to_range(self, max: usize) -> Option<Range<usize>> {
                let ($len, $this) = (max, self);
                $crate::utils::try_get_range($point_start..$point_end, max)
            }
        }
    };
}
61
/// Implements [`TextRangeOrIndex`] for a std range type, over both
/// `usize` and [`Point`], by forwarding to that type's [`TextRange`]
/// implementation.
macro_rules! implTextRangeOrIndex {
    ($Range:ident) => {
        impl TextRangeOrIndex for $Range<usize> {
            #[track_caller]
            fn to_range(self, max: usize) -> Range<usize> {
                <Self as TextRange>::to_range(self, max)
            }
        }

        impl TextRangeOrIndex for $Range<Point> {
            #[track_caller]
            fn to_range(self, max: usize) -> Range<usize> {
                <Self as TextRange>::to_range(self, max)
            }
        }
    };
}
79
80/// A position in [`Text`].
81///
82/// This position is composed of a byte index, a character index, and
83/// a line index, all from the start of the `Text`.
84///
85/// [`Text`]: super::Text
86#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encode, Decode)]
87pub struct Point {
88    byte: u32,
89    char: u32,
90    line: u32,
91}
92
93impl Point {
94    /// Returns a new `Point`, at the first byte.
95    pub const fn new() -> Self {
96        Point { byte: 0, char: 0, line: 0 }
97    }
98
99    /// A `Point` from raw indices.
100    pub const fn from_raw(b: usize, c: usize, l: usize) -> Self {
101        let (b, c, l) = (b as u32, c as u32, l as u32);
102        Self { byte: b, char: c, line: l }
103    }
104
105    /// Returns a new [`TwoPoints`] that includes the [`Inlay`]s in
106    /// the same byte, if there is one.
107    ///
108    /// [`Inlay`]: super::Inlay
109    pub const fn to_two_points_before(self) -> TwoPoints {
110        TwoPoints::new_before_ghost(self)
111    }
112
113    /// Returns a new [`TwoPoints`] that skips the [`Inlay`]s in the
114    /// same byte, if there is one.
115    ///
116    /// [`Inlay`]: super::Inlay
117    pub const fn to_two_points_after(self) -> TwoPoints {
118        TwoPoints::new_after_ghost(self)
119    }
120
121    ////////// Querying functions
122
123    /// The len [`Point`] of a [`&str`].
124    ///
125    /// This is the equivalent of [`Strs::end_point`], but for types
126    /// other than [`Text`]
127    ///
128    /// [`&str`]: str
129    /// [`Strs::end_point`]: super::Strs::end_point
130    /// [`Text`]: super::Text
131    pub fn end_point_of(str: impl AsRef<str>) -> Self {
132        let str = str.as_ref();
133        Self {
134            byte: str.len() as u32,
135            char: str.chars().count() as u32,
136            line: str.bytes().filter(|c| *c == b'\n').count() as u32,
137        }
138    }
139
140    /// Returns the byte (relative to the beginning of the buffer)
141    /// of self. Indexed at 0.
142    ///
143    /// You can use byte indices to index the [`Text`], [`Strs`], or
144    /// [`Tags`] with the [`Strs::point_at_byte`] function.
145    ///
146    /// [`Text`]: super::Text
147    /// [`Strs`]: super::Strs
148    /// [`Tags`]: super::Tags
149    /// [`Strs::point_at_byte`]: super::Strs::point_at_byte
150    pub const fn byte(&self) -> usize {
151        self.byte as usize
152    }
153
154    /// Returns the char index (relative to the beginning of the
155    /// buffer). Indexed at 0.
156    pub const fn char(&self) -> usize {
157        self.char as usize
158    }
159
160    /// Returns the line. Indexed at 0.
161    pub const fn line(&self) -> usize {
162        self.line as usize
163    }
164
165    /// Returns the number of bytes between this `Point` and the start
166    /// of the line.
167    pub fn byte_col(&self, strs: &Strs) -> usize {
168        self.byte() - strs.point_at_coords(self.line(), 0).byte()
169    }
170
171    /// Returns the numbers of utf8 characters between this `Point`
172    /// and the start of the line.
173    ///
174    /// Note that this counts `characters`, which may not align with a
175    /// human conception of what a charcter is.
176    pub fn char_col(&self, strs: &Strs) -> usize {
177        self.char() - strs.point_at_coords(self.line(), 0).char()
178    }
179
180    /// Checked [`Point`] subtraction.
181    pub fn checked_sub(self, rhs: Point) -> Option<Point> {
182        Some(Self {
183            byte: self.byte.checked_sub(rhs.byte)?,
184            char: self.char.checked_sub(rhs.char)?,
185            line: self.line.checked_sub(rhs.line)?,
186        })
187    }
188
189    ////////// Shifting functions
190
191    /// Moves a [`Point`] forward by one character.
192    #[inline(always)]
193    pub(crate) const fn fwd(self, char: char) -> Self {
194        Self {
195            byte: self.byte + char.len_utf8() as u32,
196            char: self.char + 1,
197            line: self.line + (char == '\n') as u32,
198        }
199    }
200
201    /// Moves a [`Point`] in reverse by one character.
202    #[inline(always)]
203    pub(crate) const fn rev(self, char: char) -> Self {
204        Self {
205            byte: self.byte - char.len_utf8() as u32,
206            char: self.char - 1,
207            line: self.line - (char == '\n') as u32,
208        }
209    }
210
211    /// Shifts the [`Point`] by a "signed point".
212    ///
213    /// This assumes that no overflow is going to happen
214    pub(crate) const fn shift_by(self, [b, c, l]: [i32; 3]) -> Self {
215        Self {
216            byte: (self.byte as i32 + b) as u32,
217            char: (self.char as i32 + c) as u32,
218            line: (self.line as i32 + l) as u32,
219        }
220    }
221
222    /// Returns a signed representation of this [`Point`].
223    ///
224    /// In this representation, the indices 0, 1 and 2 are the byte,
225    /// char and line, respectively.
226    pub(crate) const fn as_signed(self) -> [i32; 3] {
227        [self.byte as i32, self.char as i32, self.line as i32]
228    }
229}
230
231impl std::fmt::Debug for Point {
232    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233        write!(
234            f,
235            "Point {{ b: {}, c: {}, l: {} }}",
236            self.byte, self.char, self.line
237        )
238    }
239}
240
241impl std::fmt::Display for Point {
242    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
243        write!(f, "{}, {}, {}", self.byte, self.char, self.line)
244    }
245}
246
247impl std::ops::Add for Point {
248    type Output = Self;
249
250    fn add(self, rhs: Self) -> Self::Output {
251        Self {
252            byte: self.byte + rhs.byte,
253            char: self.char + rhs.char,
254            line: self.line + rhs.line,
255        }
256    }
257}
258
259impl std::ops::AddAssign for Point {
260    fn add_assign(&mut self, rhs: Self) {
261        *self = *self + rhs;
262    }
263}
264
265impl std::ops::Sub for Point {
266    type Output = Self;
267
268    fn sub(self, rhs: Self) -> Self::Output {
269        Self {
270            byte: self.byte - rhs.byte,
271            char: self.char - rhs.char,
272            line: self.line - rhs.line,
273        }
274    }
275}
276
277impl std::ops::SubAssign for Point {
278    fn sub_assign(&mut self, rhs: Self) {
279        *self = *self - rhs;
280    }
281}
282
/// A [`Point`] or a `usize`, representing a byte index.
///
/// In Duat, a [`Point`] is _usually_ just a "thin wrapper" around a
/// byte index. The char and line that it carries are useful for
/// getting other information about a place in the [`Text`], but they
/// are normally ignored when doing internal calculations.
///
/// Because of that, and for convenience's sake, Duat lets users
/// index the [`Text`] with either [`Point`]s _or_ plain byte
/// indices.
///
/// [`Text`]: super::Text
#[doc(hidden)]
pub trait TextIndex: Clone + Copy + std::fmt::Debug {
    /// Converts this type into a byte index.
    fn to_byte_index(self) -> usize;
}
300
301impl TextIndex for Point {
302    fn to_byte_index(self) -> usize {
303        self.byte()
304    }
305}
306
307impl TextIndex for usize {
308    fn to_byte_index(self) -> usize {
309        self
310    }
311}
312
/// Ranges that can be used to index the [`Text`].
///
/// All of the [ranges] in [`std`] that implement either
/// [`RangeBounds<usize>`] or [`RangeBounds<Point>`] should work as an
/// argument. If it implements [`RangeBounds<usize>`], then the
/// `usize` represents a byte index in the [`Text`].
///
/// Both methods take a `max` argument, which implementors use as
/// the end of ranges that have no end bound of their own.
///
/// [`Text`]: super::Text
/// [ranges]: std::ops
/// [`RangeBounds<usize>`]: std::ops::RangeBounds
/// [`RangeBounds<Point>`]: std::ops::RangeBounds
#[doc(hidden)]
pub trait TextRange: Clone + std::fmt::Debug {
    /// A "forward facing range".
    ///
    /// If given a single [`usize`]/[`Point`], acts like [`RangeFrom`]
    fn to_range(self, max: usize) -> Range<usize>;

    /// Tries to get a "forward facing range".
    ///
    /// This is the version of [`to_range`] that returns an
    /// [`Option`].
    ///
    /// If given a single [`usize`]/[`Point`], acts like [`RangeFrom`]
    ///
    /// [`to_range`]: TextRange::to_range
    fn try_to_range(self, max: usize) -> Option<Range<usize>>;
}
336
337implTextRange!(Range, r, _max, r.start, r.end, r.start.byte(), r.end.byte());
338implTextRange!(
339    RangeInclusive,
340    r,
341    _max,
342    *r.start(),
343    r.end() + 1,
344    r.start().byte(),
345    r.end().byte() + 1
346);
347implTextRange!(RangeTo, r, _max, 0, r.end, 0, r.end.byte());
348implTextRange!(RangeToInclusive, r, _max, 0, r.end, 0, r.end.byte());
349implTextRange!(RangeFrom, r, max, r.start, max, r.start.byte(), max);
350
351impl TextRange for RangeFull {
352    fn to_range(self, max: usize) -> Range<usize> {
353        0..max
354    }
355
356    fn try_to_range(self, max: usize) -> Option<Range<usize>> {
357        Some(0..max)
358    }
359}
360
/// Either a [`TextRange`], a [`usize`] or a [`Point`].
///
/// In all cases, they represent a byte index from the start of the
/// [`Text`]
///
/// This trait exists for [`Tag`] removal, in the [`Tags::remove`]
/// and [`Text::remove_tags`] functions. By accepting all of these
/// types in one place, the number of functions exposed to API users
/// is reduced.
///
/// [`Tag`]: super::Tag
/// [`Tags::remove`]: super::Tags::remove
/// [`Text::remove_tags`]: super::Text::remove_tags
/// [`Text`]: super::Text
#[doc(hidden)]
pub trait TextRangeOrIndex {
    /// Transforms `self` into a [`Range<usize>`]
    fn to_range(self, max: usize) -> Range<usize>;
}
380
381impl TextRangeOrIndex for usize {
382    #[track_caller]
383    fn to_range(self, max: usize) -> Range<usize> {
384        crate::utils::get_range(self..self + 1, max)
385    }
386}
387
388impl TextRangeOrIndex for Point {
389    #[track_caller]
390    fn to_range(self, max: usize) -> Range<usize> {
391        crate::utils::get_range(self.byte()..self.byte() + 1, max)
392    }
393}
394
395impl TextRangeOrIndex for RangeFull {
396    fn to_range(self, max: usize) -> Range<usize> {
397        0..max
398    }
399}
400
401implTextRangeOrIndex!(Range);
402implTextRangeOrIndex!(RangeInclusive);
403implTextRangeOrIndex!(RangeTo);
404implTextRangeOrIndex!(RangeToInclusive);
405implTextRangeOrIndex!(RangeFrom);
406
407/// A struct used to exactly pinpoint a position in [`Text`], used
408/// when printing.
409///
410/// This struct has two inner components, a `real` [`Point`], and a
411/// `ghost` [`Option<Point>`]. The second component is used whenever
412/// you want to print a [`Inlay`] `Text`, either fully or partially.
413///
414/// The `ghost` component represents the "sum position" of all
415/// `Inlay`s in that same byte. For example if there are two ghosts in
416/// a single byte, if you pass `ghost == ghost1.len()`, then only the
417/// second ghost will be included in this iteration.
418///
419/// [`TwoPoints::default`] will include the first [`Inlay`].
420///
421/// [`Text`]: super::Text
422/// [`Inlay`]: super::Inlay
423#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Encode, Decode, Hash)]
424pub struct TwoPoints {
425    /// The real `Point` in the [`Text`].
426    ///
427    /// [`Text`]: super::Text
428    pub real: Point,
429    /// A possible point in a [`Inlay`].
430    ///
431    /// A value of [`None`] means that this is either at the end of
432    /// the ghosts at a byte (i.e. this `TwoPoints` represents a real
433    /// character), or this byte index doesn't have any ghosts at all.
434    ///
435    /// A value of [`Some`] means that this `TwoPoints` does _not_
436    /// represent a real character, so it points to a character
437    /// belonging to a [`Inlay`]
438    ///
439    /// If you don't know how to set this value, you should try to use
440    /// the [`new`], [`new_before_ghost`] or [`new_after_ghost`]
441    /// functions.
442    ///
443    /// [`new`]: Self::new
444    /// [`new_before_ghost`]: Self::new_before_ghost
445    /// [`new_after_ghost`]: Self::new_after_ghost
446    /// [`Inlay`]: super::Inlay
447    pub ghost: Option<Point>,
448}
449
450impl TwoPoints {
451    /// Returns a fully qualified `TwoPoints`.
452    ///
453    /// This will include a precise `real` [`Point`] as well as a
454    /// precise `ghost` [`Point`].
455    ///
456    /// If you don't want to deal with ghosts, see
457    /// [`TwoPoints::new_before_ghost`] and
458    /// [`TwoPoints::new_after_ghost`].
459    pub const fn new(real: Point, ghost: Point) -> Self {
460        Self { real, ghost: Some(ghost) }
461    }
462
463    /// Returns a new `TwoPoints` that will include the [`Inlay`]
464    /// before the real [`Point`].
465    ///
466    /// [`Inlay`]: super::Inlay
467    pub const fn new_before_ghost(real: Point) -> Self {
468        Self { real, ghost: Some(Point::new()) }
469    }
470
471    /// Returns a new `TwoPoints` that will exclude the [`Inlay`]
472    /// before the real [`Point`].
473    ///
474    /// [`Inlay`]: super::Inlay
475    pub const fn new_after_ghost(real: Point) -> Self {
476        Self { real, ghost: None }
477    }
478}
479
480impl std::cmp::PartialOrd for TwoPoints {
481    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
482        Some(self.cmp(other))
483    }
484}
485
486impl Ord for TwoPoints {
487    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
488        match self.real.cmp(&other.real) {
489            core::cmp::Ordering::Equal => {}
490            ord => return ord,
491        }
492        match (&self.ghost, &other.ghost) {
493            (Some(l), Some(r)) => l.cmp(r),
494            (Some(_), None) => std::cmp::Ordering::Less,
495            (None, Some(_)) => std::cmp::Ordering::Greater,
496            (None, None) => std::cmp::Ordering::Equal,
497        }
498    }
499}
500
/// Given a first byte, determines how many bytes are in this
/// UTF-8 character.
///
/// Returns 0 for bytes that can't be the first byte of a UTF-8
/// character: `0x80..=0xC1` and `0xF5..=0xFF`.
#[inline]
pub const fn utf8_char_width(b: u8) -> u32 {
    // https://tools.ietf.org/html/rfc3629
    match b {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // 0x80..=0xC1 and 0xF5..=0xFF
        _ => 0,
    }
}