duat_core/text/
utils.rs

1//! Convenience operations for the [`Text`]
2//!
//! These include the [`Point`] and [`TwoPoints`] structs, as well as
//! traits that are meant to take many kinds of inputs, like
//! [`TextRange`]. A [`TwoPoints`] interprets up to 2 [`Point`]s as a
//! real and a ghost position in the [`Text`].
7//!
8//! [`Text`]: super::Text
9use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
10
11use bincode::{Decode, Encode};
12
// Implements `PartialEq<$Other> for $Self` from a caller-supplied body.
//
// - `$self: $Self` and `$other: $Other` name the two operands and give
//   their types; both identifiers can be used inside the body.
// - `$($impl)+` becomes the tail of `eq`, so it must evaluate to a
//   `bool`.
macro_rules! implPartialEq {
    ($self:ident: $Self:ty, $other:ident: $Other:ty, $($impl:tt)+) => {
        impl PartialEq<$Other> for $Self {
            fn eq(&self, other: &$Other) -> bool {
                // The receiver is already a `&$Self`, so `$self` ends up
                // bound to a `&&$Self`, while `$other` stays a `&$Other`.
                let ($self, $other) = (&self, other);
                $($impl)+
            }
        }
    }
}
23
24pub(super) use implPartialEq;
25
// Implements `TextRange` for `$range<usize>` and `$range<Point>`.
//
// - `$range`: the range type being implemented (e.g. `Range`).
// - `$r`: the identifier that `self` is bound to, so that the four
//   expressions below are able to refer to the range.
// - `$sb`, `$eb`: start and exclusive end byte of the `usize` version.
// - `$sp`, `$ep`: start and exclusive end byte of the `Point` version.
//
// In both versions, each bound is clamped to `max`.
macro_rules! implTextRange {
    ($range:ident, $r:ident, $sb:expr, $eb:expr, $sp:expr, $ep:expr) => {
        impl TextRange for $range<usize> {
            fn to_range(self, max: usize) -> Range<usize> {
                // Gives the caller's expressions a name for `self`.
                let $r = self;
                max.min($sb)..max.min($eb)
            }
        }

        impl TextRange for $range<Point> {
            fn to_range(self, max: usize) -> Range<usize> {
                // Gives the caller's expressions a name for `self`.
                let $r = self;
                max.min($sp)..max.min($ep)
            }
        }
    };
}
43
// Implements `TextRangeOrIndex` for `$range<usize>` and
// `$range<Point>` by forwarding to their `TextRange` implementations.
//
// `$range` is the range type being implemented (e.g. `Range`), which
// must already implement `TextRange` for both `usize` and `Point`.
macro_rules! implTextRangeOrIndex {
    ($range:ident) => {
        impl TextRangeOrIndex for $range<usize> {
            fn to_range(self, max: usize) -> Range<usize> {
                // Both traits have a `to_range`, so the call is qualified.
                TextRange::to_range(self, max)
            }
        }

        impl TextRangeOrIndex for $range<Point> {
            fn to_range(self, max: usize) -> Range<usize> {
                // Both traits have a `to_range`, so the call is qualified.
                TextRange::to_range(self, max)
            }
        }
    };
}
59
/// A position in [`Text`]
///
/// A `Point` keeps three indices for the same position: the byte,
/// the char and the line. Each of them is stored as a `u32`.
///
/// The derived ordering compares the fields in declaration order,
/// that is, the byte first, then the char, then the line.
///
/// [`Text`]: super::Text
#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encode, Decode)]
pub struct Point {
    /// The byte index, exposed by [`Point::byte`]
    byte: u32,
    /// The char index, exposed by [`Point::char`]
    char: u32,
    /// The line index, exposed by [`Point::line`]
    line: u32,
}
69
70impl Point {
71    /// Returns a new `Point`, at the first byte
72    pub const fn new() -> Self {
73        Point { byte: 0, char: 0, line: 0 }
74    }
75
76    /// A `Point` from raw indices
77    pub const fn from_raw(b: usize, c: usize, l: usize) -> Self {
78        let (b, c, l) = (b as u32, c as u32, l as u32);
79        Self { byte: b, char: c, line: l }
80    }
81
82    /// Returns a new [`TwoPoints`] that includes the [`Ghost`]s in
83    /// the same byte, if there is one
84    ///
85    /// [`Ghost`]: super::Ghost
86    pub const fn to_two_points_before(self) -> TwoPoints {
87        TwoPoints::new_before_ghost(self)
88    }
89
90    /// Returns a new [`TwoPoints`] that skips the [`Ghost`]s in the
91    /// same byte, if there is one
92    ///
93    /// [`Ghost`]: super::Ghost
94    pub const fn to_two_points_after(self) -> TwoPoints {
95        TwoPoints::new_after_ghost(self)
96    }
97
98    ////////// Querying functions
99
100    /// The len [`Point`] of a [`&str`]
101    ///
102    /// This is the equivalent of [`Text::len`], but for types
103    /// other than [`Text`]
104    ///
105    /// [`&str`]: str
106    /// [`Text::len`]: super::Bytes::len
107    /// [`Text`]: super::Text
108    pub fn len_of(str: impl AsRef<str>) -> Self {
109        let str = str.as_ref();
110        Self {
111            byte: str.len() as u32,
112            char: str.chars().count() as u32,
113            line: str.bytes().filter(|c| *c == b'\n').count() as u32,
114        }
115    }
116
117    /// Returns the byte (relative to the beginning of the buffer)
118    /// of self. Indexed at 0
119    ///
120    /// You can use byte indices to index the [`Text`] or [`Bytes`]
121    /// with the [`Bytes::point_at_byte`] function.
122    ///
123    /// [`Text`]: super::Text
124    /// [`Bytes`]: super::Bytes
125    /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
126    pub const fn byte(&self) -> usize {
127        self.byte as usize
128    }
129
130    /// Returns the char index (relative to the beginning of the
131    /// buffer). Indexed at 0
132    ///
133    /// This is the primary value used when indexing the [`Text`] and
134    /// [`Bytes`]. That is, the [`Bytes::point_at_byte`],
135    /// [`Bytes::strs`], and most other [`Bytes`] functions rely
136    /// on a character indices (or [`Point`]s) for indexing a
137    /// [`Text`].
138    ///
139    /// [`Text`]: super::Text
140    /// [`Bytes`]: super::Bytes
141    /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
142    /// [`Bytes::strs`]: super::Bytes::strs
143    pub const fn char(&self) -> usize {
144        self.char as usize
145    }
146
147    /// Returns the line. Indexed at 0
148    ///
149    /// You can use byte indices to index the [`Text`] or [`Bytes`]
150    /// with the [`Bytes::point_at_line`] function.
151    ///
152    /// [`Text`]: super::Text
153    /// [`Bytes`]: super::Bytes
154    /// [`Bytes::point_at_line`]: super::Bytes::point_at_line
155    pub const fn line(&self) -> usize {
156        self.line as usize
157    }
158
159    /// Checked [`Point`] subtraction
160    pub fn checked_sub(self, rhs: Point) -> Option<Point> {
161        Some(Self {
162            byte: self.byte.checked_sub(rhs.byte)?,
163            char: self.char.checked_sub(rhs.char)?,
164            line: self.line.checked_sub(rhs.line)?,
165        })
166    }
167
168    ////////// Shifting functions
169
170    /// Moves a [`Point`] forward by one character
171    #[inline(always)]
172    pub(crate) const fn fwd(self, char: char) -> Self {
173        Self {
174            byte: self.byte + char.len_utf8() as u32,
175            char: self.char + 1,
176            line: self.line + (char == '\n') as u32,
177        }
178    }
179
180    /// Moves a [`Point`] in reverse by one character
181    #[inline(always)]
182    pub(crate) const fn rev(self, char: char) -> Self {
183        Self {
184            byte: self.byte - char.len_utf8() as u32,
185            char: self.char - 1,
186            line: self.line - (char == '\n') as u32,
187        }
188    }
189
190    /// Shifts the [`Point`] by a "signed point"
191    ///
192    /// This assumes that no overflow is going to happen
193    pub(crate) const fn shift_by(self, [b, c, l]: [i32; 3]) -> Self {
194        Self {
195            byte: (self.byte as i32 + b) as u32,
196            char: (self.char as i32 + c) as u32,
197            line: (self.line as i32 + l) as u32,
198        }
199    }
200
201    /// Returns a signed representation of this [`Point`]
202    ///
203    /// In this representation, the indices 0, 1 and 2 are the byte,
204    /// char and line, respectively.
205    pub(crate) const fn as_signed(self) -> [i32; 3] {
206        [self.byte as i32, self.char as i32, self.line as i32]
207    }
208}
209
210impl std::fmt::Debug for Point {
211    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212        write!(
213            f,
214            "Point {{ b: {}, c: {}, l: {} }}",
215            self.byte, self.char, self.line
216        )
217    }
218}
219
220impl std::fmt::Display for Point {
221    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222        write!(f, "{}, {}, {}", self.byte, self.char, self.line)
223    }
224}
225
226impl std::ops::Add for Point {
227    type Output = Self;
228
229    fn add(self, rhs: Self) -> Self::Output {
230        Self {
231            byte: self.byte + rhs.byte,
232            char: self.char + rhs.char,
233            line: self.line + rhs.line,
234        }
235    }
236}
237
238impl std::ops::AddAssign for Point {
239    fn add_assign(&mut self, rhs: Self) {
240        *self = *self + rhs;
241    }
242}
243
244impl std::ops::Sub for Point {
245    type Output = Self;
246
247    fn sub(self, rhs: Self) -> Self::Output {
248        Self {
249            byte: self.byte - rhs.byte,
250            char: self.char - rhs.char,
251            line: self.line - rhs.line,
252        }
253    }
254}
255
256impl std::ops::SubAssign for Point {
257    fn sub_assign(&mut self, rhs: Self) {
258        *self = *self - rhs;
259    }
260}
261
/// A [`Point`] or a `usize`, representing a byte index
///
/// In Duat, [`Point`]s are _usually_ just "thin wrappers" around a
/// byte index, useful for getting other information about a place in
/// the [`Text`], but that extra information is normally ignored when
/// doing internal calculations.
///
/// For that reason, Duat allows users to use either [`Point`]s _or_
/// byte indices in order to index the [`Text`], for convenience's
/// sake.
///
/// [`Text`]: super::Text
pub trait TextIndex: Clone + Copy + std::fmt::Debug {
    /// Converts this type into a byte index.
    ///
    /// For a [`Point`], this is its byte, and for a `usize`, this is
    /// the value itself.
    fn to_byte_index(self) -> usize;
}
278
279impl TextIndex for Point {
280    fn to_byte_index(self) -> usize {
281        self.byte()
282    }
283}
284
impl TextIndex for usize {
    /// A `usize` already is a byte index, so it is returned unchanged
    fn to_byte_index(self) -> usize {
        self
    }
}
290
/// Ranges that can be used to index the [`Text`]
///
/// All of the [ranges] in [`std`] that implement either
/// [`RangeBounds<usize>`] or [`RangeBounds<Point>`] should work as an
/// argument. If it implements [`RangeBounds<usize>`], then the
/// `usize` represents a byte index in the [`Text`].
///
/// [`Text`]: super::Text
/// [ranges]: std::range
/// [`RangeBounds<usize>`]: std::ops::RangeBounds
/// [`RangeBounds<Point>`]: std::ops::RangeBounds
pub trait TextRange: Clone {
    /// A "forward facing range"
    ///
    /// If given a single [`usize`]/[`Point`], acts like [`RangeFrom`]
    ///
    /// The implementations in this module return byte indices, with
    /// both ends of the range clamped to `max`.
    fn to_range(self, max: usize) -> Range<usize>;
}
308
309implTextRange!(Range, r, r.start, r.end, r.start.byte(), r.end.byte());
310implTextRange!(
311    RangeInclusive,
312    r,
313    *r.start(),
314    r.end() + 1,
315    r.start().byte(),
316    r.end().byte() + 1
317);
318implTextRange!(RangeTo, r, 0, r.end, 0, r.end.byte());
319implTextRange!(RangeToInclusive, r, 0, r.end, 0, r.end.byte());
320implTextRange!(RangeFrom, r, r.start, MAX, r.start.byte(), MAX);
321
322impl TextRange for RangeFull {
323    fn to_range(self, max: usize) -> Range<usize> {
324        0..max
325    }
326}
327
/// Either a [`TextRange`], a [`usize`] or a [`Point`]
///
/// In all cases, they represent a byte index from the start of the
/// [`Text`]
///
/// This trait's purpose is to be used for [`Tag`] removal in the
/// [`Tags::remove`] and [`Text::remove_tags`] functions. This is
/// useful in order to reduce the number of functions exposed to API
/// users.
///
/// [`Tag`]: super::Tag
/// [`Tags::remove`]: super::Tags::remove
/// [`Text::remove_tags`]: super::Text::remove_tags
/// [`Text`]: super::Text
pub trait TextRangeOrIndex {
    /// Transforms `self` into a [`Range<usize>`]
    ///
    /// `max` is the upper limit for both ends of the returned range.
    fn to_range(self, max: usize) -> Range<usize>;
}

impl TextRangeOrIndex for usize {
    /// The one byte range starting at this index, with both ends
    /// clamped to `max`
    ///
    /// An index at or beyond `max` results in the empty range
    /// `max..max`.
    fn to_range(self, max: usize) -> Range<usize> {
        // Saturating, so that an index of `usize::MAX` doesn't
        // overflow before being clamped to `max`.
        let end = self.saturating_add(1);
        max.min(self)..max.min(end)
    }
}
352
353impl TextRangeOrIndex for Point {
354    fn to_range(self, max: usize) -> Range<usize> {
355        max.min(self.byte())..max.min(self.byte() + 1)
356    }
357}
358
359impl TextRangeOrIndex for RangeFull {
360    fn to_range(self, max: usize) -> Range<usize> {
361        TextRange::to_range(self, max)
362    }
363}
364
// `TextRangeOrIndex` for every range type, over both `usize` and
// `Point`. Each of these forwards to the matching `TextRange`
// implementation.
implTextRangeOrIndex!(Range);
implTextRangeOrIndex!(RangeInclusive);
implTextRangeOrIndex!(RangeTo);
implTextRangeOrIndex!(RangeToInclusive);
implTextRangeOrIndex!(RangeFrom);
370
/// A struct used to exactly pinpoint a position in [`Text`], used
/// when printing
///
/// This struct has two inner components, a `real` [`Point`], and a
/// `ghost` [`Option<Point>`]. The second component is used whenever
/// you want to print a [`Ghost`] `Text`, either fully or partially.
///
/// The `ghost` component represents the "sum position" of all
/// `Ghost`s in that same byte. For example if there are two ghosts in
/// a single byte, if you pass `ghost == ghost1.len()`, then only the
/// second ghost will be included in this iteration.
///
/// [`TwoPoints::default`] is the same as calling
/// [`TwoPoints::new_after_ghost`] on the first [`Point`], since the
/// derived default for the `ghost` component is [`None`].
///
/// When comparing two `TwoPoints` with the same `real` component, a
/// `ghost` of [`Some`] comes before a `ghost` of [`None`].
///
/// [`Text`]: super::Text
/// [`Ghost`]: super::Ghost
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Encode, Decode, Hash)]
pub struct TwoPoints {
    /// The real `Point` in the [`Text`]
    ///
    /// [`Text`]: super::Text
    pub real: Point,
    /// A possible point in a [`Ghost`]
    ///
    /// A value of [`None`] means that this is either at the end of
    /// the ghosts at a byte (i.e. this `TwoPoints` represents a real
    /// character), or this byte index doesn't have any ghosts at all.
    ///
    /// A value of [`Some`] means that this `TwoPoints` does _not_
    /// represent a real character, so it points to a character
    /// belonging to a [`Ghost`]
    ///
    /// If you don't know how to set this value, you should try to use
    /// the [`new`], [`new_before_ghost`] or [`new_after_ghost`]
    /// functions.
    ///
    /// [`new`]: Self::new
    /// [`new_before_ghost`]: Self::new_before_ghost
    /// [`new_after_ghost`]: Self::new_after_ghost
    /// [`Ghost`]: super::Ghost
    pub ghost: Option<Point>,
}
413
414impl TwoPoints {
415    /// Returns a fully qualified `TwoPoints`
416    ///
417    /// This will include a precise `real` [`Point`] as well as a
418    /// precise `ghost` [`Point`].
419    ///
420    /// If you don't want to deal with ghosts, see
421    /// [`TwoPoints::new_before_ghost`] and
422    /// [`TwoPoints::new_after_ghost`].
423    pub const fn new(real: Point, ghost: Point) -> Self {
424        Self { real, ghost: Some(ghost) }
425    }
426
427    /// Returns a new `TwoPoints` that will include the [`Ghost`]
428    /// before the real [`Point`]
429    ///
430    /// [`Ghost`]: super::Ghost
431    pub const fn new_before_ghost(real: Point) -> Self {
432        Self { real, ghost: Some(Point::new()) }
433    }
434
435    /// Returns a new `TwoPoints` that will exclude the [`Ghost`]
436    /// before the real [`Point`]
437    ///
438    /// [`Ghost`]: super::Ghost
439    pub const fn new_after_ghost(real: Point) -> Self {
440        Self { real, ghost: None }
441    }
442}
443
444impl std::cmp::PartialOrd for TwoPoints {
445    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
446        Some(self.cmp(other))
447    }
448}
449
450impl Ord for TwoPoints {
451    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
452        match self.real.cmp(&other.real) {
453            core::cmp::Ordering::Equal => {}
454            ord => return ord,
455        }
456        match (&self.ghost, &other.ghost) {
457            (Some(l), Some(r)) => l.cmp(r),
458            (Some(_), None) => std::cmp::Ordering::Less,
459            (None, Some(_)) => std::cmp::Ordering::Greater,
460            (None, None) => std::cmp::Ordering::Equal,
461        }
462    }
463}
464
/// Shorthand for [`usize::MAX`]
const MAX: usize = usize::MAX;
466
/// Given a first byte, determines how many bytes are in this
/// UTF-8 character
///
/// Returns 0 for the bytes that can't start a character: the
/// continuation bytes (`0x80..=0xBF`), `0xC0`, `0xC1`, and everything
/// from `0xF5` onwards.
#[inline]
pub const fn utf8_char_width(b: u8) -> u32 {
    // https://tools.ietf.org/html/rfc3629
    match b {
        // ASCII
        0x00..=0x7F => 1,
        // Leading byte of a 2 byte sequence
        0xC2..=0xDF => 2,
        // Leading byte of a 3 byte sequence
        0xE0..=0xEF => 3,
        // Leading byte of a 4 byte sequence
        0xF0..=0xF4 => 4,
        // Not a valid first byte
        _ => 0,
    }
}