duat_core/text/
ops.rs

1//! Convenience operations for the [`Text`]
2//!
3//! These include the [`Point`] struct and traits that are meant to
4//! take many kinds of inputs, like the [`TwoPoints`], which is meant
5//! to interpret up to 2 [`Point`]s as a real and ghost position in
6//! the [`Text`].
7//!
8//! [`Text`]: super::Text
9use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
10
11use bincode::{Decode, Encode};
12
13use super::Item;
14
15/// A position in [`Text`]
16///
17/// [`Text`]: super::Text
18#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encode, Decode)]
19pub struct Point {
20    b: u32,
21    c: u32,
22    l: u32,
23}
24
25impl Point {
26    ////////// Creation of a Point
27
28    /// Returns a new [`Point`], at the first byte
29    pub fn new() -> Self {
30        Self::default()
31    }
32
33    /// Internal function to create [`Point`]s
34    pub(super) fn from_raw(b: usize, c: usize, l: usize) -> Self {
35        let (b, c, l) = (b as u32, c as u32, l as u32);
36        Self { b, c, l }
37    }
38
39    ////////// Querying functions
40
41    /// The len [`Point`] of a [`&str`]
42    ///
43    /// This is the equivalent of [`Text::len`], but for types
44    /// other than [`Text`]
45    ///
46    /// [`&str`]: str
47    /// [`Text::len`]: super::Bytes::len
48    /// [`Text`]: super::Text
49    pub fn len_of(str: impl AsRef<str>) -> Self {
50        let str = str.as_ref();
51        Self {
52            b: str.len() as u32,
53            c: str.chars().count() as u32,
54            l: str.bytes().filter(|c| *c == b'\n').count() as u32,
55        }
56    }
57
58    /// Returns the byte (relative to the beginning of the file)
59    /// of self. Indexed at 0
60    ///
61    /// You can use byte indices to index the [`Text`] or [`Bytes`]
62    /// with the [`Bytes::point_at_byte`] function.
63    ///
64    /// [`Text`]: super::Text
65    /// [`Bytes`]: super::Bytes
66    /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
67    pub fn byte(&self) -> usize {
68        self.b as usize
69    }
70
71    /// Returns the char index (relative to the beginning of the
72    /// file). Indexed at 0
73    ///
74    /// This is the primary value used when indexing the [`Text`] and
75    /// [`Bytes`]. That is, the [`Bytes::point_at_byte`], [`Bytes::strs`],
76    /// and most other [`Bytes`] functions rely on a character indices
77    /// (or [`Point`]s) for indexing a [`Text`].
78    ///
79    /// [`Text`]: super::Text
80    /// [`Bytes`]: super::Bytes
81    /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
82    /// [`Bytes::strs`]: super::Bytes::strs
83    pub fn char(&self) -> usize {
84        self.c as usize
85    }
86
87    /// Returns the line. Indexed at 0
88    ///
89    /// You can use byte indices to index the [`Text`] or [`Bytes`]
90    /// with the [`Bytes::point_at_line`] function.
91    ///
92    /// [`Text`]: super::Text
93    /// [`Bytes`]: super::Bytes
94    /// [`Bytes::point_at_line`]: super::Bytes::point_at_line
95    pub fn line(&self) -> usize {
96        self.l as usize
97    }
98
99    /// Checked [`Point`] subtraction
100    pub fn checked_sub(self, rhs: Point) -> Option<Point> {
101        Some(Self {
102            b: self.b.checked_sub(rhs.b)?,
103            c: self.c.checked_sub(rhs.c)?,
104            l: self.l.checked_sub(rhs.l)?,
105        })
106    }
107
108    ////////// Shifting functions
109
110    /// Moves a [`Point`] forward by one character
111    #[inline(always)]
112    pub(crate) fn fwd(self, char: char) -> Self {
113        Self {
114            b: self.b + char.len_utf8() as u32,
115            c: self.c + 1,
116            l: self.l + (char == '\n') as u32,
117        }
118    }
119
120    /// Moves a [`Point`] in reverse by one character
121    #[inline(always)]
122    pub(crate) fn rev(self, char: char) -> Self {
123        Self {
124            b: self.b - char.len_utf8() as u32,
125            c: self.c - 1,
126            l: self.l - (char == '\n') as u32,
127        }
128    }
129
130    /// Shifts the [`Point`] by a "signed point"
131    ///
132    /// This assumes that no overflow is going to happen
133    pub(crate) fn shift_by(self, [b, c, l]: [i32; 3]) -> Self {
134        Self {
135            b: (self.b as i32 + b) as u32,
136            c: (self.c as i32 + c) as u32,
137            l: (self.l as i32 + l) as u32,
138        }
139    }
140}
141
142impl std::fmt::Debug for Point {
143    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144        write!(f, "Point {{ b: {}, c: {}, l: {} }}", self.b, self.c, self.l)
145    }
146}
147
148impl std::fmt::Display for Point {
149    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150        write!(f, "{}, {}, {}", self.b, self.c, self.l)
151    }
152}
153
154impl std::ops::Add for Point {
155    type Output = Self;
156
157    fn add(self, rhs: Self) -> Self::Output {
158        Self {
159            b: self.b + rhs.b,
160            c: self.c + rhs.c,
161            l: self.l + rhs.l,
162        }
163    }
164}
165
166impl std::ops::AddAssign for Point {
167    fn add_assign(&mut self, rhs: Self) {
168        *self = *self + rhs;
169    }
170}
171
172impl std::ops::Sub for Point {
173    type Output = Self;
174
175    fn sub(self, rhs: Self) -> Self::Output {
176        Self {
177            b: self.b - rhs.b,
178            c: self.c - rhs.c,
179            l: self.l - rhs.l,
180        }
181    }
182}
183
184impl std::ops::SubAssign for Point {
185    fn sub_assign(&mut self, rhs: Self) {
186        *self = *self - rhs;
187    }
188}
189
/// Given a first byte, determines how many bytes are in this
/// UTF-8 character
///
/// Returns 0 for bytes that cannot start a character: the
/// continuation bytes `0x80..=0xBF`, as well as `0xC0`, `0xC1` and
/// `0xF5..=0xFF`.
#[inline]
pub const fn utf8_char_width(b: u8) -> u32 {
    // https://tools.ietf.org/html/rfc3629
    match b {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => 0,
    }
}
217
/// Ranges that can be used to index the [`Text`]
///
/// All of the [ranges] in [`std`] that implement either
/// [`RangeBounds<usize>`] or [`RangeBounds<Point>`] should work as an
/// argument. If it implements [`RangeBounds<usize>`], then the
/// `usize` represents a byte index in the [`Text`], which is the
/// same unit that the [`Point`] based ranges are converted to, via
/// [`Point::byte`].
///
/// [`Text`]: super::Text
/// [ranges]: std::range
/// [`RangeBounds<usize>`]: std::ops::RangeBounds
/// [`RangeBounds<Point>`]: std::ops::RangeBounds
pub trait TextRange: Clone {
    /// A "forward facing range"
    ///
    /// If given a single [`usize`]/[`Point`], acts like [`RangeFrom`]
    ///
    /// The implementations in this module clamp both ends of the
    /// returned [`Range<usize>`] to `max`.
    fn to_range(self, max: usize) -> Range<usize>;
}
235
236implTextRange!(Range, r, r.start, r.end, r.start.byte(), r.end.byte());
237implTextRange!(
238    RangeInclusive,
239    r,
240    *r.start(),
241    r.end() + 1,
242    r.start().byte(),
243    r.end().byte() + 1
244);
245implTextRange!(RangeTo, r, 0, r.end, 0, r.end.byte());
246implTextRange!(RangeToInclusive, r, 0, r.end, 0, r.end.byte());
247implTextRange!(RangeFrom, r, r.start, MAX, r.start.byte(), MAX);
248
249impl TextRange for RangeFull {
250    fn to_range(self, max: usize) -> Range<usize> {
251        0..max
252    }
253}
254
/// Either a [`TextRange`], a [`usize`] or a [`Point`]
///
/// This trait exists for [`Tag`] removal, in the [`Tags::remove`]
/// and [`Text::remove_tags`] functions. Accepting all of these
/// types through one trait reduces the number of functions exposed
/// to API users.
///
/// [`Tag`]: super::Tag
/// [`Tags::remove`]: super::Tags::remove
/// [`Text::remove_tags`]: super::Text::remove_tags
pub trait TextRangeOrPoint {
    /// Transforms `self` into a [`Range<usize>`]
    ///
    /// The implementations in this module clamp both ends of the
    /// returned range to `max`.
    fn to_range(self, max: usize) -> Range<usize>;
}
269
270impl TextRangeOrPoint for usize {
271    fn to_range(self, max: usize) -> Range<usize> {
272        max.min(self)..max.min(self + 1)
273    }
274}
275
276impl TextRangeOrPoint for Point {
277    fn to_range(self, max: usize) -> Range<usize> {
278        max.min(self.byte())..max.min(self.byte() + 1)
279    }
280}
281
282impl TextRangeOrPoint for RangeFull {
283    fn to_range(self, max: usize) -> Range<usize> {
284        TextRange::to_range(self, max)
285    }
286}
287
288implTextRangeOrPoint!(Range);
289implTextRangeOrPoint!(RangeInclusive);
290implTextRangeOrPoint!(RangeTo);
291implTextRangeOrPoint!(RangeToInclusive);
292implTextRangeOrPoint!(RangeFrom);
293
294/// Two positions, one for the [`Text`], and one for [ghost text]
295///
296/// This can either be a [`Point`] or `(Point, Option<Point>)` or
297/// even `(Point, Point)`. If a second [`Point`] is excluded, it
298/// is assumed to be [`Point::default()`], i.e., this
299/// [`TwoPoints`] represents the beginning of a [ghost text].
300///
301/// [`Text`]: super::Text
302/// [ghost text]: super::Ghost
303pub trait TwoPoints: Clone + Copy + std::fmt::Debug {
304    /// Returns two [`Point`]s, for `Text` and ghosts
305    fn to_points(self) -> (Point, Option<Point>);
306}
307
308impl TwoPoints for Point {
309    fn to_points(self) -> (Point, Option<Point>) {
310        (self, None)
311    }
312}
313
314impl TwoPoints for (Point, Point) {
315    fn to_points(self) -> (Point, Option<Point>) {
316        (self.0, Some(self.1))
317    }
318}
319
320impl TwoPoints for (Point, Option<Point>) {
321    fn to_points(self) -> (Point, Option<Point>) {
322        self
323    }
324}
325
326impl TwoPoints for Item {
327    fn to_points(self) -> (Point, Option<Point>) {
328        (self.real, self.ghost)
329    }
330}
331
/// Stand-in end for the ranges that have no upper bound; the
/// `to_range` functions clamp it to their `max` argument
const MAX: usize = usize::MAX;
333
334macro implTextRange($range:ident, $r:ident, $sb:expr, $eb:expr, $sp:expr, $ep:expr) {
335    impl TextRange for $range<usize> {
336        fn to_range(self, max: usize) -> Range<usize> {
337            let $r = self;
338            max.min($sb)..max.min($eb)
339        }
340    }
341
342    impl TextRange for $range<Point> {
343        fn to_range(self, max: usize) -> Range<usize> {
344            let $r = self;
345            max.min($sp)..max.min($ep)
346        }
347    }
348}
349
350macro implTextRangeOrPoint($range:ident) {
351    impl TextRangeOrPoint for $range<usize> {
352        fn to_range(self, max: usize) -> Range<usize> {
353            TextRange::to_range(self, max)
354        }
355    }
356
357    impl TextRangeOrPoint for $range<Point> {
358        fn to_range(self, max: usize) -> Range<usize> {
359            TextRange::to_range(self, max)
360        }
361    }
362}