duat_core/text/
ops.rs

1//! Convenience operations for the [`Text`]
2//!
3//! These include the [`Point`] struct and traits that are meant to
4//! take many kinds of inputs, like the [`TwoPoints`], which is meant
5//! to interpret up to 2 [`Point`]s as a real and ghost position in
6//! the [`Text`].
7//!
8//! [`Text`]: super::Text
9use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
10
11use bincode::{Decode, Encode};
12
/// Generates the [`TextRange`] impls for one of the standard range
/// types
///
/// Each invocation produces two impls: one for `$kind<usize>`, whose
/// bounds are byte indices, and one for `$kind<Point>`.
///
/// The arguments are, in order: the range type, the identifier that
/// the range value is bound to inside of the bound expressions, the
/// start and end expressions of the `usize` impl, and the start and
/// end expressions of the `Point` impl. Both resulting bounds are
/// capped at `max`.
macro_rules! implTextRange {
    (
        $kind:ident,
        $bind:ident,
        $usize_start:expr,
        $usize_end:expr,
        $point_start:expr,
        $point_end:expr
    ) => {
        impl TextRange for $kind<usize> {
            fn to_range(self, max: usize) -> Range<usize> {
                let $bind = self;
                let start: usize = $usize_start;
                let end: usize = $usize_end;
                max.min(start)..max.min(end)
            }
        }

        impl TextRange for $kind<Point> {
            fn to_range(self, max: usize) -> Range<usize> {
                let $bind = self;
                let start: usize = $point_start;
                let end: usize = $point_end;
                max.min(start)..max.min(end)
            }
        }
    };
}
30
/// Generates the [`TextRangeOrIndex`] impls for one of the standard
/// range types
///
/// Both the `$kind<usize>` and the `$kind<Point>` impls just hand the
/// work over to the [`TextRange`] impl of the same type.
macro_rules! implTextRangeOrIndex {
    ($kind:ident) => {
        impl TextRangeOrIndex for $kind<usize> {
            fn to_range(self, max: usize) -> Range<usize> {
                <Self as TextRange>::to_range(self, max)
            }
        }

        impl TextRangeOrIndex for $kind<Point> {
            fn to_range(self, max: usize) -> Range<usize> {
                <Self as TextRange>::to_range(self, max)
            }
        }
    };
}
46
47/// A position in [`Text`]
48///
49/// [`Text`]: super::Text
50#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encode, Decode)]
51pub struct Point {
52    byte: u32,
53    char: u32,
54    line: u32,
55}
56
57impl Point {
58    /// Returns a new [`Point`], at the first byte
59    pub const fn new() -> Self {
60        Point { byte: 0, char: 0, line: 0 }
61    }
62
63    /// Internal function to create [`Point`]s
64    pub(super) const fn from_raw(b: usize, c: usize, l: usize) -> Self {
65        let (b, c, l) = (b as u32, c as u32, l as u32);
66        Self { byte: b, char: c, line: l }
67    }
68
69    /// Returns a new [`TwoPoints`] that includes the [`Ghost`]s in
70    /// the same byte, if there is one
71    ///
72    /// [`Ghost`]: super::Ghost
73    pub const fn to_two_points_before(self) -> TwoPoints {
74        TwoPoints::new_before_ghost(self)
75    }
76
77    /// Returns a new [`TwoPoints`] that skips the [`Ghost`]s in the
78    /// same byte, if there is one
79    ///
80    /// [`Ghost`]: super::Ghost
81    pub const fn to_two_points_after(self) -> TwoPoints {
82        TwoPoints::new_after_ghost(self)
83    }
84
85    ////////// Querying functions
86
87    /// The len [`Point`] of a [`&str`]
88    ///
89    /// This is the equivalent of [`Text::len`], but for types
90    /// other than [`Text`]
91    ///
92    /// [`&str`]: str
93    /// [`Text::len`]: super::Bytes::len
94    /// [`Text`]: super::Text
95    pub fn len_of(str: impl AsRef<str>) -> Self {
96        let str = str.as_ref();
97        Self {
98            byte: str.len() as u32,
99            char: str.chars().count() as u32,
100            line: str.bytes().filter(|c| *c == b'\n').count() as u32,
101        }
102    }
103
104    /// Returns the byte (relative to the beginning of the buffer)
105    /// of self. Indexed at 0
106    ///
107    /// You can use byte indices to index the [`Text`] or [`Bytes`]
108    /// with the [`Bytes::point_at_byte`] function.
109    ///
110    /// [`Text`]: super::Text
111    /// [`Bytes`]: super::Bytes
112    /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
113    pub const fn byte(&self) -> usize {
114        self.byte as usize
115    }
116
117    /// Returns the char index (relative to the beginning of the
118    /// buffer). Indexed at 0
119    ///
120    /// This is the primary value used when indexing the [`Text`] and
121    /// [`Bytes`]. That is, the [`Bytes::point_at_byte`],
122    /// [`Bytes::strs`], and most other [`Bytes`] functions rely
123    /// on a character indices (or [`Point`]s) for indexing a
124    /// [`Text`].
125    ///
126    /// [`Text`]: super::Text
127    /// [`Bytes`]: super::Bytes
128    /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
129    /// [`Bytes::strs`]: super::Bytes::strs
130    pub const fn char(&self) -> usize {
131        self.char as usize
132    }
133
134    /// Returns the line. Indexed at 0
135    ///
136    /// You can use byte indices to index the [`Text`] or [`Bytes`]
137    /// with the [`Bytes::point_at_line`] function.
138    ///
139    /// [`Text`]: super::Text
140    /// [`Bytes`]: super::Bytes
141    /// [`Bytes::point_at_line`]: super::Bytes::point_at_line
142    pub const fn line(&self) -> usize {
143        self.line as usize
144    }
145
146    /// Checked [`Point`] subtraction
147    pub fn checked_sub(self, rhs: Point) -> Option<Point> {
148        Some(Self {
149            byte: self.byte.checked_sub(rhs.byte)?,
150            char: self.char.checked_sub(rhs.char)?,
151            line: self.line.checked_sub(rhs.line)?,
152        })
153    }
154
155    ////////// Shifting functions
156
157    /// Moves a [`Point`] forward by one character
158    #[inline(always)]
159    pub(crate) const fn fwd(self, char: char) -> Self {
160        Self {
161            byte: self.byte + char.len_utf8() as u32,
162            char: self.char + 1,
163            line: self.line + (char == '\n') as u32,
164        }
165    }
166
167    /// Moves a [`Point`] in reverse by one character
168    #[inline(always)]
169    pub(crate) const fn rev(self, char: char) -> Self {
170        Self {
171            byte: self.byte - char.len_utf8() as u32,
172            char: self.char - 1,
173            line: self.line - (char == '\n') as u32,
174        }
175    }
176
177    /// Shifts the [`Point`] by a "signed point"
178    ///
179    /// This assumes that no overflow is going to happen
180    pub(crate) const fn shift_by(self, [b, c, l]: [i32; 3]) -> Self {
181        Self {
182            byte: (self.byte as i32 + b) as u32,
183            char: (self.char as i32 + c) as u32,
184            line: (self.line as i32 + l) as u32,
185        }
186    }
187
188    /// Returns a signed representation of this [`Point`]
189    ///
190    /// In this representation, the indices 0, 1 and 2 are the byte,
191    /// char and line, respectively.
192    pub(crate) const fn as_signed(self) -> [i32; 3] {
193        [self.byte as i32, self.char as i32, self.line as i32]
194    }
195}
196
197impl std::fmt::Debug for Point {
198    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
199        write!(
200            f,
201            "Point {{ b: {}, c: {}, l: {} }}",
202            self.byte, self.char, self.line
203        )
204    }
205}
206
207impl std::fmt::Display for Point {
208    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209        write!(f, "{}, {}, {}", self.byte, self.char, self.line)
210    }
211}
212
213impl std::ops::Add for Point {
214    type Output = Self;
215
216    fn add(self, rhs: Self) -> Self::Output {
217        Self {
218            byte: self.byte + rhs.byte,
219            char: self.char + rhs.char,
220            line: self.line + rhs.line,
221        }
222    }
223}
224
225impl std::ops::AddAssign for Point {
226    fn add_assign(&mut self, rhs: Self) {
227        *self = *self + rhs;
228    }
229}
230
231impl std::ops::Sub for Point {
232    type Output = Self;
233
234    fn sub(self, rhs: Self) -> Self::Output {
235        Self {
236            byte: self.byte - rhs.byte,
237            char: self.char - rhs.char,
238            line: self.line - rhs.line,
239        }
240    }
241}
242
243impl std::ops::SubAssign for Point {
244    fn sub_assign(&mut self, rhs: Self) {
245        *self = *self - rhs;
246    }
247}
248
/// Something that can be turned into a byte index: a [`Point`] or a
/// `usize`
///
/// In Duat, a [`Point`] is _usually_ little more than a "thin
/// wrapper" around a byte index. The additional information it
/// carries is handy for learning more about a place in the [`Text`],
/// but internal calculations normally disregard it.
///
/// That is why, for convenience, Duat lets users index the [`Text`]
/// with either [`Point`]s _or_ plain byte indices.
///
/// [`Text`]: super::Text
pub trait TextIndex: Clone + Copy + std::fmt::Debug {
    /// The byte index that this value stands for
    fn to_byte_index(self) -> usize;
}
265
266impl TextIndex for Point {
267    fn to_byte_index(self) -> usize {
268        self.byte()
269    }
270}
271
impl TextIndex for usize {
    /// A `usize` already is a byte index, so it is returned as is
    fn to_byte_index(self) -> usize {
        self
    }
}
277
/// Ranges that can be used to index the [`Text`]
///
/// This module implements it for [`Range`], [`RangeInclusive`],
/// [`RangeTo`], [`RangeToInclusive`] and [`RangeFrom`], over both
/// `usize` and [`Point`], as well as for [`RangeFull`]. With `usize`
/// bounds, each bound is a byte index in the [`Text`]; with
/// [`Point`] bounds, the byte of each [`Point`] is used.
///
/// [`Text`]: super::Text
pub trait TextRange: Clone {
    /// Turns `self` into a "forward facing" [`Range`] of byte indices
    ///
    /// `max` is an upper limit: the implementations in this module
    /// cap both ends of the returned range at it.
    fn to_range(self, max: usize) -> Range<usize>;
}
295
296implTextRange!(Range, r, r.start, r.end, r.start.byte(), r.end.byte());
297implTextRange!(
298    RangeInclusive,
299    r,
300    *r.start(),
301    r.end() + 1,
302    r.start().byte(),
303    r.end().byte() + 1
304);
305implTextRange!(RangeTo, r, 0, r.end, 0, r.end.byte());
306implTextRange!(RangeToInclusive, r, 0, r.end, 0, r.end.byte());
307implTextRange!(RangeFrom, r, r.start, MAX, r.start.byte(), MAX);
308
309impl TextRange for RangeFull {
310    fn to_range(self, max: usize) -> Range<usize> {
311        0..max
312    }
313}
314
/// A [`TextRange`], a [`usize`] or a [`Point`]
///
/// Whichever one it is, the value stands for byte indices counted
/// from the start of the [`Text`].
///
/// This trait exists for [`Tag`] removal, in the [`Tags::remove`]
/// and [`Text::remove_tags`] functions, where it helps keep the
/// number of functions exposed to API users down.
///
/// [`Tag`]: super::Tag
/// [`Tags::remove`]: super::Tags::remove
/// [`Text::remove_tags`]: super::Text::remove_tags
/// [`Text`]: super::Text
pub trait TextRangeOrIndex {
    /// Turns `self` into a [`Range<usize>`] of byte indices
    ///
    /// `max` is an upper limit: the implementations in this module
    /// cap both ends of the returned range at it.
    fn to_range(self, max: usize) -> Range<usize>;
}
333
334impl TextRangeOrIndex for usize {
335    fn to_range(self, max: usize) -> Range<usize> {
336        max.min(self)..max.min(self + 1)
337    }
338}
339
340impl TextRangeOrIndex for Point {
341    fn to_range(self, max: usize) -> Range<usize> {
342        max.min(self.byte())..max.min(self.byte() + 1)
343    }
344}
345
346impl TextRangeOrIndex for RangeFull {
347    fn to_range(self, max: usize) -> Range<usize> {
348        TextRange::to_range(self, max)
349    }
350}
351
352implTextRangeOrIndex!(Range);
353implTextRangeOrIndex!(RangeInclusive);
354implTextRangeOrIndex!(RangeTo);
355implTextRangeOrIndex!(RangeToInclusive);
356implTextRangeOrIndex!(RangeFrom);
357
358/// A struct used to exactly pinpoint a position in [`Text`], used
359/// when printing
360///
361/// This struct has two inner components, a `real` [`Point`], and a
362/// `ghost` [`Option<Point>`]. The second component is used whenever
363/// you want to print a [`Ghost`] `Text`, either fully or partially.
364///
365/// The `ghost` component represents the "sum position" of all
366/// `Ghost`s in that same byte. For example if there are two ghosts in
367/// a single byte, if you pass `ghost == ghost1.len()`, then only the
368/// second ghost will be included in this iteration.
369///
370/// [`TwoPoints::default`] will include the first [`Ghost`].
371///
372/// [`Text`]: super::Text
373/// [`Ghost`]: super::Ghost
374#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Encode, Decode)]
375pub struct TwoPoints {
376    /// The real `Point` in the [`Text`]
377    ///
378    /// [`Text`]: super::Text
379    pub real: Point,
380    /// A possible point in a [`Ghost`]
381    ///
382    /// A value of [`None`] means that this is either at the end of
383    /// the ghosts at a byte (i.e. this `TwoPoints` represents a real
384    /// character), or this byte index doesn't have any ghosts at all.
385    ///
386    /// A value of [`Some`] means that this `TwoPoints` does _not_
387    /// represent a real character, so it points to a character
388    /// belonging to a [`Ghost`]
389    ///
390    /// If you don't know how to set this value, you should try to use
391    /// the [`new`], [`new_before_ghost`] or [`new_after_ghost`]
392    /// functions.
393    ///
394    /// [`new`]: Self::new
395    /// [`new_before_ghost`]: Self::new_before_ghost
396    /// [`new_after_ghost`]: Self::new_after_ghost
397    /// [`Ghost`]: super::Ghost
398    pub ghost: Option<Point>,
399}
400
401impl TwoPoints {
402    /// Returns a fully qualified `TwoPoints`
403    ///
404    /// This will include a precise `real` [`Point`] as well as a
405    /// precise `ghost` [`Point`].
406    ///
407    /// If you don't want to deal with ghosts, see
408    /// [`TwoPoints::new_before_ghost`] and
409    /// [`TwoPoints::new_after_ghost`].
410    pub const fn new(real: Point, ghost: Point) -> Self {
411        Self { real, ghost: Some(ghost) }
412    }
413
414    /// Returns a new `TwoPoints` that will include the [`Ghost`]
415    /// before the real [`Point`]
416    ///
417    /// [`Ghost`]: super::Ghost
418    pub const fn new_before_ghost(real: Point) -> Self {
419        Self { real, ghost: Some(Point::new()) }
420    }
421
422    /// Returns a new `TwoPoints` that will exclude the [`Ghost`]
423    /// before the real [`Point`]
424    ///
425    /// [`Ghost`]: super::Ghost
426    pub const fn new_after_ghost(real: Point) -> Self {
427        Self { real, ghost: None }
428    }
429}
430
431impl std::cmp::PartialOrd for TwoPoints {
432    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
433        Some(self.cmp(other))
434    }
435}
436
437impl Ord for TwoPoints {
438    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
439        match self.real.cmp(&other.real) {
440            core::cmp::Ordering::Equal => {}
441            ord => return ord,
442        }
443        match (&self.ghost, &other.ghost) {
444            (Some(l), Some(r)) => l.cmp(r),
445            (Some(_), None) => std::cmp::Ordering::Less,
446            (None, Some(_)) => std::cmp::Ordering::Greater,
447            (None, None) => std::cmp::Ordering::Equal,
448        }
449    }
450}
451
/// Stand-in for the missing end bound in the [`RangeFrom`] impls of
/// [`TextRange`], where `to_range` caps it at `max`
const MAX: usize = usize::MAX;
453
/// The length, in bytes, of the UTF-8 character that begins with the
/// byte `b`
///
/// Returns 0 for any byte that cannot begin a character:
/// continuation bytes (`0x80..=0xBF`), as well as `0xC0`, `0xC1` and
/// `0xF5..=0xFF`.
#[inline]
pub const fn utf8_char_width(b: u8) -> u32 {
    // https://tools.ietf.org/html/rfc3629
    match b {
        // ASCII
        0x00..=0x7F => 1,
        // Leading bytes of 2, 3 and 4 byte sequences
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // Everything else cannot begin a character
        _ => 0,
    }
}