duat_core/text/utils.rs
1//! Convenience operations for the [`Text`]
2//!
3//! These include the [`Point`] struct and traits that are meant to
4//! take many kinds of inputs, like the [`TwoPoints`], which is meant
5//! to interpret up to 2 [`Point`]s as a real and ghost position in
6//! the [`Text`].
7//!
8//! [`Text`]: super::Text
9use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
10
11use bincode::{Decode, Encode};
12
/// Implements `PartialEq<$Other>` for `$Self` from a comparison body
///
/// `$self` and `$other` are the identifiers that the `$impl` tokens use
/// to refer to the two operands. `$other` is bound to the `other`
/// argument of `eq`, while `$self` is bound to `&self`, that is, a
/// reference to the `&Self` receiver. The `$impl` tokens become the
/// tail of `eq`, so they must evaluate to a `bool`.
macro_rules! implPartialEq {
    ($self:ident: $Self:ty, $other:ident: $Other:ty, $($impl:tt)+) => {
        impl PartialEq<$Other> for $Self {
            fn eq(&self, other: &$Other) -> bool {
                // Bind the caller-chosen identifiers so that `$impl` can
                // name both operands.
                let ($self, $other) = (&self, other);
                $($impl)+
            }
        }
    }
}
23
24pub(super) use implPartialEq;
25
/// Implements [`TextRange`] for one range type, over both `usize` and
/// [`Point`] bounds
///
/// `$range` is the range type and `$r` is the identifier that the bound
/// expressions use to refer to the range value. `$sb` and `$eb` are the
/// start and end bytes for the `usize` flavor, `$sp` and `$ep` are the
/// start and end bytes for the [`Point`] flavor. Both ends are clamped
/// to `max` in the generated `to_range`.
macro_rules! implTextRange {
    ($range:ident, $r:ident, $sb:expr, $eb:expr, $sp:expr, $ep:expr) => {
        impl TextRange for $range<usize> {
            fn to_range(self, max: usize) -> Range<usize> {
                let $r = self;
                Range {
                    start: max.min($sb),
                    end: max.min($eb),
                }
            }
        }

        impl TextRange for $range<Point> {
            fn to_range(self, max: usize) -> Range<usize> {
                let $r = self;
                Range {
                    start: max.min($sp),
                    end: max.min($ep),
                }
            }
        }
    };
}
43
/// Implements [`TextRangeOrIndex`] for one range type, over both
/// `usize` and [`Point`] bounds
///
/// The generated `to_range` just forwards to the [`TextRange`]
/// implementation of the same type.
macro_rules! implTextRangeOrIndex {
    ($range:ident) => {
        impl TextRangeOrIndex for $range<usize> {
            fn to_range(self, max: usize) -> Range<usize> {
                <Self as TextRange>::to_range(self, max)
            }
        }

        impl TextRangeOrIndex for $range<Point> {
            fn to_range(self, max: usize) -> Range<usize> {
                <Self as TextRange>::to_range(self, max)
            }
        }
    };
}
59
/// A position in [`Text`]
///
/// A `Point` keeps three indices for the same position: a byte index,
/// a char index and a line index, each stored as a `u32`.
///
/// The derived ordering compares the fields in declaration order, so
/// `byte` is compared first, then `char`, then `line`.
///
/// [`Text`]: super::Text
#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encode, Decode)]
pub struct Point {
    /// The byte index, see [`Point::byte`]
    byte: u32,
    /// The char index, see [`Point::char`]
    char: u32,
    /// The line index, see [`Point::line`]
    line: u32,
}
69
70impl Point {
71 /// Returns a new `Point`, at the first byte
72 pub const fn new() -> Self {
73 Point { byte: 0, char: 0, line: 0 }
74 }
75
76 /// A `Point` from raw indices
77 pub const fn from_raw(b: usize, c: usize, l: usize) -> Self {
78 let (b, c, l) = (b as u32, c as u32, l as u32);
79 Self { byte: b, char: c, line: l }
80 }
81
82 /// Returns a new [`TwoPoints`] that includes the [`Ghost`]s in
83 /// the same byte, if there is one
84 ///
85 /// [`Ghost`]: super::Ghost
86 pub const fn to_two_points_before(self) -> TwoPoints {
87 TwoPoints::new_before_ghost(self)
88 }
89
90 /// Returns a new [`TwoPoints`] that skips the [`Ghost`]s in the
91 /// same byte, if there is one
92 ///
93 /// [`Ghost`]: super::Ghost
94 pub const fn to_two_points_after(self) -> TwoPoints {
95 TwoPoints::new_after_ghost(self)
96 }
97
98 ////////// Querying functions
99
100 /// The len [`Point`] of a [`&str`]
101 ///
102 /// This is the equivalent of [`Text::len`], but for types
103 /// other than [`Text`]
104 ///
105 /// [`&str`]: str
106 /// [`Text::len`]: super::Bytes::len
107 /// [`Text`]: super::Text
108 pub fn len_of(str: impl AsRef<str>) -> Self {
109 let str = str.as_ref();
110 Self {
111 byte: str.len() as u32,
112 char: str.chars().count() as u32,
113 line: str.bytes().filter(|c| *c == b'\n').count() as u32,
114 }
115 }
116
117 /// Returns the byte (relative to the beginning of the buffer)
118 /// of self. Indexed at 0
119 ///
120 /// You can use byte indices to index the [`Text`] or [`Bytes`]
121 /// with the [`Bytes::point_at_byte`] function.
122 ///
123 /// [`Text`]: super::Text
124 /// [`Bytes`]: super::Bytes
125 /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
126 pub const fn byte(&self) -> usize {
127 self.byte as usize
128 }
129
130 /// Returns the char index (relative to the beginning of the
131 /// buffer). Indexed at 0
132 ///
133 /// This is the primary value used when indexing the [`Text`] and
134 /// [`Bytes`]. That is, the [`Bytes::point_at_byte`],
135 /// [`Bytes::strs`], and most other [`Bytes`] functions rely
136 /// on a character indices (or [`Point`]s) for indexing a
137 /// [`Text`].
138 ///
139 /// [`Text`]: super::Text
140 /// [`Bytes`]: super::Bytes
141 /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
142 /// [`Bytes::strs`]: super::Bytes::strs
143 pub const fn char(&self) -> usize {
144 self.char as usize
145 }
146
147 /// Returns the line. Indexed at 0
148 ///
149 /// You can use byte indices to index the [`Text`] or [`Bytes`]
150 /// with the [`Bytes::point_at_line`] function.
151 ///
152 /// [`Text`]: super::Text
153 /// [`Bytes`]: super::Bytes
154 /// [`Bytes::point_at_line`]: super::Bytes::point_at_line
155 pub const fn line(&self) -> usize {
156 self.line as usize
157 }
158
159 /// Checked [`Point`] subtraction
160 pub fn checked_sub(self, rhs: Point) -> Option<Point> {
161 Some(Self {
162 byte: self.byte.checked_sub(rhs.byte)?,
163 char: self.char.checked_sub(rhs.char)?,
164 line: self.line.checked_sub(rhs.line)?,
165 })
166 }
167
168 ////////// Shifting functions
169
170 /// Moves a [`Point`] forward by one character
171 #[inline(always)]
172 pub(crate) const fn fwd(self, char: char) -> Self {
173 Self {
174 byte: self.byte + char.len_utf8() as u32,
175 char: self.char + 1,
176 line: self.line + (char == '\n') as u32,
177 }
178 }
179
180 /// Moves a [`Point`] in reverse by one character
181 #[inline(always)]
182 pub(crate) const fn rev(self, char: char) -> Self {
183 Self {
184 byte: self.byte - char.len_utf8() as u32,
185 char: self.char - 1,
186 line: self.line - (char == '\n') as u32,
187 }
188 }
189
190 /// Shifts the [`Point`] by a "signed point"
191 ///
192 /// This assumes that no overflow is going to happen
193 pub(crate) const fn shift_by(self, [b, c, l]: [i32; 3]) -> Self {
194 Self {
195 byte: (self.byte as i32 + b) as u32,
196 char: (self.char as i32 + c) as u32,
197 line: (self.line as i32 + l) as u32,
198 }
199 }
200
201 /// Returns a signed representation of this [`Point`]
202 ///
203 /// In this representation, the indices 0, 1 and 2 are the byte,
204 /// char and line, respectively.
205 pub(crate) const fn as_signed(self) -> [i32; 3] {
206 [self.byte as i32, self.char as i32, self.line as i32]
207 }
208}
209
210impl std::fmt::Debug for Point {
211 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212 write!(
213 f,
214 "Point {{ b: {}, c: {}, l: {} }}",
215 self.byte, self.char, self.line
216 )
217 }
218}
219
220impl std::fmt::Display for Point {
221 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222 write!(f, "{}, {}, {}", self.byte, self.char, self.line)
223 }
224}
225
226impl std::ops::Add for Point {
227 type Output = Self;
228
229 fn add(self, rhs: Self) -> Self::Output {
230 Self {
231 byte: self.byte + rhs.byte,
232 char: self.char + rhs.char,
233 line: self.line + rhs.line,
234 }
235 }
236}
237
238impl std::ops::AddAssign for Point {
239 fn add_assign(&mut self, rhs: Self) {
240 *self = *self + rhs;
241 }
242}
243
244impl std::ops::Sub for Point {
245 type Output = Self;
246
247 fn sub(self, rhs: Self) -> Self::Output {
248 Self {
249 byte: self.byte - rhs.byte,
250 char: self.char - rhs.char,
251 line: self.line - rhs.line,
252 }
253 }
254}
255
256impl std::ops::SubAssign for Point {
257 fn sub_assign(&mut self, rhs: Self) {
258 *self = *self - rhs;
259 }
260}
261
/// A [`Point`] or a `usize`, representing a byte index
///
/// In Duat, [`Point`]s are _usually_ just "thin wrappers" around a
/// byte index, useful for getting other information about a place in
/// the [`Text`], but that extra information is normally ignored when
/// doing internal calculations.
///
/// For that reason, Duat allows users to use either [`Point`]s _or_
/// byte indices in order to index the [`Text`], for convenience's
/// sake.
///
/// [`Text`]: super::Text
pub trait TextIndex: Clone + Copy + std::fmt::Debug {
    /// Converts this type into a byte index.
    ///
    /// For a `usize`, this is the value itself. For a [`Point`], this
    /// is its [`Point::byte`].
    fn to_byte_index(self) -> usize;
}
278
279impl TextIndex for Point {
280 fn to_byte_index(self) -> usize {
281 self.byte()
282 }
283}
284
/// A `usize` is taken to already be a byte index
impl TextIndex for usize {
    fn to_byte_index(self) -> usize {
        self
    }
}
290
/// Ranges that can be used to index the [`Text`]
///
/// All of the [ranges] in [`std`] that implement either
/// [`RangeBounds<usize>`] or [`RangeBounds<Point>`] should work as an
/// argument. If it implements [`RangeBounds<usize>`], then the
/// `usize` represents a byte index in the [`Text`]. If it implements
/// [`RangeBounds<Point>`], then the [`Point::byte`] of each bound is
/// used.
///
/// [`Text`]: super::Text
/// [ranges]: std::ops
/// [`RangeBounds<usize>`]: std::ops::RangeBounds
/// [`RangeBounds<Point>`]: std::ops::RangeBounds
pub trait TextRange: Clone {
    /// A "forward facing range"
    ///
    /// `max` is an upper limit for both ends of the returned
    /// [`Range<usize>`], which is a range of byte indices.
    ///
    /// If given a single [`usize`]/[`Point`], acts like [`RangeFrom`]
    ///
    /// NOTE(review): no implementation of this trait for a single
    /// [`usize`] or [`Point`] is visible in this file, confirm that
    /// the sentence above still applies.
    fn to_range(self, max: usize) -> Range<usize>;
}
308
309implTextRange!(Range, r, r.start, r.end, r.start.byte(), r.end.byte());
310implTextRange!(
311 RangeInclusive,
312 r,
313 *r.start(),
314 r.end() + 1,
315 r.start().byte(),
316 r.end().byte() + 1
317);
318implTextRange!(RangeTo, r, 0, r.end, 0, r.end.byte());
319implTextRange!(RangeToInclusive, r, 0, r.end, 0, r.end.byte());
320implTextRange!(RangeFrom, r, r.start, MAX, r.start.byte(), MAX);
321
322impl TextRange for RangeFull {
323 fn to_range(self, max: usize) -> Range<usize> {
324 0..max
325 }
326}
327
/// Either a [`TextRange`], a [`usize`] or a [`Point`]
///
/// In all cases, they represent a byte index from the start of the
/// [`Text`]
///
/// This trait's purpose is to be used for [`Tag`] removal in the
/// [`Tags::remove`] and [`Text::remove_tags`] functions. This is
/// useful in order to reduce the number of functions exposed to API
/// users.
///
/// [`Tag`]: super::Tag
/// [`Tags::remove`]: super::Tags::remove
/// [`Text::remove_tags`]: super::Text::remove_tags
/// [`Text`]: super::Text
pub trait TextRangeOrIndex {
    /// Transforms `self` into a [`Range<usize>`]
    ///
    /// `max` is an upper limit for both ends of the returned range of
    /// byte indices.
    fn to_range(self, max: usize) -> Range<usize>;
}

/// A single byte index becomes the one byte long range starting at it
///
/// Both ends are clamped to `max`, so an index at or past `max` gives
/// the empty range `max..max`.
impl TextRangeOrIndex for usize {
    fn to_range(self, max: usize) -> Range<usize> {
        // `saturating_add` keeps `usize::MAX` from overflowing, the
        // clamp to `max` then takes care of the rest.
        max.min(self)..max.min(self.saturating_add(1))
    }
}
352
353impl TextRangeOrIndex for Point {
354 fn to_range(self, max: usize) -> Range<usize> {
355 max.min(self.byte())..max.min(self.byte() + 1)
356 }
357}
358
359impl TextRangeOrIndex for RangeFull {
360 fn to_range(self, max: usize) -> Range<usize> {
361 TextRange::to_range(self, max)
362 }
363}
364
// Every range type with a `TextRange` implementation over `usize` and
// `Point` also gets a `TextRangeOrIndex` one, which forwards to it.
implTextRangeOrIndex!(Range);
implTextRangeOrIndex!(RangeInclusive);
implTextRangeOrIndex!(RangeTo);
implTextRangeOrIndex!(RangeToInclusive);
implTextRangeOrIndex!(RangeFrom);
370
/// A struct used to exactly pinpoint a position in [`Text`], used
/// when printing
///
/// This struct has two inner components, a `real` [`Point`], and a
/// `ghost` [`Option<Point>`]. The second component is used whenever
/// you want to print a [`Ghost`] `Text`, either fully or partially.
///
/// The `ghost` component represents the "sum position" of all
/// `Ghost`s in that same byte. For example if there are two ghosts in
/// a single byte, if you pass `ghost == ghost1.len()`, then only the
/// second ghost will be included in this iteration.
///
/// [`TwoPoints::default`] is derived, so it has a default `real`
/// [`Point`] and a `ghost` of [`None`], which is the same value that
/// [`TwoPoints::new_after_ghost`] returns for that [`Point`].
///
/// NOTE(review): this documentation used to say that
/// [`TwoPoints::default`] "will include the first [`Ghost`]", which
/// is what [`TwoPoints::new_before_ghost`] does, not what the derived
/// implementation does. Confirm which of the two is intended.
///
/// When ordering, two `TwoPoints` with the same `real` [`Point`] are
/// ordered by their `ghost`s, with every [`Some`] coming before
/// [`None`].
///
/// [`Text`]: super::Text
/// [`Ghost`]: super::Ghost
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Encode, Decode, Hash)]
pub struct TwoPoints {
    /// The real `Point` in the [`Text`]
    ///
    /// [`Text`]: super::Text
    pub real: Point,
    /// A possible point in a [`Ghost`]
    ///
    /// A value of [`None`] means that this is either at the end of
    /// the ghosts at a byte (i.e. this `TwoPoints` represents a real
    /// character), or this byte index doesn't have any ghosts at all.
    ///
    /// A value of [`Some`] means that this `TwoPoints` does _not_
    /// represent a real character, so it points to a character
    /// belonging to a [`Ghost`]
    ///
    /// If you don't know how to set this value, you should try to use
    /// the [`new`], [`new_before_ghost`] or [`new_after_ghost`]
    /// functions.
    ///
    /// [`new`]: Self::new
    /// [`new_before_ghost`]: Self::new_before_ghost
    /// [`new_after_ghost`]: Self::new_after_ghost
    /// [`Ghost`]: super::Ghost
    pub ghost: Option<Point>,
}
413
414impl TwoPoints {
415 /// Returns a fully qualified `TwoPoints`
416 ///
417 /// This will include a precise `real` [`Point`] as well as a
418 /// precise `ghost` [`Point`].
419 ///
420 /// If you don't want to deal with ghosts, see
421 /// [`TwoPoints::new_before_ghost`] and
422 /// [`TwoPoints::new_after_ghost`].
423 pub const fn new(real: Point, ghost: Point) -> Self {
424 Self { real, ghost: Some(ghost) }
425 }
426
427 /// Returns a new `TwoPoints` that will include the [`Ghost`]
428 /// before the real [`Point`]
429 ///
430 /// [`Ghost`]: super::Ghost
431 pub const fn new_before_ghost(real: Point) -> Self {
432 Self { real, ghost: Some(Point::new()) }
433 }
434
435 /// Returns a new `TwoPoints` that will exclude the [`Ghost`]
436 /// before the real [`Point`]
437 ///
438 /// [`Ghost`]: super::Ghost
439 pub const fn new_after_ghost(real: Point) -> Self {
440 Self { real, ghost: None }
441 }
442}
443
444impl std::cmp::PartialOrd for TwoPoints {
445 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
446 Some(self.cmp(other))
447 }
448}
449
450impl Ord for TwoPoints {
451 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
452 match self.real.cmp(&other.real) {
453 core::cmp::Ordering::Equal => {}
454 ord => return ord,
455 }
456 match (&self.ghost, &other.ghost) {
457 (Some(l), Some(r)) => l.cmp(r),
458 (Some(_), None) => std::cmp::Ordering::Less,
459 (None, Some(_)) => std::cmp::Ordering::Greater,
460 (None, None) => std::cmp::Ordering::Equal,
461 }
462 }
463}
464
// Used as the end of `RangeFrom`s in the `implTextRange!` invocations,
// where it is then clamped to the `max` argument of `to_range`.
const MAX: usize = usize::MAX;
466
/// Given a first byte, determines how many bytes are in this
/// UTF-8 character
///
/// Returns 0 if `b` can't be the first byte of a UTF-8 character,
/// which is the case for continuation bytes (`0x80..=0xBF`) and for
/// the bytes that never show up in UTF-8 (`0xC0`, `0xC1` and
/// `0xF5..=0xFF`).
#[inline]
pub const fn utf8_char_width(b: u8) -> u32 {
    // https://tools.ietf.org/html/rfc3629
    match b {
        // ASCII
        0x00..=0x7F => 1,
        // Leading bytes of 2, 3 and 4 byte long sequences
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // 0x80..=0xC1 and 0xF5..=0xFF
        _ => 0,
    }
}