duat_core/text/ops.rs
1//! Convenience operations for the [`Text`]
2//!
3//! These include the [`Point`] struct and traits that are meant to
4//! take many kinds of inputs, like the [`TwoPoints`], which is meant
5//! to interpret up to 2 [`Point`]s as a real and ghost position in
6//! the [`Text`].
7//!
8//! [`Text`]: super::Text
9use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
10
11use bincode::{Decode, Encode};
12
/// Implements [`TextRange`] for one [`std::ops`] range type, once
/// with `usize` bounds and once with [`Point`] bounds
///
/// Arguments, in order:
///
/// - the range type;
/// - the identifier that `self` is bound to, so that the following
///   expressions can refer to it;
/// - the start and end expressions for the `usize` flavor;
/// - the start and end expressions for the [`Point`] flavor.
///
/// Both ends are clamped to `max` in the generated `to_range`.
macro_rules! implTextRange {
    (
        $range:ident,
        $this:ident,
        $start_idx:expr,
        $end_idx:expr,
        $start_pt:expr,
        $end_pt:expr
    ) => {
        impl TextRange for $range<usize> {
            fn to_range(self, max: usize) -> Range<usize> {
                let $this = self;
                let (start, end): (usize, usize) = ($start_idx, $end_idx);
                start.min(max)..end.min(max)
            }
        }

        impl TextRange for $range<Point> {
            fn to_range(self, max: usize) -> Range<usize> {
                let $this = self;
                let (start, end): (usize, usize) = ($start_pt, $end_pt);
                start.min(max)..end.min(max)
            }
        }
    };
}
30
/// Implements [`TextRangeOrIndex`] for one [`std::ops`] range type
/// (with `usize` and with [`Point`] bounds) by forwarding to that
/// type's [`TextRange`] implementation
macro_rules! implTextRangeOrIndex {
    ($range:ident) => {
        impl TextRangeOrIndex for $range<usize> {
            fn to_range(self, max: usize) -> Range<usize> {
                <Self as TextRange>::to_range(self, max)
            }
        }

        impl TextRangeOrIndex for $range<Point> {
            fn to_range(self, max: usize) -> Range<usize> {
                <Self as TextRange>::to_range(self, max)
            }
        }
    };
}
46
47/// A position in [`Text`]
48///
49/// [`Text`]: super::Text
50#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encode, Decode)]
51pub struct Point {
52 byte: u32,
53 char: u32,
54 line: u32,
55}
56
57impl Point {
58 /// Returns a new [`Point`], at the first byte
59 pub const fn new() -> Self {
60 Point { byte: 0, char: 0, line: 0 }
61 }
62
63 /// Internal function to create [`Point`]s
64 pub(super) const fn from_raw(b: usize, c: usize, l: usize) -> Self {
65 let (b, c, l) = (b as u32, c as u32, l as u32);
66 Self { byte: b, char: c, line: l }
67 }
68
69 /// Returns a new [`TwoPoints`] that includes the [`Ghost`]s in
70 /// the same byte, if there is one
71 ///
72 /// [`Ghost`]: super::Ghost
73 pub const fn to_two_points_before(self) -> TwoPoints {
74 TwoPoints::new_before_ghost(self)
75 }
76
77 /// Returns a new [`TwoPoints`] that skips the [`Ghost`]s in the
78 /// same byte, if there is one
79 ///
80 /// [`Ghost`]: super::Ghost
81 pub const fn to_two_points_after(self) -> TwoPoints {
82 TwoPoints::new_after_ghost(self)
83 }
84
85 ////////// Querying functions
86
87 /// The len [`Point`] of a [`&str`]
88 ///
89 /// This is the equivalent of [`Text::len`], but for types
90 /// other than [`Text`]
91 ///
92 /// [`&str`]: str
93 /// [`Text::len`]: super::Bytes::len
94 /// [`Text`]: super::Text
95 pub fn len_of(str: impl AsRef<str>) -> Self {
96 let str = str.as_ref();
97 Self {
98 byte: str.len() as u32,
99 char: str.chars().count() as u32,
100 line: str.bytes().filter(|c| *c == b'\n').count() as u32,
101 }
102 }
103
104 /// Returns the byte (relative to the beginning of the buffer)
105 /// of self. Indexed at 0
106 ///
107 /// You can use byte indices to index the [`Text`] or [`Bytes`]
108 /// with the [`Bytes::point_at_byte`] function.
109 ///
110 /// [`Text`]: super::Text
111 /// [`Bytes`]: super::Bytes
112 /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
113 pub const fn byte(&self) -> usize {
114 self.byte as usize
115 }
116
117 /// Returns the char index (relative to the beginning of the
118 /// buffer). Indexed at 0
119 ///
120 /// This is the primary value used when indexing the [`Text`] and
121 /// [`Bytes`]. That is, the [`Bytes::point_at_byte`],
122 /// [`Bytes::strs`], and most other [`Bytes`] functions rely
123 /// on a character indices (or [`Point`]s) for indexing a
124 /// [`Text`].
125 ///
126 /// [`Text`]: super::Text
127 /// [`Bytes`]: super::Bytes
128 /// [`Bytes::point_at_byte`]: super::Bytes::point_at_byte
129 /// [`Bytes::strs`]: super::Bytes::strs
130 pub const fn char(&self) -> usize {
131 self.char as usize
132 }
133
134 /// Returns the line. Indexed at 0
135 ///
136 /// You can use byte indices to index the [`Text`] or [`Bytes`]
137 /// with the [`Bytes::point_at_line`] function.
138 ///
139 /// [`Text`]: super::Text
140 /// [`Bytes`]: super::Bytes
141 /// [`Bytes::point_at_line`]: super::Bytes::point_at_line
142 pub const fn line(&self) -> usize {
143 self.line as usize
144 }
145
146 /// Checked [`Point`] subtraction
147 pub fn checked_sub(self, rhs: Point) -> Option<Point> {
148 Some(Self {
149 byte: self.byte.checked_sub(rhs.byte)?,
150 char: self.char.checked_sub(rhs.char)?,
151 line: self.line.checked_sub(rhs.line)?,
152 })
153 }
154
155 ////////// Shifting functions
156
157 /// Moves a [`Point`] forward by one character
158 #[inline(always)]
159 pub(crate) const fn fwd(self, char: char) -> Self {
160 Self {
161 byte: self.byte + char.len_utf8() as u32,
162 char: self.char + 1,
163 line: self.line + (char == '\n') as u32,
164 }
165 }
166
167 /// Moves a [`Point`] in reverse by one character
168 #[inline(always)]
169 pub(crate) const fn rev(self, char: char) -> Self {
170 Self {
171 byte: self.byte - char.len_utf8() as u32,
172 char: self.char - 1,
173 line: self.line - (char == '\n') as u32,
174 }
175 }
176
177 /// Shifts the [`Point`] by a "signed point"
178 ///
179 /// This assumes that no overflow is going to happen
180 pub(crate) const fn shift_by(self, [b, c, l]: [i32; 3]) -> Self {
181 Self {
182 byte: (self.byte as i32 + b) as u32,
183 char: (self.char as i32 + c) as u32,
184 line: (self.line as i32 + l) as u32,
185 }
186 }
187
188 /// Returns a signed representation of this [`Point`]
189 ///
190 /// In this representation, the indices 0, 1 and 2 are the byte,
191 /// char and line, respectively.
192 pub(crate) const fn as_signed(self) -> [i32; 3] {
193 [self.byte as i32, self.char as i32, self.line as i32]
194 }
195}
196
197impl std::fmt::Debug for Point {
198 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
199 write!(
200 f,
201 "Point {{ b: {}, c: {}, l: {} }}",
202 self.byte, self.char, self.line
203 )
204 }
205}
206
207impl std::fmt::Display for Point {
208 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209 write!(f, "{}, {}, {}", self.byte, self.char, self.line)
210 }
211}
212
213impl std::ops::Add for Point {
214 type Output = Self;
215
216 fn add(self, rhs: Self) -> Self::Output {
217 Self {
218 byte: self.byte + rhs.byte,
219 char: self.char + rhs.char,
220 line: self.line + rhs.line,
221 }
222 }
223}
224
225impl std::ops::AddAssign for Point {
226 fn add_assign(&mut self, rhs: Self) {
227 *self = *self + rhs;
228 }
229}
230
231impl std::ops::Sub for Point {
232 type Output = Self;
233
234 fn sub(self, rhs: Self) -> Self::Output {
235 Self {
236 byte: self.byte - rhs.byte,
237 char: self.char - rhs.char,
238 line: self.line - rhs.line,
239 }
240 }
241}
242
243impl std::ops::SubAssign for Point {
244 fn sub_assign(&mut self, rhs: Self) {
245 *self = *self - rhs;
246 }
247}
248
/// A [`Point`] or a `usize`, representing a byte index
///
/// In Duat, [`Point`]s are _usually_ just "thin wrappers" around a
/// byte index. They are useful for getting other information about
/// a place in the [`Text`], but that extra information is normally
/// ignored when doing internal calculations.
///
/// For that reason, and for convenience's sake, Duat allows users to
/// index the [`Text`] with either [`Point`]s _or_ plain byte
/// indices.
///
/// [`Text`]: super::Text
pub trait TextIndex: Clone + Copy + std::fmt::Debug {
    /// Converts this type into a byte index.
    fn to_byte_index(self) -> usize;
}
265
266impl TextIndex for Point {
267 fn to_byte_index(self) -> usize {
268 self.byte()
269 }
270}
271
272impl TextIndex for usize {
273 fn to_byte_index(self) -> usize {
274 self
275 }
276}
277
/// Ranges that can be used to index the [`Text`]
///
/// All of the [ranges] in [`std`] that implement either
/// [`RangeBounds<usize>`] or [`RangeBounds<Point>`] should work as
/// an argument. When the bounds are `usize`s, each of them
/// represents a byte index in the [`Text`].
///
/// [`Text`]: super::Text
/// [ranges]: std::range
/// [`RangeBounds<usize>`]: std::ops::RangeBounds
/// [`RangeBounds<Point>`]: std::ops::RangeBounds
pub trait TextRange: Clone {
    /// A "forward facing range"
    ///
    /// Turns `self` into a [`Range<usize>`] of byte indices, where
    /// `max` is the upper limit for both ends.
    ///
    /// If given a single [`usize`]/[`Point`], acts like [`RangeFrom`]
    fn to_range(self, max: usize) -> Range<usize>;
}
295
296implTextRange!(Range, r, r.start, r.end, r.start.byte(), r.end.byte());
297implTextRange!(
298 RangeInclusive,
299 r,
300 *r.start(),
301 r.end() + 1,
302 r.start().byte(),
303 r.end().byte() + 1
304);
305implTextRange!(RangeTo, r, 0, r.end, 0, r.end.byte());
306implTextRange!(RangeToInclusive, r, 0, r.end, 0, r.end.byte());
307implTextRange!(RangeFrom, r, r.start, MAX, r.start.byte(), MAX);
308
309impl TextRange for RangeFull {
310 fn to_range(self, max: usize) -> Range<usize> {
311 0..max
312 }
313}
314
/// Either a [`TextRange`], a [`usize`] or a [`Point`]
///
/// In all cases, they represent a byte index from the start of the
/// [`Text`]
///
/// The purpose of this trait is to be used for [`Tag`] removal, in
/// the [`Tags::remove`] and [`Text::remove_tags`] functions.
/// Accepting all of these types in one place reduces the number of
/// functions exposed to API users.
///
/// [`Tag`]: super::Tag
/// [`Tags::remove`]: super::Tags::remove
/// [`Text::remove_tags`]: super::Text::remove_tags
/// [`Text`]: super::Text
pub trait TextRangeOrIndex {
    /// Transforms `self` into a [`Range<usize>`]
    ///
    /// `max` is the upper limit for both ends of the returned range.
    fn to_range(self, max: usize) -> Range<usize>;
}
333
334impl TextRangeOrIndex for usize {
335 fn to_range(self, max: usize) -> Range<usize> {
336 max.min(self)..max.min(self + 1)
337 }
338}
339
340impl TextRangeOrIndex for Point {
341 fn to_range(self, max: usize) -> Range<usize> {
342 max.min(self.byte())..max.min(self.byte() + 1)
343 }
344}
345
346impl TextRangeOrIndex for RangeFull {
347 fn to_range(self, max: usize) -> Range<usize> {
348 TextRange::to_range(self, max)
349 }
350}
351
352implTextRangeOrIndex!(Range);
353implTextRangeOrIndex!(RangeInclusive);
354implTextRangeOrIndex!(RangeTo);
355implTextRangeOrIndex!(RangeToInclusive);
356implTextRangeOrIndex!(RangeFrom);
357
358/// A struct used to exactly pinpoint a position in [`Text`], used
359/// when printing
360///
361/// This struct has two inner components, a `real` [`Point`], and a
362/// `ghost` [`Option<Point>`]. The second component is used whenever
363/// you want to print a [`Ghost`] `Text`, either fully or partially.
364///
365/// The `ghost` component represents the "sum position" of all
366/// `Ghost`s in that same byte. For example if there are two ghosts in
367/// a single byte, if you pass `ghost == ghost1.len()`, then only the
368/// second ghost will be included in this iteration.
369///
370/// [`TwoPoints::default`] will include the first [`Ghost`].
371///
372/// [`Text`]: super::Text
373/// [`Ghost`]: super::Ghost
374#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Encode, Decode)]
375pub struct TwoPoints {
376 /// The real `Point` in the [`Text`]
377 ///
378 /// [`Text`]: super::Text
379 pub real: Point,
380 /// A possible point in a [`Ghost`]
381 ///
382 /// A value of [`None`] means that this is either at the end of
383 /// the ghosts at a byte (i.e. this `TwoPoints` represents a real
384 /// character), or this byte index doesn't have any ghosts at all.
385 ///
386 /// A value of [`Some`] means that this `TwoPoints` does _not_
387 /// represent a real character, so it points to a character
388 /// belonging to a [`Ghost`]
389 ///
390 /// If you don't know how to set this value, you should try to use
391 /// the [`new`], [`new_before_ghost`] or [`new_after_ghost`]
392 /// functions.
393 ///
394 /// [`new`]: Self::new
395 /// [`new_before_ghost`]: Self::new_before_ghost
396 /// [`new_after_ghost`]: Self::new_after_ghost
397 /// [`Ghost`]: super::Ghost
398 pub ghost: Option<Point>,
399}
400
401impl TwoPoints {
402 /// Returns a fully qualified `TwoPoints`
403 ///
404 /// This will include a precise `real` [`Point`] as well as a
405 /// precise `ghost` [`Point`].
406 ///
407 /// If you don't want to deal with ghosts, see
408 /// [`TwoPoints::new_before_ghost`] and
409 /// [`TwoPoints::new_after_ghost`].
410 pub const fn new(real: Point, ghost: Point) -> Self {
411 Self { real, ghost: Some(ghost) }
412 }
413
414 /// Returns a new `TwoPoints` that will include the [`Ghost`]
415 /// before the real [`Point`]
416 ///
417 /// [`Ghost`]: super::Ghost
418 pub const fn new_before_ghost(real: Point) -> Self {
419 Self { real, ghost: Some(Point::new()) }
420 }
421
422 /// Returns a new `TwoPoints` that will exclude the [`Ghost`]
423 /// before the real [`Point`]
424 ///
425 /// [`Ghost`]: super::Ghost
426 pub const fn new_after_ghost(real: Point) -> Self {
427 Self { real, ghost: None }
428 }
429}
430
431impl std::cmp::PartialOrd for TwoPoints {
432 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
433 Some(self.cmp(other))
434 }
435}
436
437impl Ord for TwoPoints {
438 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
439 match self.real.cmp(&other.real) {
440 core::cmp::Ordering::Equal => {}
441 ord => return ord,
442 }
443 match (&self.ghost, &other.ghost) {
444 (Some(l), Some(r)) => l.cmp(r),
445 (Some(_), None) => std::cmp::Ordering::Less,
446 (None, Some(_)) => std::cmp::Ordering::Greater,
447 (None, None) => std::cmp::Ordering::Equal,
448 }
449 }
450}
451
/// The largest `usize`, used as the end of the [`RangeFrom`]s before
/// it gets clamped to the `max` argument of `to_range`
const MAX: usize = usize::MAX;
453
/// Given a first byte, determines how many bytes are in this
/// UTF-8 character
///
/// Returns 0 for every byte that can't be the first byte of a UTF-8
/// character: the continuation bytes (`0x80..=0xBF`), as well as
/// `0xC0`, `0xC1` and `0xF5..=0xFF`.
#[inline]
pub const fn utf8_char_width(b: u8) -> u32 {
    // https://tools.ietf.org/html/rfc3629
    match b {
        // ASCII
        0x00..=0x7F => 1,
        // Leading bytes of multi byte sequences
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // 0x80..=0xC1 and 0xF5..=0xFF never start a character
        _ => 0,
    }
}