harper_core/
span.rs

1use std::{fmt::Display, marker::PhantomData, ops::Range};
2
3use serde::{Deserialize, Serialize};
4
5use crate::Token;
6
7/// A window in a [`T`] sequence.
8///
9/// Note that the range covered by a [`Span`] is end-exclusive, meaning that the end index is not
10/// included in the range covered by the [`Span`]. If you're familiar with the Rust range syntax,
11/// you could say the span covers the equivalent of `start..end`, *not* `start..=end`.
12///
13/// For a [`Span`] to be correct, its end index must be greater than or equal to its start
14/// index. Creating or using a [`Span`] which does not follow this rule may lead to unexpected
15/// behavior or panics.
16///
17/// Although specific to `harper.js`, [this page may clear up any questions you have](https://writewithharper.com/docs/harperjs/spans).
18#[derive(Debug, Serialize, Deserialize, Default, PartialEq, Eq)]
19pub struct Span<T> {
20    /// The start index of the span.
21    pub start: usize,
22    /// The end index of the span.
23    ///
24    /// Note that [`Span`] represents an exclusive range. This means that a `Span::new(0, 5)` will
25    /// cover the values `0, 1, 2, 3, 4`; it will not cover the `5`.
26    pub end: usize,
27    #[serde(skip)]
28    span_type: PhantomData<T>,
29}
30
31impl<T> Span<T> {
32    /// A [`Span`] with a start and end index of 0.
33    pub const ZERO: Self = Self::empty(0);
34
35    /// Creates a new [`Span`] with the provided start and end indices.
36    ///
37    /// # Panics
38    ///
39    /// This will panic if `start` is greater than `end`.
40    pub fn new(start: usize, end: usize) -> Self {
41        if start > end {
42            panic!("{start} > {end}");
43        }
44        Self {
45            start,
46            end,
47            span_type: PhantomData,
48        }
49    }
50
51    /// Creates a new [`Span`] from the provided start position and length.
52    pub fn new_with_len(start: usize, len: usize) -> Self {
53        Self {
54            start,
55            end: start + len,
56            span_type: PhantomData,
57        }
58    }
59
60    /// Creates a new empty [`Span`] with the provided position.
61    pub const fn empty(pos: usize) -> Self {
62        Self {
63            start: pos,
64            end: pos,
65            span_type: PhantomData,
66        }
67    }
68
69    /// The length of the [`Span`].
70    pub fn len(&self) -> usize {
71        self.end - self.start
72    }
73
74    /// Checks whether the [`Span`] is empty.
75    ///
76    /// A [`Span`] is considered empty if it has a length of 0.
77    pub fn is_empty(&self) -> bool {
78        self.len() == 0
79    }
80
81    /// Checks whether `idx` is within the range of the span.
82    pub fn contains(&self, idx: usize) -> bool {
83        self.start <= idx && idx < self.end
84    }
85
86    /// Checks whether this span's range overlaps with `other`.
87    pub fn overlaps_with(&self, other: Self) -> bool {
88        (self.start < other.end) && (other.start < self.end)
89    }
90
91    /// Get the associated content. Will return [`None`] if the span is non-empty and any aspect is
92    /// invalid.
93    pub fn try_get_content<'a>(&self, source: &'a [T]) -> Option<&'a [T]> {
94        if self.is_empty() {
95            Some(&source[0..0])
96        } else {
97            source.get(self.start..self.end)
98        }
99    }
100
101    /// Expand the span by either modifying [`Self::start`] or [`Self::end`] to include the target
102    /// index.
103    ///
104    /// Does nothing if the span already includes the target.
105    pub fn expand_to_include(&mut self, target: usize) {
106        if target < self.start {
107            self.start = target;
108        } else if target >= self.end {
109            self.end = target + 1;
110        }
111    }
112
113    /// Get the associated content. Will panic if any aspect is invalid.
114    pub fn get_content<'a>(&self, source: &'a [T]) -> &'a [T] {
115        match self.try_get_content(source) {
116            Some(v) => v,
117            None => panic!("Failed to get content for span."),
118        }
119    }
120
121    /// Set the span's length.
122    pub fn set_len(&mut self, length: usize) {
123        self.end = self.start + length;
124    }
125
126    /// Returns a copy of this [`Span`] with a new length.
127    pub fn with_len(&self, length: usize) -> Self {
128        let mut cloned = *self;
129        cloned.set_len(length);
130        cloned
131    }
132
133    /// Add an amount to both [`Self::start`] and [`Self::end`]
134    pub fn push_by(&mut self, by: usize) {
135        self.start += by;
136        self.end += by;
137    }
138
139    /// Subtract an amount from both [`Self::start`] and [`Self::end`]
140    pub fn pull_by(&mut self, by: usize) {
141        self.start -= by;
142        self.end -= by;
143    }
144
145    /// Add an amount to a copy of both [`Self::start`] and [`Self::end`]
146    pub fn pushed_by(&self, by: usize) -> Self {
147        let mut clone = *self;
148        clone.start += by;
149        clone.end += by;
150        clone
151    }
152
153    /// Subtract an amount to a copy of both [`Self::start`] and [`Self::end`]
154    pub fn pulled_by(&self, by: usize) -> Option<Self> {
155        if by > self.start {
156            return None;
157        }
158
159        let mut clone = *self;
160        clone.start -= by;
161        clone.end -= by;
162        Some(clone)
163    }
164}
165
166/// Additional functions for types that implement [`std::fmt::Debug`] and [`Display`].
167impl<T: Display + std::fmt::Debug> Span<T> {
168    /// Gets the content of this [`Span<T>`] as a [`String`].
169    pub fn get_content_string(&self, source: &[T]) -> String {
170        if let Some(content) = self.try_get_content(source) {
171            content.iter().map(|t| t.to_string()).collect()
172        } else {
173            panic!("Could not get position {self:?} within \"{source:?}\"")
174        }
175    }
176}
177
178/// Functionality specific to [`Token`] spans.
179impl Span<Token> {
180    /// Converts the [`Span<Token>`] into a [`Span<char>`].
181    ///
182    /// This requires knowing the character spans of the tokens covered by this
183    /// [`Span<Token>`]. Because of this, a reference to the source token sequence used to create
184    /// this span is required.
185    pub fn to_char_span(&self, source_document_tokens: &[Token]) -> Span<char> {
186        if self.is_empty() {
187            Span::ZERO
188        } else {
189            let target_tokens = &source_document_tokens[self.start..self.end];
190            Span::new(
191                target_tokens.first().unwrap().span.start,
192                target_tokens.last().unwrap().span.end,
193            )
194        }
195    }
196}
197
198impl<T> From<Range<usize>> for Span<T> {
199    /// Reinterprets the provided [`std::ops::Range`] as a [`Span`].
200    fn from(value: Range<usize>) -> Self {
201        Self::new(value.start, value.end)
202    }
203}
204
205impl<T> From<Span<T>> for Range<usize> {
206    /// Converts the [`Span`] to an [`std::ops::Range`].
207    fn from(value: Span<T>) -> Self {
208        value.start..value.end
209    }
210}
211
212impl<T> IntoIterator for Span<T> {
213    type Item = usize;
214
215    type IntoIter = Range<usize>;
216
217    /// Converts the [`Span`] into an iterator that yields the indices covered by its range.
218    ///
219    /// Note that [`Span`] is half-open, meaning that the value [`Self::end`] will not be yielded
220    /// by this iterator: it will stop at the index immediately preceding [`Self::end`].
221    fn into_iter(self) -> Self::IntoIter {
222        self.start..self.end
223    }
224}
225
226impl<T> Clone for Span<T> {
227    // Note: manual implementation so we don't unnecessarily require `T` to impl `Clone`.
228    fn clone(&self) -> Self {
229        *self
230    }
231}
232impl<T> Copy for Span<T> {}
233
#[cfg(test)]
mod tests {
    use crate::{
        Document,
        expr::{ExprExt, SequenceExpr},
    };

    use super::Span;

    /// A span whose element type is irrelevant to the test at hand.
    type UntypedSpan = Span<()>;

    /// Spans sharing at least one position with `0..5` overlap it; spans that only touch do not.
    #[test]
    fn overlaps() {
        let base = UntypedSpan::new(0, 5);

        for (start, end) in [(3, 6), (2, 3), (4, 5), (4, 4)] {
            assert!(
                base.overlaps_with(UntypedSpan::new(start, end)),
                "expected 0..5 to overlap {start}..{end}"
            );
        }

        assert!(!UntypedSpan::new(0, 3).overlaps_with(UntypedSpan::new(3, 5)));
    }

    /// Expanding moves `start` down to the target, or `end` to one past the target.
    #[test]
    fn expands_properly() {
        let mut grown = UntypedSpan::new(2, 2);

        grown.expand_to_include(1);
        assert_eq!(grown, UntypedSpan::new(1, 2));

        grown.expand_to_include(2);
        assert_eq!(grown, UntypedSpan::new(1, 3));
    }

    /// Token spans convert to the character range of the tokens they cover.
    #[test]
    fn to_char_span_converts_correctly() {
        let doc = Document::new_plain_english_curated("Hello world!");

        // An empty token span stays empty after conversion.
        let empty_tokens = Span::ZERO;
        let empty_chars = empty_tokens.to_char_span(doc.get_tokens());
        assert!(empty_chars.is_empty());

        // A token span produced by matching an `Expr` (word, whitespace, word).
        let word_pair = SequenceExpr::default()
            .then_any_word()
            .t_ws()
            .then_any_word();
        let matched_tokens = word_pair.iter_matches_in_doc(&doc).next().unwrap();
        let matched_chars = matched_tokens.to_char_span(doc.get_tokens());
        assert_eq!(
            matched_chars.get_content_string(doc.get_source()),
            "Hello world"
        );
    }
}