harper_core/
span.rs

1use std::{fmt::Display, marker::PhantomData, ops::Range};
2
3use serde::{Deserialize, Serialize};
4
5use crate::Token;
6
/// A window in a `T` sequence.
///
/// The type parameter `T` only labels what kind of sequence the indices refer to; no value of
/// type `T` is stored inside the span itself.
///
/// Note that the range covered by a [`Span`] is end-exclusive, meaning that the end index is not
/// included in the range covered by the [`Span`]. If you're familiar with the Rust range syntax,
/// you could say the span covers the equivalent of `start..end`, *not* `start..=end`.
///
/// For a [`Span`] to be correct, its end index must be greater than or equal to its start
/// index. Creating or using a [`Span`] which does not follow this rule may lead to unexpected
/// behavior or panics.
///
/// Although specific to `harper.js`, [this page may clear up any questions you have](https://writewithharper.com/docs/harperjs/spans).
#[derive(Debug, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct Span<T> {
    /// The start index of the span.
    pub start: usize,
    /// The end index of the span.
    ///
    /// Note that [`Span`] represents an exclusive range. This means that a `Span::new(0, 5)` will
    /// cover the values `0, 1, 2, 3, 4`; it will not cover the `5`.
    pub end: usize,
    /// Zero-sized marker recording the element type `T` that this span indexes into.
    span_type: PhantomData<T>,
}
29
30impl<T> Span<T> {
31    /// An empty [`Span`].
32    pub const EMPTY: Self = Self {
33        start: 0,
34        end: 0,
35        span_type: PhantomData,
36    };
37
38    /// Creates a new [`Span`] with the provided start and end indices.
39    ///
40    /// # Panics
41    ///
42    /// This will panic if `start` is greater than `end`.
43    pub fn new(start: usize, end: usize) -> Self {
44        if start > end {
45            panic!("{start} > {end}");
46        }
47        Self {
48            start,
49            end,
50            span_type: PhantomData,
51        }
52    }
53
54    /// Creates a new [`Span`] from the provided start position and length.
55    pub fn new_with_len(start: usize, len: usize) -> Self {
56        Self {
57            start,
58            end: start + len,
59            span_type: PhantomData,
60        }
61    }
62
63    /// The length of the [`Span`].
64    pub fn len(&self) -> usize {
65        self.end - self.start
66    }
67
68    /// Checks whether the [`Span`] is empty.
69    ///
70    /// A [`Span`] is considered empty if it has a length of 0.
71    pub fn is_empty(&self) -> bool {
72        self.len() == 0
73    }
74
75    /// Checks whether `idx` is within the range of the span.
76    pub fn contains(&self, idx: usize) -> bool {
77        self.start <= idx && idx < self.end
78    }
79
80    /// Checks whether this span's range overlaps with `other`.
81    pub fn overlaps_with(&self, other: Self) -> bool {
82        (self.start < other.end) && (other.start < self.end)
83    }
84
85    /// Get the associated content. Will return [`None`] if the span is non-empty and any aspect is
86    /// invalid.
87    pub fn try_get_content<'a>(&self, source: &'a [T]) -> Option<&'a [T]> {
88        if self.is_empty() {
89            Some(&source[0..0])
90        } else {
91            source.get(self.start..self.end)
92        }
93    }
94
95    /// Expand the span by either modifying [`Self::start`] or [`Self::end`] to include the target
96    /// index.
97    ///
98    /// Does nothing if the span already includes the target.
99    pub fn expand_to_include(&mut self, target: usize) {
100        if target < self.start {
101            self.start = target;
102        } else if target >= self.end {
103            self.end = target + 1;
104        }
105    }
106
107    /// Get the associated content. Will panic if any aspect is invalid.
108    pub fn get_content<'a>(&self, source: &'a [T]) -> &'a [T] {
109        match self.try_get_content(source) {
110            Some(v) => v,
111            None => panic!("Failed to get content for span."),
112        }
113    }
114
115    /// Set the span's length.
116    pub fn set_len(&mut self, length: usize) {
117        self.end = self.start + length;
118    }
119
120    /// Returns a copy of this [`Span`] with a new length.
121    pub fn with_len(&self, length: usize) -> Self {
122        let mut cloned = *self;
123        cloned.set_len(length);
124        cloned
125    }
126
127    /// Add an amount to both [`Self::start`] and [`Self::end`]
128    pub fn push_by(&mut self, by: usize) {
129        self.start += by;
130        self.end += by;
131    }
132
133    /// Subtract an amount from both [`Self::start`] and [`Self::end`]
134    pub fn pull_by(&mut self, by: usize) {
135        self.start -= by;
136        self.end -= by;
137    }
138
139    /// Add an amount to a copy of both [`Self::start`] and [`Self::end`]
140    pub fn pushed_by(&self, by: usize) -> Self {
141        let mut clone = *self;
142        clone.start += by;
143        clone.end += by;
144        clone
145    }
146
147    /// Subtract an amount to a copy of both [`Self::start`] and [`Self::end`]
148    pub fn pulled_by(&self, by: usize) -> Option<Self> {
149        if by > self.start {
150            return None;
151        }
152
153        let mut clone = *self;
154        clone.start -= by;
155        clone.end -= by;
156        Some(clone)
157    }
158}
159
160/// Additional functions for types that implement [`std::fmt::Debug`] and [`Display`].
161impl<T: Display + std::fmt::Debug> Span<T> {
162    /// Gets the content of this [`Span<T>`] as a [`String`].
163    pub fn get_content_string(&self, source: &[T]) -> String {
164        if let Some(content) = self.try_get_content(source) {
165            content.iter().map(|t| t.to_string()).collect()
166        } else {
167            panic!("Could not get position {self:?} within \"{source:?}\"")
168        }
169    }
170}
171
172/// Functionality specific to [`Token`] spans.
173impl Span<Token> {
174    /// Converts the [`Span<Token>`] into a [`Span<char>`].
175    ///
176    /// This requires knowing the character spans of the tokens covered by this
177    /// [`Span<Token>`]. Because of this, a reference to the source token sequence used to create
178    /// this span is required.
179    pub fn to_char_span(&self, source_document_tokens: &[Token]) -> Span<char> {
180        if self.is_empty() {
181            Span::EMPTY
182        } else {
183            let target_tokens = &source_document_tokens[self.start..self.end];
184            Span::new(
185                target_tokens.first().unwrap().span.start,
186                target_tokens.last().unwrap().span.end,
187            )
188        }
189    }
190}
191
192impl<T> From<Range<usize>> for Span<T> {
193    /// Reinterprets the provided [`std::ops::Range`] as a [`Span`].
194    fn from(value: Range<usize>) -> Self {
195        Self::new(value.start, value.end)
196    }
197}
198
199impl<T> From<Span<T>> for Range<usize> {
200    /// Converts the [`Span`] to an [`std::ops::Range`].
201    fn from(value: Span<T>) -> Self {
202        value.start..value.end
203    }
204}
205
206impl<T> IntoIterator for Span<T> {
207    type Item = usize;
208
209    type IntoIter = Range<usize>;
210
211    /// Converts the [`Span`] into an iterator that yields the indices covered by its range.
212    ///
213    /// Note that [`Span`] is half-open, meaning that the value [`Self::end`] will not be yielded
214    /// by this iterator: it will stop at the index immediately preceding [`Self::end`].
215    fn into_iter(self) -> Self::IntoIter {
216        self.start..self.end
217    }
218}
219
impl<T> Clone for Span<T> {
    // Note: manual implementation so we don't unnecessarily require `T` to impl `Clone`.
    // (`#[derive(Clone)]` would add a `T: Clone` bound even though no `T` is stored.)
    fn clone(&self) -> Self {
        // A plain copy is enough: `Span<T>` is `Copy` for every `T` (see the impl below).
        *self
    }
}
// Implemented by hand for the same reason: a derived `Copy` would demand `T: Copy`.
impl<T> Copy for Span<T> {}
227
#[cfg(test)]
mod tests {
    use crate::{
        Document,
        expr::{ExprExt, SequenceExpr},
    };

    use super::Span;

    // Element type is irrelevant for the pure index arithmetic tests below.
    type UntypedSpan = Span<()>;

    /// Pins the behavior of `overlaps_with` for partial, nested, and touching spans.
    #[test]
    fn overlaps() {
        assert!(UntypedSpan::new(0, 5).overlaps_with(UntypedSpan::new(3, 6)));
        assert!(UntypedSpan::new(0, 5).overlaps_with(UntypedSpan::new(2, 3)));
        assert!(UntypedSpan::new(0, 5).overlaps_with(UntypedSpan::new(4, 5)));
        // An empty span positioned inside another span counts as overlapping.
        assert!(UntypedSpan::new(0, 5).overlaps_with(UntypedSpan::new(4, 4)));

        // Spans that only touch at a boundary do not overlap (the end is exclusive).
        assert!(!UntypedSpan::new(0, 3).overlaps_with(UntypedSpan::new(3, 5)));
    }

    /// Checks that `expand_to_include` grows the span in both directions.
    #[test]
    fn expands_properly() {
        let mut span = UntypedSpan::new(2, 2);

        // Target before the start: the start moves down to the target.
        span.expand_to_include(1);
        assert_eq!(span, UntypedSpan::new(1, 2));

        // Target at the (exclusive) end: the end moves one past the target.
        span.expand_to_include(2);
        assert_eq!(span, UntypedSpan::new(1, 3));
    }

    /// Checks `to_char_span` for an empty span and for a span produced by an expression match.
    #[test]
    fn to_char_span_converts_correctly() {
        let doc = Document::new_plain_english_curated("Hello world!");

        // Empty span.
        let token_span = Span::EMPTY;
        let converted = token_span.to_char_span(doc.get_tokens());
        assert!(converted.is_empty());

        // Span from `Expr`.
        // The first match is expected to cover "Hello world" (see the assertion below);
        // presumably `t_ws` matches the whitespace between the two words.
        let token_span = SequenceExpr::default()
            .then_any_word()
            .t_ws()
            .then_any_word()
            .iter_matches_in_doc(&doc)
            .next()
            .unwrap();
        let converted = token_span.to_char_span(doc.get_tokens());
        assert_eq!(
            converted.get_content_string(doc.get_source()),
            "Hello world"
        );
    }
}