harper_core/
span.rs

1use std::{fmt::Display, marker::PhantomData, ops::Range};
2
3use serde::{Deserialize, Serialize};
4
5use crate::Token;
6
/// A window into a sequence of `T` values (for example, a slice of `char`s or of tokens).
///
/// The type parameter only records *what kind* of sequence the indices refer to; a [`Span`]
/// stores nothing but its two indices.
///
/// Note that the range covered by a [`Span`] is end-exclusive, meaning that the end index is not
/// included in the range covered by the [`Span`]. If you're familiar with the Rust range syntax,
/// you could say the span covers the equivalent of `start..end`, *not* `start..=end`.
///
/// For a [`Span`] to be correct, its end index must be greater than or equal to its start
/// index. Creating or using a [`Span`] which does not follow this rule may lead to unexpected
/// behavior or panics.
///
/// Although specific to `harper.js`, [this page may clear up any questions you have](https://writewithharper.com/docs/harperjs/spans).
// Note: the standard-library derives below (`Debug`, `Default`, `PartialEq`, `Eq`) add a
// matching bound on `T`, so those impls are only available when `T` implements the trait too.
#[derive(Debug, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct Span<T> {
    /// The start index of the span (inclusive).
    pub start: usize,
    /// The end index of the span (exclusive).
    ///
    /// Note that [`Span`] represents an exclusive range. This means that a `Span::new(0, 5)` will
    /// cover the values `0, 1, 2, 3, 4`; it will not cover the `5`.
    pub end: usize,
    /// Zero-sized marker that ties the span to the element type `T` without storing a `T`.
    span_type: PhantomData<T>,
}
29
30impl<T> Span<T> {
31    /// An empty [`Span`].
32    pub const EMPTY: Self = Self {
33        start: 0,
34        end: 0,
35        span_type: PhantomData,
36    };
37
38    /// Creates a new [`Span`] with the provided start and end indices.
39    ///
40    /// # Panics
41    ///
42    /// This will panic if `start` is greater than `end`.
43    pub fn new(start: usize, end: usize) -> Self {
44        if start > end {
45            panic!("{start} > {end}");
46        }
47        Self {
48            start,
49            end,
50            span_type: PhantomData,
51        }
52    }
53
54    /// Creates a new [`Span`] from the provided start position and length.
55    pub fn new_with_len(start: usize, len: usize) -> Self {
56        Self {
57            start,
58            end: start + len,
59            span_type: PhantomData,
60        }
61    }
62
63    /// The length of the [`Span`].
64    pub fn len(&self) -> usize {
65        self.end - self.start
66    }
67
68    /// Checks whether the [`Span`] is empty.
69    ///
70    /// A [`Span`] is considered empty if it has a length of 0.
71    pub fn is_empty(&self) -> bool {
72        self.len() == 0
73    }
74
75    /// Checks whether `idx` is within the range of the span.
76    pub fn contains(&self, idx: usize) -> bool {
77        assert!(self.start <= self.end);
78
79        self.start <= idx && idx < self.end
80    }
81
82    /// Checks whether this span's range overlaps with `other`.
83    pub fn overlaps_with(&self, other: Self) -> bool {
84        (self.start < other.end) && (other.start < self.end)
85    }
86
87    /// Get the associated content. Will return [`None`] if any aspect is
88    /// invalid.
89    pub fn try_get_content<'a>(&self, source: &'a [T]) -> Option<&'a [T]> {
90        if (self.start > self.end) || (self.start >= source.len()) || (self.end > source.len()) {
91            if self.is_empty() {
92                return Some(&source[0..0]);
93            }
94            return None;
95        }
96
97        Some(&source[self.start..self.end])
98    }
99
100    /// Expand the span by either modifying [`Self::start`] or [`Self::end`] to include the target
101    /// index.
102    ///
103    /// Does nothing if the span already includes the target.
104    pub fn expand_to_include(&mut self, target: usize) {
105        if target < self.start {
106            self.start = target;
107        } else if target >= self.end {
108            self.end = target + 1;
109        }
110    }
111
112    /// Get the associated content. Will panic if any aspect is invalid.
113    pub fn get_content<'a>(&self, source: &'a [T]) -> &'a [T] {
114        match self.try_get_content(source) {
115            Some(v) => v,
116            None => panic!("Failed to get content for span."),
117        }
118    }
119
120    /// Set the span's length.
121    pub fn set_len(&mut self, length: usize) {
122        self.end = self.start + length;
123    }
124
125    /// Returns a copy of this [`Span`] with a new length.
126    pub fn with_len(&self, length: usize) -> Self {
127        let mut cloned = *self;
128        cloned.set_len(length);
129        cloned
130    }
131
132    /// Add an amount to both [`Self::start`] and [`Self::end`]
133    pub fn push_by(&mut self, by: usize) {
134        self.start += by;
135        self.end += by;
136    }
137
138    /// Subtract an amount from both [`Self::start`] and [`Self::end`]
139    pub fn pull_by(&mut self, by: usize) {
140        self.start -= by;
141        self.end -= by;
142    }
143
144    /// Add an amount to a copy of both [`Self::start`] and [`Self::end`]
145    pub fn pushed_by(&self, by: usize) -> Self {
146        let mut clone = *self;
147        clone.start += by;
148        clone.end += by;
149        clone
150    }
151
152    /// Subtract an amount to a copy of both [`Self::start`] and [`Self::end`]
153    pub fn pulled_by(&self, by: usize) -> Option<Self> {
154        if by > self.start {
155            return None;
156        }
157
158        let mut clone = *self;
159        clone.start -= by;
160        clone.end -= by;
161        Some(clone)
162    }
163
164    /// Add an amount to a copy of both [`Self::start`] and [`Self::end`]
165    pub fn with_offset(&self, by: usize) -> Self {
166        let mut clone = *self;
167        clone.push_by(by);
168        clone
169    }
170}
171
172/// Additional functions for types that implement [`std::fmt::Debug`] and [`Display`].
173impl<T: Display + std::fmt::Debug> Span<T> {
174    /// Gets the content of this [`Span<T>`] as a [`String`].
175    pub fn get_content_string(&self, source: &[T]) -> String {
176        if let Some(content) = self.try_get_content(source) {
177            content.iter().map(|t| t.to_string()).collect()
178        } else {
179            panic!("Could not get position {self:?} within \"{source:?}\"")
180        }
181    }
182}
183
184/// Functionality specific to [`Token`] spans.
185impl Span<Token> {
186    /// Converts the [`Span<Token>`] into a [`Span<char>`].
187    ///
188    /// This requires knowing the character spans of the tokens covered by this
189    /// [`Span<Token>`]. Because of this, a reference to the source token sequence used to create
190    /// this span is required.
191    pub fn to_char_span(&self, source_document_tokens: &[Token]) -> Span<char> {
192        if self.is_empty() {
193            Span::EMPTY
194        } else {
195            let target_tokens = &source_document_tokens[self.start..self.end];
196            Span::new(
197                target_tokens.first().unwrap().span.start,
198                target_tokens.last().unwrap().span.end,
199            )
200        }
201    }
202}
203
204impl<T> From<Range<usize>> for Span<T> {
205    /// Reinterprets the provided [`std::ops::Range`] as a [`Span`].
206    fn from(value: Range<usize>) -> Self {
207        Self::new(value.start, value.end)
208    }
209}
210
211impl<T> From<Span<T>> for Range<usize> {
212    /// Converts the [`Span`] to an [`std::ops::Range`].
213    fn from(value: Span<T>) -> Self {
214        value.start..value.end
215    }
216}
217
218impl<T> IntoIterator for Span<T> {
219    type Item = usize;
220
221    type IntoIter = Range<usize>;
222
223    /// Converts the [`Span`] into an iterator that yields the indices covered by its range.
224    ///
225    /// Note that [`Span`] is half-open, meaning that the value [`Self::end`] will not be yielded
226    /// by this iterator: it will stop at the index immediately preceding [`Self::end`].
227    fn into_iter(self) -> Self::IntoIter {
228        self.start..self.end
229    }
230}
231
impl<T> Clone for Span<T> {
    // Note: manual implementation so we don't unnecessarily require `T` to impl `Clone`.
    // A derived impl would add a `T: Clone` bound even though no `T` value is ever stored.
    fn clone(&self) -> Self {
        // A plain copy suffices because `Span<T>` is `Copy` for every `T`.
        *self
    }
}
// Written by hand for the same reason: a derived `Copy` would require `T: Copy`.
impl<T> Copy for Span<T> {}
239
#[cfg(test)]
mod tests {
    use crate::{
        Document,
        expr::{ExprExt, SequenceExpr},
    };

    use super::Span;

    /// A span whose element type is irrelevant to the test at hand.
    type UntypedSpan = Span<()>;

    /// Partially overlapping, nested, and in-range empty spans overlap; spans that only touch
    /// do not.
    #[test]
    fn overlaps() {
        let base = UntypedSpan::new(0, 5);

        assert!(base.overlaps_with(UntypedSpan::new(3, 6)));
        assert!(base.overlaps_with(UntypedSpan::new(2, 3)));
        assert!(base.overlaps_with(UntypedSpan::new(4, 5)));
        assert!(base.overlaps_with(UntypedSpan::new(4, 4)));

        let touching = UntypedSpan::new(3, 5);
        assert!(!UntypedSpan::new(0, 3).overlaps_with(touching));
    }

    /// Expanding grows the span downwards for earlier targets and upwards for later ones.
    #[test]
    fn expands_properly() {
        let mut span = UntypedSpan::new(2, 2);

        // A target before the start moves the start.
        span.expand_to_include(1);
        assert_eq!(span, UntypedSpan::new(1, 2));

        // A target at the exclusive end moves the end one past it.
        span.expand_to_include(2);
        assert_eq!(span, UntypedSpan::new(1, 3));
    }

    /// Token spans convert to the character spans of the tokens they cover.
    #[test]
    fn to_char_span_converts_correctly() {
        let doc = Document::new_plain_english_curated("Hello world!");

        // Empty span.
        let empty_tokens = Span::EMPTY;
        let empty_chars = empty_tokens.to_char_span(doc.get_tokens());
        assert!(empty_chars.is_empty());

        // Span from `Expr`.
        let matched_tokens = SequenceExpr::default()
            .then_any_word()
            .t_ws()
            .then_any_word()
            .iter_matches_in_doc(&doc)
            .next()
            .unwrap();
        let matched_chars = matched_tokens.to_char_span(doc.get_tokens());
        let rendered = matched_chars.get_content_string(doc.get_source());
        assert_eq!(rendered, "Hello world");
    }
}