harper_core/
span.rs

1use std::{fmt::Display, marker::PhantomData, ops::Range};
2
3use serde::{Deserialize, Serialize};
4
5use crate::Token;
6
/// A window in a sequence of `T`.
///
/// `T` is the element type of the sequence being indexed (for example `char` or [`Token`]). It is
/// only tracked at the type level: a [`Span`] stores two indices and nothing else.
///
/// Note that the range covered by a [`Span`] is end-exclusive, meaning that the end index is not
/// included in the range covered by the [`Span`]. If you're familiar with the Rust range syntax,
/// you could say the span covers the equivalent of `start..end`, *not* `start..=end`.
///
/// For a [`Span`] to be correct, its end index must be greater than or equal to its start
/// index. Creating or using a [`Span`] which does not follow this rule may lead to unexpected
/// behavior or panics. Because both index fields are public, this rule is not enforced by the
/// type itself; only [`Span::new`] checks it.
///
/// Although specific to `harper.js`, [this page may clear up any questions you have](https://writewithharper.com/docs/harperjs/spans).
#[derive(Debug, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct Span<T> {
    /// The start index of the span (inclusive).
    pub start: usize,
    /// The end index of the span (exclusive).
    ///
    /// Note that [`Span`] represents an exclusive range. This means that a `Span::new(0, 5)` will
    /// cover the values `0, 1, 2, 3, 4`; it will not cover the `5`.
    pub end: usize,
    // Zero-sized marker tying the span to its element type `T`; excluded from (de)serialization.
    #[serde(skip)]
    span_type: PhantomData<T>,
}
30
31impl<T> Span<T> {
32    /// An empty [`Span`].
33    pub const EMPTY: Self = Self {
34        start: 0,
35        end: 0,
36        span_type: PhantomData,
37    };
38
39    /// Creates a new [`Span`] with the provided start and end indices.
40    ///
41    /// # Panics
42    ///
43    /// This will panic if `start` is greater than `end`.
44    pub fn new(start: usize, end: usize) -> Self {
45        if start > end {
46            panic!("{start} > {end}");
47        }
48        Self {
49            start,
50            end,
51            span_type: PhantomData,
52        }
53    }
54
55    /// Creates a new [`Span`] from the provided start position and length.
56    pub fn new_with_len(start: usize, len: usize) -> Self {
57        Self {
58            start,
59            end: start + len,
60            span_type: PhantomData,
61        }
62    }
63
64    /// The length of the [`Span`].
65    pub fn len(&self) -> usize {
66        self.end - self.start
67    }
68
69    /// Checks whether the [`Span`] is empty.
70    ///
71    /// A [`Span`] is considered empty if it has a length of 0.
72    pub fn is_empty(&self) -> bool {
73        self.len() == 0
74    }
75
76    /// Checks whether `idx` is within the range of the span.
77    pub fn contains(&self, idx: usize) -> bool {
78        self.start <= idx && idx < self.end
79    }
80
81    /// Checks whether this span's range overlaps with `other`.
82    pub fn overlaps_with(&self, other: Self) -> bool {
83        (self.start < other.end) && (other.start < self.end)
84    }
85
86    /// Get the associated content. Will return [`None`] if the span is non-empty and any aspect is
87    /// invalid.
88    pub fn try_get_content<'a>(&self, source: &'a [T]) -> Option<&'a [T]> {
89        if self.is_empty() {
90            Some(&source[0..0])
91        } else {
92            source.get(self.start..self.end)
93        }
94    }
95
96    /// Expand the span by either modifying [`Self::start`] or [`Self::end`] to include the target
97    /// index.
98    ///
99    /// Does nothing if the span already includes the target.
100    pub fn expand_to_include(&mut self, target: usize) {
101        if target < self.start {
102            self.start = target;
103        } else if target >= self.end {
104            self.end = target + 1;
105        }
106    }
107
108    /// Return an expanded span by either modifying [`Self::start`] or [`Self::end`] to include the target
109    /// index.
110    pub fn expanded_to_include(&self, target: usize) -> Self {
111        let mut clone = *self;
112        clone.expand_to_include(target);
113        clone
114    }
115
116    /// Get the associated content. Will panic if any aspect is invalid.
117    pub fn get_content<'a>(&self, source: &'a [T]) -> &'a [T] {
118        match self.try_get_content(source) {
119            Some(v) => v,
120            None => panic!("Failed to get content for span."),
121        }
122    }
123
124    /// Set the span's length.
125    pub fn set_len(&mut self, length: usize) {
126        self.end = self.start + length;
127    }
128
129    /// Returns a copy of this [`Span`] with a new length.
130    pub fn with_len(&self, length: usize) -> Self {
131        let mut cloned = *self;
132        cloned.set_len(length);
133        cloned
134    }
135
136    /// Add an amount to both [`Self::start`] and [`Self::end`]
137    pub fn push_by(&mut self, by: usize) {
138        self.start += by;
139        self.end += by;
140    }
141
142    /// Subtract an amount from both [`Self::start`] and [`Self::end`]
143    pub fn pull_by(&mut self, by: usize) {
144        self.start -= by;
145        self.end -= by;
146    }
147
148    /// Add an amount to a copy of both [`Self::start`] and [`Self::end`]
149    pub fn pushed_by(&self, by: usize) -> Self {
150        let mut clone = *self;
151        clone.start += by;
152        clone.end += by;
153        clone
154    }
155
156    /// Subtract an amount to a copy of both [`Self::start`] and [`Self::end`]
157    pub fn pulled_by(&self, by: usize) -> Option<Self> {
158        if by > self.start {
159            return None;
160        }
161
162        let mut clone = *self;
163        clone.start -= by;
164        clone.end -= by;
165        Some(clone)
166    }
167}
168
169/// Additional functions for types that implement [`std::fmt::Debug`] and [`Display`].
170impl<T: Display + std::fmt::Debug> Span<T> {
171    /// Gets the content of this [`Span<T>`] as a [`String`].
172    pub fn get_content_string(&self, source: &[T]) -> String {
173        if let Some(content) = self.try_get_content(source) {
174            content.iter().map(|t| t.to_string()).collect()
175        } else {
176            panic!("Could not get position {self:?} within \"{source:?}\"")
177        }
178    }
179}
180
181/// Functionality specific to [`Token`] spans.
182impl Span<Token> {
183    /// Converts the [`Span<Token>`] into a [`Span<char>`].
184    ///
185    /// This requires knowing the character spans of the tokens covered by this
186    /// [`Span<Token>`]. Because of this, a reference to the source token sequence used to create
187    /// this span is required.
188    pub fn to_char_span(&self, source_document_tokens: &[Token]) -> Span<char> {
189        if self.is_empty() {
190            Span::EMPTY
191        } else {
192            let target_tokens = &source_document_tokens[self.start..self.end];
193            Span::new(
194                target_tokens.first().unwrap().span.start,
195                target_tokens.last().unwrap().span.end,
196            )
197        }
198    }
199}
200
201impl<T> From<Range<usize>> for Span<T> {
202    /// Reinterprets the provided [`std::ops::Range`] as a [`Span`].
203    fn from(value: Range<usize>) -> Self {
204        Self::new(value.start, value.end)
205    }
206}
207
208impl<T> From<Span<T>> for Range<usize> {
209    /// Converts the [`Span`] to an [`std::ops::Range`].
210    fn from(value: Span<T>) -> Self {
211        value.start..value.end
212    }
213}
214
215impl<T> IntoIterator for Span<T> {
216    type Item = usize;
217
218    type IntoIter = Range<usize>;
219
220    /// Converts the [`Span`] into an iterator that yields the indices covered by its range.
221    ///
222    /// Note that [`Span`] is half-open, meaning that the value [`Self::end`] will not be yielded
223    /// by this iterator: it will stop at the index immediately preceding [`Self::end`].
224    fn into_iter(self) -> Self::IntoIter {
225        self.start..self.end
226    }
227}
228
impl<T> Clone for Span<T> {
    // Note: manual implementation so we don't unnecessarily require `T` to impl `Clone`.
    // The impl header carries no bound on `T`, so every `Span<T>` is cloneable.
    fn clone(&self) -> Self {
        // `Span<T>` is `Copy` (see the impl right below), so dereferencing yields a full copy.
        *self
    }
}
// Also implemented by hand, without any bound on `T`, so that every `Span<T>` is `Copy`.
impl<T> Copy for Span<T> {}
236
#[cfg(test)]
mod tests {
    use crate::{
        Document,
        expr::{ExprExt, SequenceExpr},
    };

    use super::Span;

    /// A span whose element type is irrelevant to the test at hand.
    type UntypedSpan = Span<()>;

    #[test]
    fn overlaps() {
        let base = UntypedSpan::new(0, 5);

        // Partial overlap, full containment, a shared tail, and an empty span inside the base.
        for (start, end) in [(3, 6), (2, 3), (4, 5), (4, 4)] {
            assert!(base.overlaps_with(UntypedSpan::new(start, end)));
        }

        // Spans that merely touch do not overlap, because the end index is exclusive.
        assert!(!UntypedSpan::new(0, 3).overlaps_with(UntypedSpan::new(3, 5)));
    }

    #[test]
    fn expands_properly() {
        let mut grown = UntypedSpan::new(2, 2);

        // A target before the start pulls the start back.
        grown.expand_to_include(1);
        assert_eq!(grown, UntypedSpan::new(1, 2));

        // A target at the exclusive end pushes the end one past the target.
        grown.expand_to_include(2);
        assert_eq!(grown, UntypedSpan::new(1, 3));
    }

    #[test]
    fn to_char_span_converts_correctly() {
        let doc = Document::new_plain_english_curated("Hello world!");
        let tokens = doc.get_tokens();

        // Empty span.
        let empty_token_span = Span::EMPTY;
        let empty_char_span = empty_token_span.to_char_span(tokens);
        assert!(empty_char_span.is_empty());

        // Span from `Expr`.
        let word_space_word = SequenceExpr::default()
            .then_any_word()
            .t_ws()
            .then_any_word();
        let matched_token_span = word_space_word.iter_matches_in_doc(&doc).next().unwrap();
        let matched_char_span = matched_token_span.to_char_span(tokens);
        assert_eq!(
            matched_char_span.get_content_string(doc.get_source()),
            "Hello world"
        );
    }
}