harper_core/
span.rs

1use std::{fmt::Display, marker::PhantomData, ops::Range};
2
3use serde::{Deserialize, Serialize};
4
5use crate::Token;
6
/// A window into a sequence of `T`.
///
/// The type parameter only records what kind of element the indices refer to (for example
/// `char` or [`Token`]); no value of `T` is ever stored in the span itself.
///
/// Note that the range covered by a [`Span`] is end-exclusive, meaning that the end index is not
/// included in the range covered by the [`Span`]. If you're familiar with the Rust range syntax,
/// you could say the span covers the equivalent of `start..end`, *not* `start..=end`.
///
/// For a [`Span`] to be correct, its end index must be greater than or equal to its start
/// index. Creating or using a [`Span`] which does not follow this rule may lead to unexpected
/// behavior or panics.
///
/// Although specific to `harper.js`, [this page may clear up any questions you have](https://writewithharper.com/docs/harperjs/spans).
// The derived impls below (`Debug`, `Default`, `PartialEq`, `Eq`) are only available when `T`
// implements the corresponding trait; `Clone` and `Copy` are implemented by hand further down
// in this file precisely to avoid such a bound.
#[derive(Debug, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct Span<T> {
    /// The start index of the span (inclusive).
    pub start: usize,
    /// The end index of the span.
    ///
    /// Note that [`Span`] represents an exclusive range. This means that a `Span::new(0, 5)` will
    /// cover the values `0, 1, 2, 3, 4`; it will not cover the `5`.
    pub end: usize,
    // Zero-sized marker tying the span to its element type. It carries no data, so it is left
    // out of the serialized form: only `start` and `end` are (de)serialized.
    #[serde(skip)]
    span_type: PhantomData<T>,
}
30
31impl<T> Span<T> {
32    /// An empty [`Span`].
33    pub const EMPTY: Self = Self {
34        start: 0,
35        end: 0,
36        span_type: PhantomData,
37    };
38
39    /// Creates a new [`Span`] with the provided start and end indices.
40    ///
41    /// # Panics
42    ///
43    /// This will panic if `start` is greater than `end`.
44    pub fn new(start: usize, end: usize) -> Self {
45        if start > end {
46            panic!("{start} > {end}");
47        }
48        Self {
49            start,
50            end,
51            span_type: PhantomData,
52        }
53    }
54
55    /// Creates a new [`Span`] from the provided start position and length.
56    pub fn new_with_len(start: usize, len: usize) -> Self {
57        Self {
58            start,
59            end: start + len,
60            span_type: PhantomData,
61        }
62    }
63
64    /// The length of the [`Span`].
65    pub fn len(&self) -> usize {
66        self.end - self.start
67    }
68
69    /// Checks whether the [`Span`] is empty.
70    ///
71    /// A [`Span`] is considered empty if it has a length of 0.
72    pub fn is_empty(&self) -> bool {
73        self.len() == 0
74    }
75
76    /// Checks whether `idx` is within the range of the span.
77    pub fn contains(&self, idx: usize) -> bool {
78        self.start <= idx && idx < self.end
79    }
80
81    /// Checks whether this span's range overlaps with `other`.
82    pub fn overlaps_with(&self, other: Self) -> bool {
83        (self.start < other.end) && (other.start < self.end)
84    }
85
86    /// Get the associated content. Will return [`None`] if the span is non-empty and any aspect is
87    /// invalid.
88    pub fn try_get_content<'a>(&self, source: &'a [T]) -> Option<&'a [T]> {
89        if self.is_empty() {
90            Some(&source[0..0])
91        } else {
92            source.get(self.start..self.end)
93        }
94    }
95
96    /// Expand the span by either modifying [`Self::start`] or [`Self::end`] to include the target
97    /// index.
98    ///
99    /// Does nothing if the span already includes the target.
100    pub fn expand_to_include(&mut self, target: usize) {
101        if target < self.start {
102            self.start = target;
103        } else if target >= self.end {
104            self.end = target + 1;
105        }
106    }
107
108    /// Get the associated content. Will panic if any aspect is invalid.
109    pub fn get_content<'a>(&self, source: &'a [T]) -> &'a [T] {
110        match self.try_get_content(source) {
111            Some(v) => v,
112            None => panic!("Failed to get content for span."),
113        }
114    }
115
116    /// Set the span's length.
117    pub fn set_len(&mut self, length: usize) {
118        self.end = self.start + length;
119    }
120
121    /// Returns a copy of this [`Span`] with a new length.
122    pub fn with_len(&self, length: usize) -> Self {
123        let mut cloned = *self;
124        cloned.set_len(length);
125        cloned
126    }
127
128    /// Add an amount to both [`Self::start`] and [`Self::end`]
129    pub fn push_by(&mut self, by: usize) {
130        self.start += by;
131        self.end += by;
132    }
133
134    /// Subtract an amount from both [`Self::start`] and [`Self::end`]
135    pub fn pull_by(&mut self, by: usize) {
136        self.start -= by;
137        self.end -= by;
138    }
139
140    /// Add an amount to a copy of both [`Self::start`] and [`Self::end`]
141    pub fn pushed_by(&self, by: usize) -> Self {
142        let mut clone = *self;
143        clone.start += by;
144        clone.end += by;
145        clone
146    }
147
148    /// Subtract an amount to a copy of both [`Self::start`] and [`Self::end`]
149    pub fn pulled_by(&self, by: usize) -> Option<Self> {
150        if by > self.start {
151            return None;
152        }
153
154        let mut clone = *self;
155        clone.start -= by;
156        clone.end -= by;
157        Some(clone)
158    }
159}
160
161/// Additional functions for types that implement [`std::fmt::Debug`] and [`Display`].
162impl<T: Display + std::fmt::Debug> Span<T> {
163    /// Gets the content of this [`Span<T>`] as a [`String`].
164    pub fn get_content_string(&self, source: &[T]) -> String {
165        if let Some(content) = self.try_get_content(source) {
166            content.iter().map(|t| t.to_string()).collect()
167        } else {
168            panic!("Could not get position {self:?} within \"{source:?}\"")
169        }
170    }
171}
172
173/// Functionality specific to [`Token`] spans.
174impl Span<Token> {
175    /// Converts the [`Span<Token>`] into a [`Span<char>`].
176    ///
177    /// This requires knowing the character spans of the tokens covered by this
178    /// [`Span<Token>`]. Because of this, a reference to the source token sequence used to create
179    /// this span is required.
180    pub fn to_char_span(&self, source_document_tokens: &[Token]) -> Span<char> {
181        if self.is_empty() {
182            Span::EMPTY
183        } else {
184            let target_tokens = &source_document_tokens[self.start..self.end];
185            Span::new(
186                target_tokens.first().unwrap().span.start,
187                target_tokens.last().unwrap().span.end,
188            )
189        }
190    }
191}
192
193impl<T> From<Range<usize>> for Span<T> {
194    /// Reinterprets the provided [`std::ops::Range`] as a [`Span`].
195    fn from(value: Range<usize>) -> Self {
196        Self::new(value.start, value.end)
197    }
198}
199
200impl<T> From<Span<T>> for Range<usize> {
201    /// Converts the [`Span`] to an [`std::ops::Range`].
202    fn from(value: Span<T>) -> Self {
203        value.start..value.end
204    }
205}
206
207impl<T> IntoIterator for Span<T> {
208    type Item = usize;
209
210    type IntoIter = Range<usize>;
211
212    /// Converts the [`Span`] into an iterator that yields the indices covered by its range.
213    ///
214    /// Note that [`Span`] is half-open, meaning that the value [`Self::end`] will not be yielded
215    /// by this iterator: it will stop at the index immediately preceding [`Self::end`].
216    fn into_iter(self) -> Self::IntoIter {
217        self.start..self.end
218    }
219}
220
impl<T> Clone for Span<T> {
    // Note: manual implementation so we don't unnecessarily require `T` to impl `Clone`.
    //
    // A span only holds two `usize` indices and a zero-sized `PhantomData<T>` marker, so cloning
    // it never touches a `T`. Because `Span<T>` is also `Copy`, dereferencing `self` is all that
    // is needed to produce the copy.
    fn clone(&self) -> Self {
        *self
    }
}
// Implemented by hand, like `Clone`, so that `Span<T>` is `Copy` without requiring `T: Copy`.
impl<T> Copy for Span<T> {}
228
#[cfg(test)]
mod tests {
    use crate::{
        Document,
        expr::{ExprExt, SequenceExpr},
    };

    use super::Span;

    /// Span alias for tests that do not care about the element type.
    type UntypedSpan = Span<()>;

    #[test]
    fn overlaps() {
        let base = UntypedSpan::new(0, 5);

        // Every one of these shares at least one position with `0..5`.
        for (start, end) in [(3, 6), (2, 3), (4, 5), (4, 4)] {
            assert!(base.overlaps_with(UntypedSpan::new(start, end)));
        }

        // Spans that merely touch do not overlap, because the end is exclusive.
        assert!(!UntypedSpan::new(0, 3).overlaps_with(UntypedSpan::new(3, 5)));
    }

    #[test]
    fn expands_properly() {
        let mut span = UntypedSpan::new(2, 2);

        // Each step: (index to include, expected (start, end) afterwards).
        for (target, (start, end)) in [(1, (1, 2)), (2, (1, 3))] {
            span.expand_to_include(target);
            assert_eq!(span, UntypedSpan::new(start, end));
        }
    }

    #[test]
    fn to_char_span_converts_correctly() {
        let doc = Document::new_plain_english_curated("Hello world!");

        // An empty token span must stay empty after conversion.
        let nothing = Span::EMPTY;
        let nothing_as_chars = nothing.to_char_span(doc.get_tokens());
        assert!(nothing_as_chars.is_empty());

        // A token span produced by an `Expr` match: word, whitespace, word.
        let two_words = SequenceExpr::default()
            .then_any_word()
            .t_ws()
            .then_any_word()
            .iter_matches_in_doc(&doc)
            .next()
            .unwrap();
        let two_words_as_chars = two_words.to_char_span(doc.get_tokens());
        assert_eq!(
            two_words_as_chars.get_content_string(doc.get_source()),
            "Hello world"
        );
    }
}