typst_syntax/
span.rs

1use std::fmt::{self, Debug, Formatter};
2use std::num::{NonZeroU16, NonZeroU64};
3use std::ops::Range;
4
5use ecow::EcoString;
6
7use crate::FileId;
8
9/// Defines a range in a file.
10///
11/// This is used throughout the compiler to track which source section an
12/// element stems from or an error applies to.
13///
14/// - The [`.id()`](Self::id) function can be used to get the `FileId` for the
15///   span and, by extension, its file system path.
16/// - The `WorldExt::range` function can be used to map the span to a
17///   `Range<usize>`.
18///
19/// This type takes up 8 bytes and is copyable and null-optimized (i.e.
20/// `Option<Span>` also takes 8 bytes).
21///
22/// Spans come in two flavors: Numbered spans and raw range spans. The
23/// `WorldExt::range` function automatically handles both cases, yielding a
24/// `Range<usize>`.
25///
26/// # Numbered spans
27/// Typst source files use _numbered spans._ Rather than using byte ranges,
28/// which shift a lot as you type, each AST node gets a unique number.
29///
30/// During editing, the span numbers stay mostly stable, even for nodes behind
31/// an insertion. This is not true for simple ranges as they would shift. Spans
32/// can be used as inputs to memoized functions without hurting cache
33/// performance when text is inserted somewhere in the document other than the
34/// end.
35///
36/// Span ids are ordered in the syntax tree to enable quickly finding the node
37/// with some id:
38/// - The id of a parent is always smaller than the ids of any of its children.
39/// - The id of a node is always greater than any id in the subtrees of any left
40///   sibling and smaller than any id in the subtrees of any right sibling.
41///
42/// # Raw range spans
43/// Non Typst-files use raw ranges instead of numbered spans. The maximum
44/// encodable value for start and end is 2^23. Larger values will be saturated.
45#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
46pub struct Span(NonZeroU64);
47
48impl Span {
49    /// The full range of numbers available for source file span numbering.
50    pub(crate) const FULL: Range<u64> = 2..(1 << 47);
51
52    /// The value reserved for the detached span.
53    const DETACHED: u64 = 1;
54
55    /// Data layout:
56    /// | 16 bits file id | 48 bits number |
57    ///
58    /// Number =
59    /// - 1 means detached
60    /// - 2..2^47-1 is a numbered span
61    /// - 2^47..2^48-1 is a raw range span. To retrieve it, you must subtract
62    ///   `RANGE_BASE` and then use shifting/bitmasking to extract the
63    ///   components.
64    const NUMBER_BITS: usize = 48;
65    const FILE_ID_SHIFT: usize = Self::NUMBER_BITS;
66    const NUMBER_MASK: u64 = (1 << Self::NUMBER_BITS) - 1;
67    const RANGE_BASE: u64 = Self::FULL.end;
68    const RANGE_PART_BITS: usize = 23;
69    const RANGE_PART_SHIFT: usize = Self::RANGE_PART_BITS;
70    const RANGE_PART_MASK: u64 = (1 << Self::RANGE_PART_BITS) - 1;
71
72    /// Create a span that does not point into any file.
73    pub const fn detached() -> Self {
74        match NonZeroU64::new(Self::DETACHED) {
75            Some(v) => Self(v),
76            None => unreachable!(),
77        }
78    }
79
80    /// Create a new span from a file id and a number.
81    ///
82    /// Returns `None` if `number` is not contained in `FULL`.
83    pub(crate) const fn from_number(id: FileId, number: u64) -> Option<Self> {
84        if number < Self::FULL.start || number >= Self::FULL.end {
85            return None;
86        }
87        Some(Self::pack(id, number))
88    }
89
90    /// Create a new span from a raw byte range instead of a span number.
91    ///
92    /// If one of the range's parts exceeds the maximum value (2^23), it is
93    /// saturated.
94    pub const fn from_range(id: FileId, range: Range<usize>) -> Self {
95        let max = 1 << Self::RANGE_PART_BITS;
96        let start = if range.start > max { max } else { range.start } as u64;
97        let end = if range.end > max { max } else { range.end } as u64;
98        let number = (start << Self::RANGE_PART_SHIFT) | end;
99        Self::pack(id, Self::RANGE_BASE + number)
100    }
101
102    /// Construct from a raw number.
103    ///
104    /// Should only be used with numbers retrieved via
105    /// [`into_raw`](Self::into_raw). Misuse may results in panics, but no
106    /// unsafety.
107    pub const fn from_raw(v: NonZeroU64) -> Self {
108        Self(v)
109    }
110
111    /// Pack a file ID and the low bits into a span.
112    const fn pack(id: FileId, low: u64) -> Self {
113        let bits = ((id.into_raw().get() as u64) << Self::FILE_ID_SHIFT) | low;
114        match NonZeroU64::new(bits) {
115            Some(v) => Self(v),
116            // The file ID is non-zero.
117            None => unreachable!(),
118        }
119    }
120
121    /// Whether the span is detached.
122    pub const fn is_detached(self) -> bool {
123        self.0.get() == Self::DETACHED
124    }
125
126    /// The id of the file the span points into.
127    ///
128    /// Returns `None` if the span is detached.
129    pub const fn id(self) -> Option<FileId> {
130        // Detached span has only zero high bits, so it will trigger the
131        // `None` case.
132        match NonZeroU16::new((self.0.get() >> Self::FILE_ID_SHIFT) as u16) {
133            Some(v) => Some(FileId::from_raw(v)),
134            None => None,
135        }
136    }
137
138    /// The unique number of the span within its [`Source`](crate::Source).
139    pub(crate) const fn number(self) -> u64 {
140        self.0.get() & Self::NUMBER_MASK
141    }
142
143    /// Extract a raw byte range from the span, if it is a raw range span.
144    ///
145    /// Typically, you should use `WorldExt::range` instead.
146    pub const fn range(self) -> Option<Range<usize>> {
147        let Some(number) = self.number().checked_sub(Self::RANGE_BASE) else {
148            return None;
149        };
150
151        let start = (number >> Self::RANGE_PART_SHIFT) as usize;
152        let end = (number & Self::RANGE_PART_MASK) as usize;
153        Some(start..end)
154    }
155
156    /// Extract the raw underlying number.
157    pub const fn into_raw(self) -> NonZeroU64 {
158        self.0
159    }
160
161    /// Return `other` if `self` is detached and `self` otherwise.
162    pub fn or(self, other: Self) -> Self {
163        if self.is_detached() {
164            other
165        } else {
166            self
167        }
168    }
169
170    /// Find the first non-detached span in the iterator.
171    pub fn find(iter: impl IntoIterator<Item = Self>) -> Self {
172        iter.into_iter()
173            .find(|span| !span.is_detached())
174            .unwrap_or(Span::detached())
175    }
176
177    /// Resolve a file location relative to this span's source.
178    pub fn resolve_path(self, path: &str) -> Result<FileId, EcoString> {
179        let Some(file) = self.id() else {
180            return Err("cannot access file system from here".into());
181        };
182        Ok(file.join(path))
183    }
184}
185
186/// A value with a span locating it in the source code.
187#[derive(Copy, Clone, Eq, PartialEq, Hash)]
188pub struct Spanned<T> {
189    /// The spanned value.
190    pub v: T,
191    /// The value's location in source code.
192    pub span: Span,
193}
194
195impl<T> Spanned<T> {
196    /// Create a new instance from a value and its span.
197    pub fn new(v: T, span: Span) -> Self {
198        Self { v, span }
199    }
200
201    /// Convert from `&Spanned<T>` to `Spanned<&T>`
202    pub fn as_ref(&self) -> Spanned<&T> {
203        Spanned { v: &self.v, span: self.span }
204    }
205
206    /// Map the value using a function.
207    pub fn map<F, U>(self, f: F) -> Spanned<U>
208    where
209        F: FnOnce(T) -> U,
210    {
211        Spanned { v: f(self.v), span: self.span }
212    }
213}
214
215impl<T: Debug> Debug for Spanned<T> {
216    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
217        self.v.fmt(f)
218    }
219}
220
221#[cfg(test)]
222mod tests {
223    use std::num::NonZeroU16;
224    use std::ops::Range;
225
226    use crate::{FileId, Span};
227
228    #[test]
229    fn test_span_detached() {
230        let span = Span::detached();
231        assert!(span.is_detached());
232        assert_eq!(span.id(), None);
233        assert_eq!(span.range(), None);
234    }
235
236    #[test]
237    fn test_span_number_encoding() {
238        let id = FileId::from_raw(NonZeroU16::new(5).unwrap());
239        let span = Span::from_number(id, 10).unwrap();
240        assert_eq!(span.id(), Some(id));
241        assert_eq!(span.number(), 10);
242        assert_eq!(span.range(), None);
243    }
244
245    #[test]
246    fn test_span_range_encoding() {
247        let id = FileId::from_raw(NonZeroU16::new(u16::MAX).unwrap());
248        let roundtrip = |range: Range<usize>| {
249            let span = Span::from_range(id, range.clone());
250            assert_eq!(span.id(), Some(id));
251            assert_eq!(span.range(), Some(range));
252        };
253
254        roundtrip(0..0);
255        roundtrip(177..233);
256        roundtrip(0..8388607);
257        roundtrip(8388606..8388607);
258    }
259}