typst_syntax/
span.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
use std::fmt::{self, Debug, Formatter};
use std::num::{NonZeroU16, NonZeroU64};
use std::ops::Range;

use ecow::EcoString;

use crate::FileId;

/// Defines a range in a file.
///
/// This is used throughout the compiler to track which source section an
/// element stems from or an error applies to.
///
/// - The [`.id()`](Self::id) function can be used to get the `FileId` for the
///   span and, by extension, its file system path.
/// - The `WorldExt::range` function can be used to map the span to a
///   `Range<usize>`.
///
/// This type takes up 8 bytes and is copyable and null-optimized (i.e.
/// `Option<Span>` also takes 8 bytes).
///
/// Spans come in two flavors: Numbered spans and raw range spans. The
/// `WorldExt::range` function automatically handles both cases, yielding a
/// `Range<usize>`.
///
/// # Numbered spans
/// Typst source files use _numbered spans._ Rather than using byte ranges,
/// which shift a lot as you type, each AST node gets a unique number.
///
/// During editing, the span numbers stay mostly stable, even for nodes behind
/// an insertion. This is not true for simple ranges as they would shift. Spans
/// can be used as inputs to memoized functions without hurting cache
/// performance when text is inserted somewhere in the document other than the
/// end.
///
/// Span ids are ordered in the syntax tree to enable quickly finding the node
/// with some id:
/// - The id of a parent is always smaller than the ids of any of its children.
/// - The id of a node is always greater than any id in the subtrees of any left
///   sibling and smaller than any id in the subtrees of any right sibling.
///
/// # Raw range spans
/// Non Typst-files use raw ranges instead of numbered spans. The maximum
/// encodable value for start and end is 2^23. Larger values will be saturated.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Span(NonZeroU64);

impl Span {
    /// The full range of numbers available for source file span numbering.
    pub(crate) const FULL: Range<u64> = 2..(1 << 47);

    /// The value reserved for the detached span.
    const DETACHED: u64 = 1;

    /// Data layout:
    /// | 16 bits file id | 48 bits number |
    ///
    /// Number =
    /// - 1 means detached
    /// - 2..2^47-1 is a numbered span
    /// - 2^47..2^48-1 is a raw range span. To retrieve it, you must subtract
    ///   `RANGE_BASE` and then use shifting/bitmasking to extract the
    ///   components.
    const NUMBER_BITS: usize = 48;
    const FILE_ID_SHIFT: usize = Self::NUMBER_BITS;
    const NUMBER_MASK: u64 = (1 << Self::NUMBER_BITS) - 1;
    const RANGE_BASE: u64 = Self::FULL.end;
    const RANGE_PART_BITS: usize = 23;
    const RANGE_PART_SHIFT: usize = Self::RANGE_PART_BITS;
    const RANGE_PART_MASK: u64 = (1 << Self::RANGE_PART_BITS) - 1;

    /// Create a span that does not point into any file.
    pub const fn detached() -> Self {
        match NonZeroU64::new(Self::DETACHED) {
            Some(v) => Self(v),
            None => unreachable!(),
        }
    }

    /// Create a new span from a file id and a number.
    ///
    /// Returns `None` if `number` is not contained in `FULL`.
    pub(crate) const fn from_number(id: FileId, number: u64) -> Option<Self> {
        if number < Self::FULL.start || number >= Self::FULL.end {
            return None;
        }
        Some(Self::pack(id, number))
    }

    /// Create a new span from a raw byte range instead of a span number.
    ///
    /// If one of the range's parts exceeds the maximum value (2^23), it is
    /// saturated.
    pub const fn from_range(id: FileId, range: Range<usize>) -> Self {
        let max = 1 << Self::RANGE_PART_BITS;
        let start = if range.start > max { max } else { range.start } as u64;
        let end = if range.end > max { max } else { range.end } as u64;
        let number = (start << Self::RANGE_PART_SHIFT) | end;
        Self::pack(id, Self::RANGE_BASE + number)
    }

    /// Construct from a raw number.
    ///
    /// Should only be used with numbers retrieved via
    /// [`into_raw`](Self::into_raw). Misuse may results in panics, but no
    /// unsafety.
    pub const fn from_raw(v: NonZeroU64) -> Self {
        Self(v)
    }

    /// Pack a file ID and the low bits into a span.
    const fn pack(id: FileId, low: u64) -> Self {
        let bits = ((id.into_raw().get() as u64) << Self::FILE_ID_SHIFT) | low;
        match NonZeroU64::new(bits) {
            Some(v) => Self(v),
            // The file ID is non-zero.
            None => unreachable!(),
        }
    }

    /// Whether the span is detached.
    pub const fn is_detached(self) -> bool {
        self.0.get() == Self::DETACHED
    }

    /// The id of the file the span points into.
    ///
    /// Returns `None` if the span is detached.
    pub const fn id(self) -> Option<FileId> {
        // Detached span has only zero high bits, so it will trigger the
        // `None` case.
        match NonZeroU16::new((self.0.get() >> Self::FILE_ID_SHIFT) as u16) {
            Some(v) => Some(FileId::from_raw(v)),
            None => None,
        }
    }

    /// The unique number of the span within its [`Source`](crate::Source).
    pub(crate) const fn number(self) -> u64 {
        self.0.get() & Self::NUMBER_MASK
    }

    /// Extract a raw byte range from the span, if it is a raw range span.
    ///
    /// Typically, you should use `WorldExt::range` instead.
    pub const fn range(self) -> Option<Range<usize>> {
        let Some(number) = self.number().checked_sub(Self::RANGE_BASE) else {
            return None;
        };

        let start = (number >> Self::RANGE_PART_SHIFT) as usize;
        let end = (number & Self::RANGE_PART_MASK) as usize;
        Some(start..end)
    }

    /// Extract the raw underlying number.
    pub const fn into_raw(self) -> NonZeroU64 {
        self.0
    }

    /// Return `other` if `self` is detached and `self` otherwise.
    pub fn or(self, other: Self) -> Self {
        if self.is_detached() {
            other
        } else {
            self
        }
    }

    /// Find the first non-detached span in the iterator.
    pub fn find(iter: impl IntoIterator<Item = Self>) -> Self {
        iter.into_iter()
            .find(|span| !span.is_detached())
            .unwrap_or(Span::detached())
    }

    /// Resolve a file location relative to this span's source.
    pub fn resolve_path(self, path: &str) -> Result<FileId, EcoString> {
        let Some(file) = self.id() else {
            return Err("cannot access file system from here".into());
        };
        Ok(file.join(path))
    }
}

/// A value with a span locating it in the source code.
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct Spanned<T> {
    /// The spanned value.
    pub v: T,
    /// The value's location in source code.
    pub span: Span,
}

impl<T> Spanned<T> {
    /// Create a new instance from a value and its span.
    pub fn new(v: T, span: Span) -> Self {
        Self { v, span }
    }

    /// Convert from `&Spanned<T>` to `Spanned<&T>`
    pub fn as_ref(&self) -> Spanned<&T> {
        Spanned { v: &self.v, span: self.span }
    }

    /// Map the value using a function.
    pub fn map<F, U>(self, f: F) -> Spanned<U>
    where
        F: FnOnce(T) -> U,
    {
        Spanned { v: f(self.v), span: self.span }
    }
}

impl<T: Debug> Debug for Spanned<T> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        self.v.fmt(f)
    }
}

#[cfg(test)]
mod tests {
    use std::num::NonZeroU16;
    use std::ops::Range;

    use crate::{FileId, Span};

    #[test]
    fn test_span_detached() {
        let span = Span::detached();
        assert!(span.is_detached());
        assert_eq!(span.id(), None);
        assert_eq!(span.range(), None);
    }

    #[test]
    fn test_span_number_encoding() {
        let id = FileId::from_raw(NonZeroU16::new(5).unwrap());
        let span = Span::from_number(id, 10).unwrap();
        assert_eq!(span.id(), Some(id));
        assert_eq!(span.number(), 10);
        assert_eq!(span.range(), None);
    }

    #[test]
    fn test_span_range_encoding() {
        let id = FileId::from_raw(NonZeroU16::new(u16::MAX).unwrap());
        let roundtrip = |range: Range<usize>| {
            let span = Span::from_range(id, range.clone());
            assert_eq!(span.id(), Some(id));
            assert_eq!(span.range(), Some(range));
        };

        roundtrip(0..0);
        roundtrip(177..233);
        roundtrip(0..8388607);
        roundtrip(8388606..8388607);
    }
}