Skip to main content

maya_mel/
syntax.rs

1#![forbid(unsafe_code)]
2//! Shared spans, tokens, and source mapping primitives.
3//!
4//! These types are useful when consuming diagnostics or mapping byte ranges
5//! back to display text.
6
7use std::ops::Range;
8use std::sync::Arc;
9
10pub use text_size::{TextRange, TextSize};
11
/// Wraps a raw `u32` offset in a [`TextSize`].
///
/// Usable in `const` contexts.
#[must_use]
pub const fn text_size(value: u32) -> TextSize {
    TextSize::new(value)
}
16
17#[must_use]
18pub const fn text_range(start: u32, end: u32) -> TextRange {
19    TextRange::new(text_size(start), text_size(end))
20}
21
22#[must_use]
23pub fn range_start(range: TextRange) -> u32 {
24    range.start().into()
25}
26
27#[must_use]
28pub fn range_end(range: TextRange) -> u32 {
29    range.end().into()
30}
31
32#[must_use]
33pub fn range_len(range: TextRange) -> u32 {
34    range.len().into()
35}
36
37#[must_use]
38pub fn text_slice(text: &str, range: TextRange) -> &str {
39    &text[range_start(range) as usize..range_end(range) as usize]
40}
41
/// A single edited region, recorded as a span in the source text together
/// with the span that stands in for it in the display text.
///
/// All four bounds are raw `u32` offsets. The type performs no validation of
/// the bounds it is given.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceMapEdit {
    /// Where the edited region begins in the source text.
    source_start: u32,
    /// Where the edited region ends in the source text.
    source_end: u32,
    /// Where the replacement begins in the display text.
    display_start: u32,
    /// Where the replacement ends in the display text.
    display_end: u32,
}

impl SourceMapEdit {
    /// Records an edit that maps the source span `source_start..source_end`
    /// onto the display span `display_start..display_end`.
    #[must_use]
    pub const fn new(
        source_start: u32,
        source_end: u32,
        display_start: u32,
        display_end: u32,
    ) -> Self {
        Self {
            source_start,
            source_end,
            display_start,
            display_end,
        }
    }

    /// Start of the edited region in the source text.
    #[must_use]
    pub const fn source_start(self) -> u32 {
        let Self { source_start, .. } = self;
        source_start
    }

    /// End of the edited region in the source text.
    #[must_use]
    pub const fn source_end(self) -> u32 {
        let Self { source_end, .. } = self;
        source_end
    }

    /// Start of the replacement in the display text.
    #[must_use]
    pub const fn display_start(self) -> u32 {
        let Self { display_start, .. } = self;
        display_start
    }

    /// End of the replacement in the display text.
    #[must_use]
    pub const fn display_end(self) -> u32 {
        let Self { display_end, .. } = self;
        display_end
    }

    /// Signed shift to add to a source offset located after this edit to get
    /// its display offset: `display_end - source_end`.
    ///
    /// The subtraction is done in `i64` so it cannot overflow for any pair of
    /// `u32` bounds.
    #[must_use]
    pub const fn delta_after(self) -> i64 {
        let display_end = self.display_end as i64;
        let source_end = self.source_end as i64;
        display_end - source_end
    }
}
91
/// Internal representation backing a [`SourceMap`].
#[derive(Debug, Clone, PartialEq, Eq)]
enum SourceMapKind {
    /// Every offset maps to itself; lookups clamp their argument to `len`.
    Identity {
        len: usize,
    },
    /// Dense lookup table indexed by source offset; each entry is the
    /// corresponding display offset.
    Indexed {
        source_to_display: Arc<[u32]>,
    },
    /// List of edits plus the lengths of both texts.
    ///
    /// Lookups locate the governing edit with `partition_point` on the edit
    /// start offsets, so `edits` is expected to be ordered by start offset.
    Sparse {
        source_len: usize,
        display_len: usize,
        edits: Arc<[SourceMapEdit]>,
    },
}
106
107#[derive(Debug, Clone, PartialEq, Eq)]
108pub struct SourceMap {
109    kind: SourceMapKind,
110}
111
112impl SourceMap {
113    #[must_use]
114    pub fn identity(len: usize) -> Self {
115        Self {
116            kind: SourceMapKind::Identity { len },
117        }
118    }
119
120    #[must_use]
121    pub fn from_source_to_display(source_to_display: Vec<u32>) -> Self {
122        Self::from_shared_source_to_display(source_to_display.into())
123    }
124
125    #[must_use]
126    pub fn from_shared_source_to_display(source_to_display: Arc<[u32]>) -> Self {
127        if source_to_display
128            .iter()
129            .enumerate()
130            .all(|(offset, mapped)| *mapped == u32::try_from(offset).unwrap_or(u32::MAX))
131        {
132            return Self::identity(source_to_display.len().saturating_sub(1));
133        }
134        Self {
135            kind: SourceMapKind::Indexed { source_to_display },
136        }
137    }
138
139    #[must_use]
140    pub fn from_sparse_edits(
141        source_len: usize,
142        display_len: usize,
143        edits: Arc<[SourceMapEdit]>,
144    ) -> Self {
145        if source_len == display_len && edits.is_empty() {
146            return Self::identity(source_len);
147        }
148        Self {
149            kind: SourceMapKind::Sparse {
150                source_len,
151                display_len,
152                edits,
153            },
154        }
155    }
156
157    #[must_use]
158    pub fn display_offset(&self, offset: u32) -> usize {
159        match &self.kind {
160            SourceMapKind::Identity { len } => usize::try_from(offset).unwrap_or(*len).min(*len),
161            SourceMapKind::Indexed { source_to_display } => source_to_display
162                .get(offset as usize)
163                .copied()
164                .or_else(|| source_to_display.last().copied())
165                .unwrap_or(offset)
166                as usize,
167            SourceMapKind::Sparse {
168                source_len,
169                display_len,
170                edits,
171            } => sparse_source_to_display(*source_len, *display_len, edits, offset),
172        }
173    }
174
175    #[must_use]
176    pub fn display_range(&self, range: TextRange) -> Range<usize> {
177        self.display_offset(range_start(range))..self.display_offset(range_end(range))
178    }
179
180    #[must_use]
181    pub fn source_offset_for_display(&self, display_offset: usize) -> u32 {
182        match &self.kind {
183            SourceMapKind::Identity { len } => {
184                u32::try_from(display_offset.min(*len)).unwrap_or(u32::MAX)
185            }
186            SourceMapKind::Indexed { source_to_display } => {
187                match source_to_display
188                    .binary_search_by(|mapped| mapped.cmp(&(display_offset as u32)))
189                {
190                    Ok(mut index) => {
191                        while index + 1 < source_to_display.len()
192                            && source_to_display[index + 1] <= display_offset as u32
193                        {
194                            index += 1;
195                        }
196                        u32::try_from(index).unwrap_or(u32::MAX)
197                    }
198                    Err(0) => 0,
199                    Err(index) => u32::try_from(index - 1).unwrap_or(u32::MAX),
200                }
201            }
202            SourceMapKind::Sparse {
203                source_len,
204                display_len,
205                edits,
206            } => sparse_display_to_source(*source_len, *display_len, edits, display_offset),
207        }
208    }
209
210    #[must_use]
211    pub fn source_range_from_display_range(&self, range: Range<usize>) -> TextRange {
212        text_range(
213            self.source_offset_for_display(range.start),
214            self.source_offset_for_display(range.end),
215        )
216    }
217}
218
219fn sparse_source_to_display(
220    source_len: usize,
221    display_len: usize,
222    edits: &[SourceMapEdit],
223    offset: u32,
224) -> usize {
225    let clamped = usize::try_from(offset)
226        .unwrap_or(source_len)
227        .min(source_len) as u32;
228    let Some(index) = edits
229        .partition_point(|edit| edit.source_start() <= clamped)
230        .checked_sub(1)
231    else {
232        return clamped as usize;
233    };
234    let edit = edits[index];
235    if clamped == edit.source_start() {
236        return edit.display_start() as usize;
237    }
238    if clamped <= edit.source_end() {
239        return edit.display_end() as usize;
240    }
241    let mapped = (clamped as i64 + edit.delta_after()).clamp(0, display_len as i64);
242    mapped as usize
243}
244
245fn sparse_display_to_source(
246    source_len: usize,
247    display_len: usize,
248    edits: &[SourceMapEdit],
249    offset: usize,
250) -> u32 {
251    let clamped = offset.min(display_len) as u32;
252    let Some(index) = edits
253        .partition_point(|edit| edit.display_start() <= clamped)
254        .checked_sub(1)
255    else {
256        return clamped;
257    };
258    let edit = edits[index];
259    if clamped == edit.display_start() {
260        return edit.source_start();
261    }
262    if clamped <= edit.display_end() {
263        return edit.source_end();
264    }
265    let mapped = (clamped as i64 - edit.delta_after()).clamp(0, source_len as i64);
266    mapped as u32
267}
268
269#[derive(Debug, Clone, Copy)]
270pub struct SourceView<'a> {
271    text: &'a str,
272    source_map: &'a SourceMap,
273}
274
275impl<'a> SourceView<'a> {
276    #[must_use]
277    pub fn new(text: &'a str, source_map: &'a SourceMap) -> Self {
278        Self { text, source_map }
279    }
280
281    #[must_use]
282    pub fn text(self) -> &'a str {
283        self.text
284    }
285
286    #[must_use]
287    pub fn source_map(self) -> &'a SourceMap {
288        self.source_map
289    }
290
291    #[must_use]
292    pub fn display_range(self, range: TextRange) -> Range<usize> {
293        self.source_map.display_range(range)
294    }
295
296    #[must_use]
297    pub fn display_slice(self, range: TextRange) -> &'a str {
298        &self.text[self.display_range(range)]
299    }
300
301    #[must_use]
302    pub fn slice(self, range: TextRange) -> &'a str {
303        self.display_slice(range)
304    }
305
306    #[must_use]
307    pub fn source_range_from_display_range(self, range: Range<usize>) -> TextRange {
308        self.source_map.source_range_from_display_range(range)
309    }
310}
311
/// Category of a [`Token`].
///
/// `Whitespace`, `LineComment` and `BlockComment` are the kinds that
/// [`TokenKind::is_trivia`] reports as trivia.
///
/// NOTE(review): the exact spelling each variant stands for is decided by the
/// lexer, which is not part of this file — confirm there before relying on a
/// variant name alone.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // Trivia kinds (see `is_trivia`).
    Whitespace,
    LineComment,
    BlockComment,
    Ident,
    IntLiteral,
    FloatLiteral,
    StringLiteral,
    Flag,
    Dollar,
    Backquote,
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    Dot,
    Pipe,
    Comma,
    Semi,
    Assign,
    PlusEq,
    MinusEq,
    StarEq,
    SlashEq,
    Plus,
    PlusPlus,
    Minus,
    MinusMinus,
    Star,
    Slash,
    Percent,
    Caret,
    Question,
    Colon,
    EqEq,
    NotEq,
    LtLt,
    Lt,
    Le,
    GtGt,
    Gt,
    Ge,
    AndAnd,
    OrOr,
    Bang,
    Unknown,
    Eof,
}
363
/// A token: its [`TokenKind`] together with the [`TextRange`] it covers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Token {
    /// Category of the token.
    pub kind: TokenKind,
    /// Span covered by the token.
    // NOTE(review): presumably offsets into the source text — confirm against the lexer.
    pub range: TextRange,
}

impl Token {
    /// Creates a token from its kind and range. Usable in `const` contexts.
    #[must_use]
    pub const fn new(kind: TokenKind, range: TextRange) -> Self {
        Self { kind, range }
    }
}
376
377impl TokenKind {
378    #[must_use]
379    pub const fn is_trivia(self) -> bool {
380        matches!(
381            self,
382            Self::Whitespace | Self::LineComment | Self::BlockComment
383        )
384    }
385}
386
/// A diagnostic message attached to a [`TextRange`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexDiagnostic {
    /// Static, human-readable description of the problem.
    pub message: &'static str,
    /// Span the diagnostic refers to.
    pub range: TextRange,
}

impl LexDiagnostic {
    /// Creates a diagnostic from its message and range. Usable in `const`
    /// contexts.
    #[must_use]
    pub const fn new(message: &'static str, range: TextRange) -> Self {
        Self { message, range }
    }
}
399
/// A list of tokens together with a list of diagnostics.
///
/// The `Default` value has both lists empty.
// NOTE(review): presumably the lexer's return value — the producer is not in this file.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Lexed {
    /// Tokens, in the order they were stored.
    pub tokens: Vec<Token>,
    /// Diagnostics, in the order they were stored.
    pub diagnostics: Vec<LexDiagnostic>,
}
405
// Unit tests for the range helpers, token types, and all three `SourceMap`
// representations (dense table, identity, sparse edits).
#[cfg(test)]
mod tests {
    use super::{LexDiagnostic, SourceMap, SourceMapEdit, Token, TokenKind, range_len, text_range};

    #[test]
    fn text_range_helpers_keep_offsets() {
        let range = text_range(10, 15);
        assert_eq!(range_len(range), 5);
        assert!(!range.is_empty());
    }

    #[test]
    fn token_constructor_keeps_fields() {
        let token = Token::new(TokenKind::Semi, text_range(1, 2));
        assert_eq!(token.kind, TokenKind::Semi);
        assert_eq!(token.range, text_range(1, 2));
    }

    #[test]
    fn lex_diagnostic_constructor_keeps_fields() {
        let diagnostic = LexDiagnostic::new("bad token", text_range(2, 4));
        assert_eq!(diagnostic.message, "bad token");
        assert_eq!(diagnostic.range, text_range(2, 4));
    }

    #[test]
    fn trivia_kinds_are_marked_as_trivia() {
        assert!(TokenKind::Whitespace.is_trivia());
        assert!(TokenKind::LineComment.is_trivia());
        assert!(TokenKind::BlockComment.is_trivia());
        assert!(!TokenKind::Ident.is_trivia());
    }

    #[test]
    fn source_map_can_map_display_offsets_back_to_source_offsets() {
        // Source offsets 1 and 2 both map to display offset 3; the reverse
        // lookup is expected to pick the later of the two.
        let map = SourceMap::from_source_to_display(vec![0, 3, 3, 4]);
        assert_eq!(map.source_offset_for_display(0), 0);
        assert_eq!(map.source_offset_for_display(3), 2);
        assert_eq!(map.source_offset_for_display(4), 3);
        assert_eq!(map.source_range_from_display_range(0..3), text_range(0, 2));
    }

    #[test]
    fn identity_source_map_avoids_index_materialization() {
        let map = SourceMap::identity(8);
        assert_eq!(map.display_offset(3), 3);
        // Out-of-range lookups clamp to the map length in both directions.
        assert_eq!(map.display_offset(99), 8);
        assert_eq!(map.source_offset_for_display(5), 5);
        assert_eq!(map.source_offset_for_display(99), 8);
        assert_eq!(map.source_range_from_display_range(2..6), text_range(2, 6));
    }

    #[test]
    fn sparse_source_map_handles_positive_delta() {
        // Source span 1..2 is shown as display span 1..3, so offsets after
        // the edit shift by +1.
        let map = SourceMap::from_sparse_edits(4, 5, vec![SourceMapEdit::new(1, 2, 1, 3)].into());
        assert_eq!(map.display_offset(0), 0);
        assert_eq!(map.display_offset(1), 1);
        assert_eq!(map.display_offset(2), 3);
        assert_eq!(map.display_offset(4), 5);
        assert_eq!(map.source_offset_for_display(0), 0);
        assert_eq!(map.source_offset_for_display(1), 1);
        assert_eq!(map.source_offset_for_display(2), 2);
        assert_eq!(map.source_offset_for_display(3), 2);
        assert_eq!(map.source_offset_for_display(5), 4);
    }

    #[test]
    fn sparse_source_map_handles_negative_delta() {
        // Source span 1..5 is shown as display span 1..3, so offsets after
        // the edit shift by -2.
        let map = SourceMap::from_sparse_edits(6, 4, vec![SourceMapEdit::new(1, 5, 1, 3)].into());
        assert_eq!(map.display_offset(0), 0);
        assert_eq!(map.display_offset(1), 1);
        assert_eq!(map.display_offset(2), 3);
        assert_eq!(map.display_offset(5), 3);
        assert_eq!(map.display_offset(6), 4);
        assert_eq!(map.source_offset_for_display(0), 0);
        assert_eq!(map.source_offset_for_display(1), 1);
        assert_eq!(map.source_offset_for_display(2), 5);
        assert_eq!(map.source_offset_for_display(3), 5);
        assert_eq!(map.source_offset_for_display(4), 6);
    }
}