biome_parser/
lib.rs

1#![deny(rustdoc::broken_intra_doc_links)]
2#![doc = include_str!("../CONTRIBUTING.md")]
3
4use crate::diagnostic::{expected_token, ParseDiagnostic, ToDiagnostic};
5use crate::event::Event;
6use crate::event::Event::Token;
7use crate::token_source::{BumpWithContext, NthToken, TokenSource, TokenSourceWithBufferedLexer};
8use biome_console::fmt::Display;
9use biome_diagnostics::location::AsSpan;
10use biome_rowan::{AstNode, Language, SendNode, SyntaxKind, SyntaxNode, TextRange, TextSize};
11use std::any::type_name;
12
13pub mod diagnostic;
14pub mod event;
15pub mod lexer;
16mod marker;
17pub mod parse_lists;
18pub mod parse_recovery;
19pub mod parsed_syntax;
20pub mod prelude;
21pub mod token_set;
22pub mod token_source;
23pub mod tree_sink;
24
25use crate::lexer::LexerWithCheckpoint;
26use crate::parsed_syntax::ParsedSyntax;
27use crate::parsed_syntax::ParsedSyntax::{Absent, Present};
28use biome_diagnostics::serde::Diagnostic;
29pub use marker::{CompletedMarker, Marker};
30pub use token_set::TokenSet;
31
32pub struct ParserContext<K: SyntaxKind> {
33    events: Vec<Event<K>>,
34    skipping: bool,
35    diagnostics: Vec<ParseDiagnostic>,
36}
37
38impl<K: SyntaxKind> Default for ParserContext<K> {
39    fn default() -> Self {
40        Self::new()
41    }
42}
43
44impl<K: SyntaxKind> ParserContext<K> {
45    pub fn new() -> Self {
46        Self {
47            skipping: false,
48            events: Vec::new(),
49            diagnostics: Vec::new(),
50        }
51    }
52
53    /// Returns the slice with the parse events
54    pub fn events(&self) -> &[Event<K>] {
55        &self.events
56    }
57
58    /// Returns a slice with the parse diagnostics
59    pub fn diagnostics(&self) -> &[ParseDiagnostic] {
60        &self.diagnostics
61    }
62
63    /// Drops all diagnostics after `at`.
64    pub fn truncate_diagnostics(&mut self, at: usize) {
65        self.diagnostics.truncate(at);
66    }
67
68    /// Pushes a new token event
69    pub fn push_token(&mut self, kind: K, end: TextSize) {
70        self.push_event(Token { kind, end });
71    }
72
73    /// Pushes a parse event
74    pub fn push_event(&mut self, event: Event<K>) {
75        self.events.push(event)
76    }
77
78    /// Returns `true` if the parser is skipping a token as skipped token trivia.
79    pub fn is_skipping(&self) -> bool {
80        self.skipping
81    }
82
83    /// Splits the events into two at the given `position`. Returns a newly allocated vector containing the
84    /// elements from `[position;len]`.
85    ///
86    /// ## Safety
87    /// The method is marked as `unsafe` to discourage its usage. Removing events can lead to
88    /// corrupted events if not done carefully.
89    pub unsafe fn split_off_events(&mut self, position: usize) -> Vec<Event<K>> {
90        self.events.split_off(position)
91    }
92
93    /// Get the current index of the last event
94    fn cur_event_pos(&self) -> usize {
95        self.events.len().saturating_sub(1)
96    }
97
98    /// Remove `amount` events from the parser
99    fn drain_events(&mut self, amount: usize) {
100        self.events.truncate(self.events.len() - amount);
101    }
102
103    /// Rewind the parser back to a previous position in time
104    pub fn rewind(&mut self, checkpoint: ParserContextCheckpoint) {
105        let ParserContextCheckpoint {
106            event_pos,
107            errors_pos,
108        } = checkpoint;
109        self.drain_events(self.cur_event_pos() - event_pos);
110        self.diagnostics.truncate(errors_pos as usize);
111    }
112
113    /// Get a checkpoint representing the progress of the parser at this point of time
114    #[must_use]
115    pub fn checkpoint(&self) -> ParserContextCheckpoint {
116        ParserContextCheckpoint {
117            event_pos: self.cur_event_pos(),
118            errors_pos: self.diagnostics.len() as u32,
119        }
120    }
121
122    pub fn finish(self) -> (Vec<Event<K>>, Vec<ParseDiagnostic>) {
123        (self.events, self.diagnostics)
124    }
125}
126
/// A snapshot of the parser's progress at one point in time, used to rewind the parser later.
#[derive(Debug)]
pub struct ParserContextCheckpoint {
    /// The index of the last event at the time the checkpoint was created.
    event_pos: usize,
    /// The number of diagnostics at the time the checkpoint was created.
    ///
    /// Safety: The parser only supports files <= 4Gb. A `u32` can count one error for every
    /// single character of such a file, which should be sufficient for any realistic file.
    errors_pos: u32,
}
136
137impl ParserContextCheckpoint {
138    pub fn event_position(&self) -> usize {
139        self.event_pos
140    }
141}
142
143pub trait Parser: Sized {
144    type Kind: SyntaxKind;
145    type Source: TokenSource<Kind = Self::Kind>;
146
147    /// Returns a reference to the [ParserContext].
148    fn context(&self) -> &ParserContext<Self::Kind>;
149
150    /// Returns a mutable reference to the [ParserContext].
151    fn context_mut(&mut self) -> &mut ParserContext<Self::Kind>;
152
153    /// Returns a reference to the [`TokenSource``](TokenSource]
154    fn source(&self) -> &Self::Source;
155
156    /// Returns a mutable reference to the [TokenSource].
157    fn source_mut(&mut self) -> &mut Self::Source;
158
159    /// Returns `true` if the parser is trying to parse some syntax but only if it has no errors.
160    ///
161    /// Returning `true` disables more involved error recovery.
162    fn is_speculative_parsing(&self) -> bool {
163        false
164    }
165
166    /// Gets the source text of a range
167    ///
168    /// # Panics
169    ///
170    /// If the range is out of bounds
171    fn text(&self, span: TextRange) -> &str {
172        &self.source().text()[span]
173    }
174
175    /// Gets the current token kind of the parser
176    fn cur(&self) -> Self::Kind {
177        self.source().current()
178    }
179
180    /// Gets the range of the current token
181    fn cur_range(&self) -> TextRange {
182        self.source().current_range()
183    }
184
185    /// Tests if there's a line break before the current token (between the last and current)
186    fn has_preceding_line_break(&self) -> bool {
187        self.source().has_preceding_line_break()
188    }
189
190    /// Get the source code of the parser's current token.
191    fn cur_text(&self) -> &str {
192        &self.source().text()[self.cur_range()]
193    }
194
195    /// Checks if the parser is currently at a specific token
196    fn at(&self, kind: Self::Kind) -> bool {
197        self.cur() == kind
198    }
199
200    /// Check if the parser's current token is contained in a token set
201    fn at_ts(&self, kinds: TokenSet<Self::Kind>) -> bool {
202        kinds.contains(self.cur())
203    }
204
205    /// Look ahead at a token and get its kind.
206    fn nth<'l, Lex>(&mut self, n: usize) -> Self::Kind
207    where
208        Lex: LexerWithCheckpoint<'l, Kind = Self::Kind>,
209        Self::Source: NthToken<Lex> + TokenSourceWithBufferedLexer<Lex>,
210    {
211        self.source_mut().nth(n)
212    }
213
214    /// Checks if a token lookahead is something
215    fn nth_at<'l, Lex>(&mut self, n: usize, kind: Self::Kind) -> bool
216    where
217        Lex: LexerWithCheckpoint<'l, Kind = Self::Kind>,
218        Self::Source: NthToken<Lex> + TokenSourceWithBufferedLexer<Lex>,
219    {
220        self.nth(n) == kind
221    }
222
223    /// Checks if a token set lookahead is something
224    fn nth_at_ts<'l, Lex>(&mut self, n: usize, kinds: TokenSet<Self::Kind>) -> bool
225    where
226        Lex: LexerWithCheckpoint<'l, Kind = Self::Kind>,
227        Self::Source: NthToken<Lex> + TokenSourceWithBufferedLexer<Lex>,
228    {
229        kinds.contains(self.nth(n))
230    }
231
232    /// Tests if there's a line break before the nth token.
233    #[inline]
234    fn has_nth_preceding_line_break<'l, Lex>(&mut self, n: usize) -> bool
235    where
236        Lex: LexerWithCheckpoint<'l, Kind = Self::Kind>,
237        Self::Source: NthToken<Lex> + TokenSourceWithBufferedLexer<Lex>,
238    {
239        self.source_mut().has_nth_preceding_line_break(n)
240    }
241
242    /// Consume the current token if `kind` matches.
243    fn bump(&mut self, kind: Self::Kind) {
244        assert_eq!(
245            kind,
246            self.cur(),
247            "expected {:?} but at {:?}",
248            kind,
249            self.cur()
250        );
251
252        self.do_bump(kind)
253    }
254
255    /// Consume the current token if token set matches.
256    fn bump_ts(&mut self, kinds: TokenSet<Self::Kind>) {
257        assert!(
258            kinds.contains(self.cur()),
259            "expected {:?} but at {:?}",
260            kinds,
261            self.cur()
262        );
263
264        self.bump_any()
265    }
266
267    /// Consume any token but cast it as a different kind using the specified `context.
268    fn bump_remap_with_context(
269        &mut self,
270        kind: Self::Kind,
271        context: <Self::Source as BumpWithContext>::Context,
272    ) where
273        Self::Source: BumpWithContext,
274    {
275        self.do_bump_with_context(kind, context);
276    }
277
278    /// Consume any token but cast it as a different kind
279    fn bump_remap(&mut self, kind: Self::Kind) {
280        self.do_bump(kind);
281    }
282
283    /// Bumps the current token regardless of its kind and advances to the next token.
284    fn bump_any(&mut self) {
285        let kind = self.cur();
286        assert_ne!(kind, Self::Kind::EOF);
287
288        self.do_bump(kind);
289    }
290
291    /// Consumes the current token if `kind` matches and lexes the next token using the
292    /// specified `context.
293    fn bump_with_context(
294        &mut self,
295        kind: Self::Kind,
296        context: <Self::Source as BumpWithContext>::Context,
297    ) where
298        Self::Source: BumpWithContext,
299    {
300        assert_eq!(
301            kind,
302            self.cur(),
303            "expected {:?} but at {:?}",
304            kind,
305            self.cur()
306        );
307
308        self.do_bump_with_context(kind, context);
309    }
310
311    #[doc(hidden)]
312    fn do_bump_with_context(
313        &mut self,
314        kind: Self::Kind,
315        context: <Self::Source as BumpWithContext>::Context,
316    ) where
317        Self::Source: BumpWithContext,
318    {
319        let end = self.cur_range().end();
320        self.context_mut().push_token(kind, end);
321
322        if self.context().skipping {
323            self.source_mut().skip_as_trivia_with_context(context);
324        } else {
325            self.source_mut().bump_with_context(context);
326        }
327    }
328
329    #[doc(hidden)]
330    fn do_bump(&mut self, kind: Self::Kind) {
331        let end = self.cur_range().end();
332        self.context_mut().push_token(kind, end);
333
334        if self.context().skipping {
335            self.source_mut().skip_as_trivia();
336        } else {
337            self.source_mut().bump();
338        }
339    }
340
341    /// Consume the next token if `kind` matches using the specified `context.
342    fn eat_with_context(
343        &mut self,
344        kind: Self::Kind,
345        context: <Self::Source as BumpWithContext>::Context,
346    ) -> bool
347    where
348        Self::Source: BumpWithContext,
349    {
350        if !self.at(kind) {
351            return false;
352        }
353
354        self.do_bump_with_context(kind, context);
355
356        true
357    }
358
359    /// Consume the next token if `kind` matches.
360    fn eat(&mut self, kind: Self::Kind) -> bool {
361        if !self.at(kind) {
362            return false;
363        }
364
365        self.do_bump(kind);
366
367        true
368    }
369
370    /// Consume the next token if token set matches.
371    fn eat_ts(&mut self, kinds: TokenSet<Self::Kind>) -> bool {
372        if !self.at_ts(kinds) {
373            return false;
374        }
375
376        self.do_bump(self.cur());
377
378        true
379    }
380
381    /// Consume the next token if token set matches using the specified `context.
382    fn eat_ts_with_context(
383        &mut self,
384        kinds: TokenSet<Self::Kind>,
385        context: <Self::Source as BumpWithContext>::Context,
386    ) -> bool
387    where
388        Self::Source: BumpWithContext,
389    {
390        if !self.at_ts(kinds) {
391            return false;
392        }
393
394        self.do_bump_with_context(self.cur(), context);
395
396        true
397    }
398
399    /// Try to eat a specific token kind, if the kind is not there then adds an error to the events stack
400    /// using the specified `context.
401    fn expect_with_context(
402        &mut self,
403        kind: Self::Kind,
404        context: <Self::Source as BumpWithContext>::Context,
405    ) -> bool
406    where
407        Self::Source: BumpWithContext,
408    {
409        if self.eat_with_context(kind, context) {
410            true
411        } else {
412            self.error(expected_token(kind));
413            false
414        }
415    }
416
417    /// Try to eat a specific token kind, if the kind is not there then adds an error to the events stack.
418    fn expect(&mut self, kind: Self::Kind) -> bool {
419        if self.eat(kind) {
420            true
421        } else {
422            self.error(expected_token(kind));
423            false
424        }
425    }
426
427    /// Allows parsing an unsupported syntax as skipped trivia tokens.
428    fn parse_as_skipped_trivia_tokens<P>(&mut self, parse: P)
429    where
430        P: FnOnce(&mut Self),
431    {
432        let events_pos = self.context().events.len();
433        self.context_mut().skipping = true;
434        parse(self);
435        self.context_mut().skipping = false;
436
437        // Truncate any start/finish events
438        self.context_mut().events.truncate(events_pos);
439    }
440
441    /// Add a diagnostic
442    fn error(&mut self, err: impl ToDiagnostic<Self>) {
443        let err = err.into_diagnostic(self);
444
445        // Don't report another diagnostic if the last diagnostic is at the same position of the current one
446        if let Some(previous) = self.context().diagnostics.last() {
447            match (&err.diagnostic_range(), &previous.diagnostic_range()) {
448                (Some(err_range), Some(previous_range))
449                    if err_range.start() == previous_range.start() =>
450                {
451                    return;
452                }
453                _ => {}
454            }
455        }
456        self.context_mut().diagnostics.push(err)
457    }
458
459    /// Creates a new diagnostic. Pass the message and the range where the error occurred
460    #[must_use]
461    fn err_builder(&self, message: impl Display, span: impl AsSpan) -> ParseDiagnostic {
462        ParseDiagnostic::new(message, span)
463    }
464
465    /// Bump and add an error event
466    fn err_and_bump(&mut self, err: impl ToDiagnostic<Self>, unknown_syntax_kind: Self::Kind) {
467        let m = self.start();
468        self.bump_any();
469        m.complete(self, unknown_syntax_kind);
470        self.error(err);
471    }
472
473    /// Returns the kind of the last bumped token.
474    fn last(&self) -> Option<Self::Kind> {
475        self.context()
476            .events
477            .iter()
478            .rev()
479            .find_map(|event| match event {
480                Token { kind, .. } => Some(*kind),
481                _ => None,
482            })
483    }
484
485    /// Returns the end offset of the last bumped token.
486    fn last_end(&self) -> Option<TextSize> {
487        self.context()
488            .events
489            .iter()
490            .rev()
491            .find_map(|event| match event {
492                Token { end, .. } => Some(*end),
493                _ => None,
494            })
495    }
496
497    /// Starts a new node in the syntax tree. All nodes and tokens
498    /// consumed between the `start` and the corresponding `Marker::complete`
499    /// belong to the same node.
500    fn start(&mut self) -> Marker {
501        let pos = self.context().events.len() as u32;
502        let start = self.source().position();
503        self.context_mut().push_event(Event::tombstone());
504        Marker::new(pos, start)
505    }
506}
507
508/// Captures the progress of the parser and allows to test if the parsing is still making progress
509#[derive(Debug, Eq, Ord, PartialOrd, PartialEq, Hash, Default)]
510pub struct ParserProgress(Option<TextSize>);
511
512impl ParserProgress {
513    /// Returns true if the current parser position is passed this position
514    #[inline]
515    pub fn has_progressed<P>(&self, p: &P) -> bool
516    where
517        P: Parser,
518    {
519        match self.0 {
520            None => true,
521            Some(pos) => pos < p.source().position(),
522        }
523    }
524
525    /// Asserts that the parsing is still making progress.
526    ///
527    /// # Panics
528    ///
529    /// Panics if the parser is still at this position
530    #[inline]
531    pub fn assert_progressing<P>(&mut self, p: &P)
532    where
533        P: Parser,
534    {
535        assert!(
536            self.has_progressed(p),
537            "The parser is no longer progressing. Stuck at '{}' {:?}:{:?}",
538            p.cur_text(),
539            p.cur(),
540            p.cur_range(),
541        );
542
543        self.0 = Some(p.source().position());
544    }
545}
546
547/// A syntax feature that may or may not be supported depending on the file type and parser configuration
548pub trait SyntaxFeature: Sized {
549    type Parser<'source>: Parser;
550
551    /// Returns `true` if the current parsing context supports this syntax feature.
552    fn is_supported(&self, p: &Self::Parser<'_>) -> bool;
553
554    /// Returns `true` if the current parsing context doesn't support this syntax feature.
555    fn is_unsupported(&self, p: &Self::Parser<'_>) -> bool {
556        !self.is_supported(p)
557    }
558
559    /// Adds a diagnostic and changes the kind of the node to [SyntaxKind::to_bogus] if this feature isn't
560    /// supported.
561    ///
562    /// Returns the parsed syntax.
563    fn exclusive_syntax<'source, S, E, D>(
564        &self,
565        p: &mut Self::Parser<'source>,
566        syntax: S,
567        error_builder: E,
568    ) -> ParsedSyntax
569    where
570        S: Into<ParsedSyntax>,
571        E: FnOnce(&Self::Parser<'source>, &CompletedMarker) -> D,
572        D: ToDiagnostic<Self::Parser<'source>>,
573    {
574        syntax.into().map(|mut syntax| {
575            if self.is_unsupported(p) {
576                let error = error_builder(p, &syntax);
577                p.error(error);
578                syntax.change_to_bogus(p);
579                syntax
580            } else {
581                syntax
582            }
583        })
584    }
585
586    /// Parses a syntax and adds a diagnostic and changes the kind of the node to [SyntaxKind::to_bogus] if this feature isn't
587    /// supported.
588    ///
589    /// Returns the parsed syntax.
590    fn parse_exclusive_syntax<'source, P, E>(
591        &self,
592        p: &mut Self::Parser<'source>,
593        parse: P,
594        error_builder: E,
595    ) -> ParsedSyntax
596    where
597        P: FnOnce(&mut Self::Parser<'source>) -> ParsedSyntax,
598        E: FnOnce(&Self::Parser<'source>, &CompletedMarker) -> ParseDiagnostic,
599    {
600        if self.is_supported(p) {
601            parse(p)
602        } else {
603            let diagnostics_checkpoint = p.context().diagnostics().len();
604            let syntax = parse(p);
605            p.context_mut().truncate_diagnostics(diagnostics_checkpoint);
606
607            match syntax {
608                Present(mut syntax) => {
609                    let diagnostic = error_builder(p, &syntax);
610                    p.error(diagnostic);
611                    syntax.change_to_bogus(p);
612                    Present(syntax)
613                }
614                _ => Absent,
615            }
616        }
617    }
618
619    /// Adds a diagnostic and changes the kind of the node to [SyntaxKind::to_bogus] if this feature is
620    /// supported.
621    ///
622    /// Returns the parsed syntax.
623    fn excluding_syntax<'source, S, E>(
624        &self,
625        p: &mut Self::Parser<'source>,
626        syntax: S,
627        error_builder: E,
628    ) -> ParsedSyntax
629    where
630        S: Into<ParsedSyntax>,
631        E: FnOnce(&Self::Parser<'source>, &CompletedMarker) -> ParseDiagnostic,
632    {
633        syntax.into().map(|mut syntax| {
634            if self.is_unsupported(p) {
635                syntax
636            } else {
637                let error = error_builder(p, &syntax);
638                p.error(error);
639                syntax.change_to_bogus(p);
640                syntax
641            }
642        })
643    }
644}
645
646/// Language-independent cache entry for a parsed file
647///
648/// This struct holds a handle to the root node of the parsed syntax tree,
649/// along with the list of diagnostics emitted by the parser while generating
650/// this entry.
651///
652/// It can be dynamically downcast into a concrete [SyntaxNode] or [AstNode] of
653/// the corresponding language, generally through a language-specific capability
654#[derive(Clone, Debug)]
655pub struct AnyParse {
656    pub(crate) root: SendNode,
657    pub(crate) diagnostics: Vec<ParseDiagnostic>,
658}
659
660impl AnyParse {
661    pub fn new(root: SendNode, diagnostics: Vec<ParseDiagnostic>) -> AnyParse {
662        AnyParse { root, diagnostics }
663    }
664
665    pub fn syntax<L>(&self) -> SyntaxNode<L>
666    where
667        L: Language + 'static,
668    {
669        self.root.clone().into_node().unwrap_or_else(|| {
670            panic!(
671                "could not downcast root node to language {}",
672                type_name::<L>()
673            )
674        })
675    }
676
677    pub fn tree<N>(&self) -> N
678    where
679        N: AstNode,
680        N::Language: 'static,
681    {
682        N::unwrap_cast(self.syntax::<N::Language>())
683    }
684
685    /// This function transforms diagnostics coming from the parser into serializable diagnostics
686    pub fn into_diagnostics(self) -> Vec<Diagnostic> {
687        self.diagnostics.into_iter().map(Diagnostic::new).collect()
688    }
689
690    /// Get the diagnostics which occurred when parsing
691    pub fn diagnostics(&self) -> &[ParseDiagnostic] {
692        &self.diagnostics
693    }
694
695    pub fn has_errors(&self) -> bool {
696        self.diagnostics.iter().any(|diag| diag.is_error())
697    }
698}