solar_parse/parser/
mod.rs

1use crate::{Lexer, PErr, PResult};
2use smallvec::SmallVec;
3use solar_ast::{
4    self as ast, AstPath, Box, BoxSlice, DocComment, DocComments,
5    token::{Delimiter, Token, TokenKind},
6};
7use solar_data_structures::{BumpExt, fmt::or_list};
8use solar_interface::{
9    Ident, Result, Session, Span, Symbol,
10    diagnostics::DiagCtxt,
11    source_map::{FileName, SourceFile},
12};
13use std::{fmt, path::Path};
14
15mod expr;
16mod item;
17mod lit;
18mod stmt;
19mod ty;
20mod yul;
21
/// Maximum allowed recursive descent depth for selected parser entry points.
///
/// [`Parser::with_recursion_limit`] reports a "recursion limit reached" error once the tracked
/// depth exceeds this value.
const PARSER_RECURSION_LIMIT: usize = 128;
24
/// Solidity and Yul parser.
///
/// Works on an already-lexed token stream and allocates the resulting AST nodes in an arena.
///
/// # Examples
///
/// ```
/// # mod solar { pub use {solar_ast as ast, solar_interface as interface, solar_parse as parse}; }
/// # fn main() {}
#[doc = include_str!("../../doc-examples/parser.rs")]
/// ```
pub struct Parser<'sess, 'ast> {
    /// The parser session.
    pub sess: &'sess Session,
    /// The arena where the AST nodes are allocated.
    pub arena: &'ast ast::Arena,

    /// The current token.
    pub token: Token,
    /// The previous token.
    ///
    /// Comments skipped while bumping do not end up here.
    pub prev_token: Token,
    /// List of expected tokens. Cleared after each `bump` call.
    ///
    /// Filled in by the `check*`/`eat*` helpers and used to build "expected one of ..." errors.
    expected_tokens: Vec<ExpectedToken>,
    /// The span of the last unexpected token.
    ///
    /// Used to detect an "unexpected token" error being requested twice for the same token.
    last_unexpected_token_span: Option<Span>,
    /// The current doc-comments.
    ///
    /// Collected while bumping over comments; cleared on every bump and when they are
    /// retrieved with `parse_doc_comments`.
    docs: Vec<DocComment>,

    /// The token stream.
    ///
    /// Holds the tokens that have not been consumed yet, comments included.
    tokens: std::vec::IntoIter<Token>,

    /// Whether the parser is in Yul mode.
    ///
    /// Currently, this can only happen when parsing a Yul "assembly" block.
    in_yul: bool,
    /// Whether the parser is currently parsing a contract block.
    in_contract: bool,

    /// Current recursion depth for recursive parsing operations.
    ///
    /// Maintained by `with_recursion_limit` and compared against `PARSER_RECURSION_LIMIT`.
    recursion_depth: usize,
}
64
/// Something the parser was prepared to accept at the current position.
///
/// Entries are recorded whenever a `check*` helper does not match and are later rendered into
/// "expected ..., found ..." diagnostics.
#[derive(Clone, Debug, PartialEq, Eq)]
enum ExpectedToken {
    /// A specific token kind; displayed in backticks.
    Token(TokenKind),
    /// A specific keyword; displayed in backticks.
    Keyword(Symbol),
    /// Displayed as "literal".
    Lit,
    /// Displayed as "string literal".
    StrLit,
    /// Displayed as "identifier".
    Ident,
    /// Displayed as "path".
    Path,
    /// Displayed as "elementary type name".
    ElementaryType,
}
75
76impl fmt::Display for ExpectedToken {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        f.write_str(match self {
79            Self::Token(t) => return write!(f, "`{t}`"),
80            Self::Keyword(kw) => return write!(f, "`{kw}`"),
81            Self::StrLit => "string literal",
82            Self::Lit => "literal",
83            Self::Ident => "identifier",
84            Self::Path => "path",
85            Self::ElementaryType => "elementary type name",
86        })
87    }
88}
89
90impl ExpectedToken {
91    fn to_string_many(tokens: &[Self]) -> String {
92        or_list(tokens).to_string()
93    }
94
95    fn eq_kind(&self, other: TokenKind) -> bool {
96        match *self {
97            Self::Token(kind) => kind == other,
98            _ => false,
99        }
100    }
101}
102
/// A sequence separator.
///
/// Describes which token (if any) separates the elements of a parsed sequence and how a
/// separator after the last element is treated.
#[derive(Debug)]
struct SeqSep {
    /// The separator token.
    ///
    /// `None` means that elements are not separated by any token.
    sep: Option<TokenKind>,
    /// `true` if a trailing separator is allowed.
    trailing_sep_allowed: bool,
    /// `true` if a trailing separator is required.
    trailing_sep_required: bool,
}
113
114impl SeqSep {
115    fn trailing_enforced(t: TokenKind) -> Self {
116        Self { sep: Some(t), trailing_sep_required: true, trailing_sep_allowed: true }
117    }
118
119    #[allow(dead_code)]
120    fn trailing_allowed(t: TokenKind) -> Self {
121        Self { sep: Some(t), trailing_sep_required: false, trailing_sep_allowed: true }
122    }
123
124    fn trailing_disallowed(t: TokenKind) -> Self {
125        Self { sep: Some(t), trailing_sep_required: false, trailing_sep_allowed: false }
126    }
127
128    fn none() -> Self {
129        Self { sep: None, trailing_sep_required: false, trailing_sep_allowed: false }
130    }
131}
132
/// Indicates whether the parser took a recovery path and continued.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Recovered {
    /// No recovery took place.
    No,
    /// The parser recovered and continued.
    Yes,
}
139
140impl<'sess, 'ast> Parser<'sess, 'ast> {
141    /// Creates a new parser.
142    pub fn new(sess: &'sess Session, arena: &'ast ast::Arena, tokens: Vec<Token>) -> Self {
143        assert!(sess.is_entered(), "session should be entered before parsing");
144        let mut parser = Self {
145            sess,
146            arena,
147            token: Token::DUMMY,
148            prev_token: Token::DUMMY,
149            expected_tokens: Vec::with_capacity(8),
150            last_unexpected_token_span: None,
151            docs: Vec::with_capacity(4),
152            tokens: tokens.into_iter(),
153            in_yul: false,
154            in_contract: false,
155            recursion_depth: 0,
156        };
157        parser.bump();
158        parser
159    }
160
161    /// Creates a new parser from a source code string.
162    pub fn from_source_code(
163        sess: &'sess Session,
164        arena: &'ast ast::Arena,
165        filename: FileName,
166        src: impl Into<String>,
167    ) -> Result<Self> {
168        Self::from_lazy_source_code(sess, arena, filename, || Ok(src.into()))
169    }
170
171    /// Creates a new parser from a file.
172    ///
173    /// The file will not be read if it has already been added into the source map.
174    pub fn from_file(sess: &'sess Session, arena: &'ast ast::Arena, path: &Path) -> Result<Self> {
175        Self::from_lazy_source_code(sess, arena, FileName::Real(path.to_path_buf()), || {
176            sess.source_map().file_loader().load_file(path)
177        })
178    }
179
180    /// Creates a new parser from a source code closure.
181    ///
182    /// The closure will not be called if the file name has already been added into the source map.
183    pub fn from_lazy_source_code(
184        sess: &'sess Session,
185        arena: &'ast ast::Arena,
186        filename: FileName,
187        get_src: impl FnOnce() -> std::io::Result<String>,
188    ) -> Result<Self> {
189        let file = sess
190            .source_map()
191            .new_source_file_with(filename, get_src)
192            .map_err(|e| sess.dcx.err(e.to_string()).emit())?;
193        Ok(Self::from_source_file(sess, arena, &file))
194    }
195
196    /// Creates a new parser from a source file.
197    ///
198    /// Note that the source file must be added to the source map before calling this function.
199    /// Prefer using [`from_source_code`](Self::from_source_code) or [`from_file`](Self::from_file)
200    /// instead.
201    pub fn from_source_file(
202        sess: &'sess Session,
203        arena: &'ast ast::Arena,
204        file: &SourceFile,
205    ) -> Self {
206        Self::from_lexer(arena, Lexer::from_source_file(sess, file))
207    }
208
209    /// Creates a new parser from a lexer.
210    pub fn from_lexer(arena: &'ast ast::Arena, lexer: Lexer<'sess, '_>) -> Self {
211        Self::new(lexer.sess, arena, lexer.into_tokens())
212    }
213
214    /// Returns the diagnostic context.
215    #[inline]
216    pub fn dcx(&self) -> &'sess DiagCtxt {
217        &self.sess.dcx
218    }
219
220    /// Allocates an object on the AST arena.
221    pub fn alloc<T>(&self, value: T) -> Box<'ast, T> {
222        self.arena.alloc(value)
223    }
224
225    /// Allocates a list of objects on the AST arena.
226    ///
227    /// # Panics
228    ///
229    /// Panics if the list is empty.
230    pub fn alloc_path(&self, segments: &[Ident]) -> AstPath<'ast> {
231        // SAFETY: `Ident` is `Copy`.
232        AstPath::new_in(self.arena.bump(), segments)
233    }
234
235    /// Allocates a list of objects on the AST arena.
236    pub fn alloc_vec<T>(&self, values: Vec<T>) -> BoxSlice<'ast, T> {
237        self.arena.alloc_vec_thin((), values)
238    }
239
240    /// Allocates a list of objects on the AST arena.
241    pub fn alloc_smallvec<A: smallvec::Array>(
242        &self,
243        values: SmallVec<A>,
244    ) -> BoxSlice<'ast, A::Item> {
245        self.arena.alloc_smallvec_thin((), values)
246    }
247
248    /// Returns an "unexpected token" error in a [`PResult`] for the current token.
249    #[inline]
250    #[track_caller]
251    pub fn unexpected<T>(&mut self) -> PResult<'sess, T> {
252        Err(self.unexpected_error())
253    }
254
255    /// Returns an "unexpected token" error for the current token.
256    #[cold]
257    #[track_caller]
258    pub fn unexpected_error(&mut self) -> PErr<'sess> {
259        match self.expected_one_of_not_found(&[], &[]) {
260            Ok(b) => unreachable!("`unexpected()` returned Ok({b:?})"),
261            Err(e) => e,
262        }
263    }
264
265    /// Expects and consumes the token `t`. Signals an error if the next token is not `t`.
266    #[inline]
267    #[track_caller]
268    pub fn expect(&mut self, tok: TokenKind) -> PResult<'sess, Recovered> {
269        if self.check_noexpect(tok) {
270            self.bump();
271            Ok(Recovered::No)
272        } else {
273            self.expected_one_of_not_found(std::slice::from_ref(&tok), &[])
274        }
275    }
276
277    /// Expect next token to be edible or inedible token. If edible,
278    /// then consume it; if inedible, then return without consuming
279    /// anything. Signal a fatal error if next token is unexpected.
280    #[track_caller]
281    pub fn expect_one_of(
282        &mut self,
283        edible: &[TokenKind],
284        inedible: &[TokenKind],
285    ) -> PResult<'sess, Recovered> {
286        if edible.contains(&self.token.kind) {
287            self.bump();
288            Ok(Recovered::No)
289        } else if inedible.contains(&self.token.kind) {
290            // leave it in the input
291            Ok(Recovered::No)
292        } else {
293            self.expected_one_of_not_found(edible, inedible)
294        }
295    }
296
297    #[cold]
298    #[track_caller]
299    fn expected_one_of_not_found(
300        &mut self,
301        edible: &[TokenKind],
302        inedible: &[TokenKind],
303    ) -> PResult<'sess, Recovered> {
304        if self.token.kind != TokenKind::Eof
305            && self.last_unexpected_token_span == Some(self.token.span)
306        {
307            panic!("called unexpected twice on the same token");
308        }
309
310        let mut expected = edible
311            .iter()
312            .chain(inedible)
313            .cloned()
314            .map(ExpectedToken::Token)
315            .chain(self.expected_tokens.iter().cloned())
316            .filter(|token| {
317                // Filter out suggestions that suggest the same token
318                // which was found and deemed incorrect.
319                fn is_ident_eq_keyword(found: TokenKind, expected: &ExpectedToken) -> bool {
320                    if let TokenKind::Ident(current_sym) = found
321                        && let ExpectedToken::Keyword(suggested_sym) = expected
322                    {
323                        return current_sym == *suggested_sym;
324                    }
325                    false
326                }
327
328                if !token.eq_kind(self.token.kind) {
329                    let eq = is_ident_eq_keyword(self.token.kind, token);
330                    // If the suggestion is a keyword and the found token is an ident,
331                    // the content of which are equal to the suggestion's content,
332                    // we can remove that suggestion (see the `return false` below).
333
334                    // If this isn't the case however, and the suggestion is a token the
335                    // content of which is the same as the found token's, we remove it as well.
336                    if !eq {
337                        if let ExpectedToken::Token(kind) = token
338                            && *kind == self.token.kind
339                        {
340                            return false;
341                        }
342                        return true;
343                    }
344                }
345                false
346            })
347            .collect::<Vec<_>>();
348        expected.sort_by_cached_key(ToString::to_string);
349        expected.dedup();
350
351        let expect = ExpectedToken::to_string_many(&expected);
352        let actual = self.token.full_description();
353        let (msg_exp, (mut label_span, label_exp)) = match expected.len() {
354            0 => (
355                format!("unexpected token: {actual}"),
356                (self.prev_token.span, "unexpected token after this".to_string()),
357            ),
358            1 => (
359                format!("expected {expect}, found {actual}"),
360                (self.prev_token.span.shrink_to_hi(), format!("expected {expect}")),
361            ),
362            len => {
363                let fmt = format!("expected one of {expect}, found {actual}");
364                let short_expect = if len > 6 { format!("{len} possible tokens") } else { expect };
365                let s = self.prev_token.span.shrink_to_hi();
366                (fmt, (s, format!("expected one of {short_expect}")))
367            }
368        };
369        if self.token.is_eof() {
370            // This is EOF; don't want to point at the following char, but rather the last token.
371            label_span = self.prev_token.span;
372        };
373
374        self.last_unexpected_token_span = Some(self.token.span);
375        let mut err = self.dcx().err(msg_exp).span(self.token.span);
376
377        if self.prev_token.span.is_dummy()
378            || !self
379                .sess
380                .source_map()
381                .is_multiline(self.token.span.shrink_to_hi().until(label_span.shrink_to_lo()))
382        {
383            // When the spans are in the same line, it means that the only content between
384            // them is whitespace, point at the found token in that case.
385            err = err.span_label(self.token.span, label_exp);
386        } else {
387            err = err.span_label(label_span, label_exp);
388            err = err.span_label(self.token.span, "unexpected token");
389        }
390
391        Err(err)
392    }
393
394    /// Expects and consumes a semicolon.
395    #[inline]
396    #[track_caller]
397    fn expect_semi(&mut self) -> PResult<'sess, ()> {
398        self.expect(TokenKind::Semi).map(drop)
399    }
400
401    /// Checks if the next token is `tok`, and returns `true` if so.
402    ///
403    /// This method will automatically add `tok` to `expected_tokens` if `tok` is not
404    /// encountered.
405    #[inline]
406    #[must_use]
407    fn check(&mut self, tok: TokenKind) -> bool {
408        let is_present = self.check_noexpect(tok);
409        if !is_present {
410            self.push_expected(ExpectedToken::Token(tok));
411        }
412        is_present
413    }
414
415    #[inline]
416    #[must_use]
417    fn check_noexpect(&self, tok: TokenKind) -> bool {
418        self.token.kind == tok
419    }
420
421    /// Consumes a token 'tok' if it exists. Returns whether the given token was present.
422    ///
423    /// the main purpose of this function is to reduce the cluttering of the suggestions list
424    /// which using the normal eat method could introduce in some cases.
425    #[inline]
426    #[must_use]
427    pub fn eat_noexpect(&mut self, tok: TokenKind) -> bool {
428        let is_present = self.check_noexpect(tok);
429        if is_present {
430            self.bump()
431        }
432        is_present
433    }
434
435    /// Consumes a token 'tok' if it exists. Returns whether the given token was present.
436    #[inline]
437    #[must_use]
438    pub fn eat(&mut self, tok: TokenKind) -> bool {
439        let is_present = self.check(tok);
440        if is_present {
441            self.bump()
442        }
443        is_present
444    }
445
446    /// If the next token is the given keyword, returns `true` without eating it.
447    /// An expectation is also added for diagnostics purposes.
448    #[inline]
449    #[must_use]
450    fn check_keyword(&mut self, kw: Symbol) -> bool {
451        let is_keyword = self.token.is_keyword(kw);
452        if !is_keyword {
453            self.push_expected(ExpectedToken::Keyword(kw));
454        }
455        is_keyword
456    }
457
458    /// If the next token is the given keyword, eats it and returns `true`.
459    /// Otherwise, returns `false`. An expectation is also added for diagnostics purposes.
460    #[inline]
461    #[must_use]
462    pub fn eat_keyword(&mut self, kw: Symbol) -> bool {
463        let is_keyword = self.check_keyword(kw);
464        if is_keyword {
465            self.bump();
466        }
467        is_keyword
468    }
469
470    /// If the given word is not a keyword, signals an error.
471    /// If the next token is not the given word, signals an error.
472    /// Otherwise, eats it.
473    #[track_caller]
474    fn expect_keyword(&mut self, kw: Symbol) -> PResult<'sess, ()> {
475        if !self.eat_keyword(kw) { self.unexpected() } else { Ok(()) }
476    }
477
478    #[must_use]
479    fn check_ident(&mut self) -> bool {
480        self.check_or_expected(self.token.is_ident(), ExpectedToken::Ident)
481    }
482
483    #[must_use]
484    fn check_nr_ident(&mut self) -> bool {
485        self.check_or_expected(self.token.is_non_reserved_ident(self.in_yul), ExpectedToken::Ident)
486    }
487
488    #[must_use]
489    fn check_path(&mut self) -> bool {
490        self.check_or_expected(self.token.is_ident(), ExpectedToken::Path)
491    }
492
493    #[must_use]
494    fn check_lit(&mut self) -> bool {
495        self.check_or_expected(self.token.is_lit(), ExpectedToken::Lit)
496    }
497
498    #[must_use]
499    fn check_str_lit(&mut self) -> bool {
500        self.check_or_expected(self.token.is_str_lit(), ExpectedToken::StrLit)
501    }
502
503    #[must_use]
504    fn check_elementary_type(&mut self) -> bool {
505        self.check_or_expected(self.token.is_elementary_type(), ExpectedToken::ElementaryType)
506    }
507
508    #[must_use]
509    fn check_or_expected(&mut self, ok: bool, t: ExpectedToken) -> bool {
510        if !ok {
511            self.push_expected(t);
512        }
513        ok
514    }
515
516    // #[inline(never)]
517    fn push_expected(&mut self, expected: ExpectedToken) {
518        self.expected_tokens.push(expected);
519    }
520
521    /// Parses a comma-separated sequence delimited by parentheses (e.g. `(x, y)`).
522    /// The function `f` must consume tokens until reaching the next separator or
523    /// closing bracket.
524    #[track_caller]
525    #[inline]
526    fn parse_paren_comma_seq<T>(
527        &mut self,
528        allow_empty: bool,
529        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
530    ) -> PResult<'sess, BoxSlice<'ast, T>> {
531        self.parse_delim_comma_seq(Delimiter::Parenthesis, allow_empty, f)
532    }
533
534    /// Parses a comma-separated sequence, including both delimiters.
535    /// The function `f` must consume tokens until reaching the next separator or
536    /// closing bracket.
537    #[track_caller]
538    #[inline]
539    fn parse_delim_comma_seq<T>(
540        &mut self,
541        delim: Delimiter,
542        allow_empty: bool,
543        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
544    ) -> PResult<'sess, BoxSlice<'ast, T>> {
545        self.parse_delim_seq(delim, SeqSep::trailing_disallowed(TokenKind::Comma), allow_empty, f)
546    }
547
548    /// Parses a comma-separated sequence.
549    /// The function `f` must consume tokens until reaching the next separator.
550    #[track_caller]
551    #[inline]
552    fn parse_nodelim_comma_seq<T>(
553        &mut self,
554        stop: TokenKind,
555        allow_empty: bool,
556        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
557    ) -> PResult<'sess, BoxSlice<'ast, T>> {
558        self.parse_seq_to_before_end(
559            stop,
560            SeqSep::trailing_disallowed(TokenKind::Comma),
561            allow_empty,
562            f,
563        )
564        .map(|(v, _recovered)| v)
565    }
566
567    /// Parses a `sep`-separated sequence, including both delimiters.
568    /// The function `f` must consume tokens until reaching the next separator or
569    /// closing bracket.
570    #[track_caller]
571    #[inline]
572    fn parse_delim_seq<T>(
573        &mut self,
574        delim: Delimiter,
575        sep: SeqSep,
576        allow_empty: bool,
577        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
578    ) -> PResult<'sess, BoxSlice<'ast, T>> {
579        self.parse_unspanned_seq(
580            TokenKind::OpenDelim(delim),
581            TokenKind::CloseDelim(delim),
582            sep,
583            allow_empty,
584            f,
585        )
586    }
587
588    /// Parses a sequence, including both delimiters. The function
589    /// `f` must consume tokens until reaching the next separator or
590    /// closing bracket.
591    #[track_caller]
592    #[inline]
593    fn parse_unspanned_seq<T>(
594        &mut self,
595        bra: TokenKind,
596        ket: TokenKind,
597        sep: SeqSep,
598        allow_empty: bool,
599        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
600    ) -> PResult<'sess, BoxSlice<'ast, T>> {
601        self.expect(bra)?;
602        self.parse_seq_to_end(ket, sep, allow_empty, f)
603    }
604
605    /// Parses a sequence, including only the closing delimiter. The function
606    /// `f` must consume tokens until reaching the next separator or
607    /// closing bracket.
608    #[track_caller]
609    #[inline]
610    fn parse_seq_to_end<T>(
611        &mut self,
612        ket: TokenKind,
613        sep: SeqSep,
614        allow_empty: bool,
615        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
616    ) -> PResult<'sess, BoxSlice<'ast, T>> {
617        let (val, recovered) = self.parse_seq_to_before_end(ket, sep, allow_empty, f)?;
618        if recovered == Recovered::No {
619            self.expect(ket)?;
620        }
621        Ok(val)
622    }
623
624    /// Parses a sequence, not including the delimiters. The function
625    /// `f` must consume tokens until reaching the next separator or
626    /// closing bracket.
627    #[track_caller]
628    #[inline]
629    fn parse_seq_to_before_end<T>(
630        &mut self,
631        ket: TokenKind,
632        sep: SeqSep,
633        allow_empty: bool,
634        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
635    ) -> PResult<'sess, (BoxSlice<'ast, T>, Recovered)> {
636        self.parse_seq_to_before_tokens(ket, sep, allow_empty, f)
637    }
638
639    /// Parses a sequence until the specified delimiters. The function
640    /// `f` must consume tokens until reaching the next separator or
641    /// closing bracket.
642    #[track_caller]
643    fn parse_seq_to_before_tokens<T>(
644        &mut self,
645        ket: TokenKind,
646        sep: SeqSep,
647        allow_empty: bool,
648        mut f: impl FnMut(&mut Self) -> PResult<'sess, T>,
649    ) -> PResult<'sess, (BoxSlice<'ast, T>, Recovered)> {
650        let mut first = true;
651        let mut recovered = Recovered::No;
652        let mut trailing = false;
653        let mut v = SmallVec::<[T; 8]>::new();
654
655        if !allow_empty {
656            v.push(f(self)?);
657            first = false;
658        }
659
660        while !self.check(ket) {
661            if let TokenKind::CloseDelim(..) | TokenKind::Eof = self.token.kind {
662                break;
663            }
664
665            if let Some(sep_kind) = sep.sep {
666                if first {
667                    // no separator for the first element
668                    first = false;
669                } else {
670                    // check for separator
671                    match self.expect(sep_kind) {
672                        Ok(recovered_) => {
673                            if recovered_ == Recovered::Yes {
674                                recovered = Recovered::Yes;
675                                break;
676                            }
677                        }
678                        Err(e) => return Err(e),
679                    }
680
681                    if self.check(ket) {
682                        trailing = true;
683                        break;
684                    }
685                }
686            }
687
688            v.push(f(self)?);
689        }
690
691        if let Some(sep_kind) = sep.sep {
692            let open_close_delim = first && allow_empty;
693            if !open_close_delim
694                && sep.trailing_sep_required
695                && !trailing
696                && let Err(e) = self.expect(sep_kind)
697            {
698                e.emit();
699            }
700            if !sep.trailing_sep_allowed && trailing {
701                let msg = format!("trailing `{sep_kind}` separator is not allowed");
702                self.dcx().err(msg).span(self.prev_token.span).emit();
703            }
704        }
705
706        Ok((self.alloc_smallvec(v), recovered))
707    }
708
709    /// Advance the parser by one token.
710    pub fn bump(&mut self) {
711        let next = self.next_token();
712        if next.is_comment_or_doc() {
713            return self.bump_trivia(next);
714        }
715        self.inlined_bump_with(next);
716    }
717
718    /// Advance the parser by one token using provided token as the next one.
719    ///
720    /// # Panics
721    ///
722    /// Panics if the provided token is a comment.
723    pub fn bump_with(&mut self, next: Token) {
724        self.inlined_bump_with(next);
725    }
726
727    /// This always-inlined version should only be used on hot code paths.
728    #[inline(always)]
729    fn inlined_bump_with(&mut self, next: Token) {
730        #[cfg(debug_assertions)]
731        if next.is_comment_or_doc() {
732            self.dcx().bug("`bump_with` should not be used with comments").span(next.span).emit();
733        }
734        self.prev_token = std::mem::replace(&mut self.token, next);
735        self.expected_tokens.clear();
736        self.docs.clear();
737    }
738
739    /// Bumps comments and docs.
740    ///
741    /// Pushes docs to `self.docs`. Retrieve them with `parse_doc_comments`.
742    #[cold]
743    fn bump_trivia(&mut self, next: Token) {
744        self.docs.clear();
745
746        debug_assert!(next.is_comment_or_doc());
747        self.prev_token = std::mem::replace(&mut self.token, next);
748        while let Some((is_doc, doc)) = self.token.comment() {
749            if is_doc {
750                self.docs.push(doc);
751            }
752            // Don't set `prev_token` on purpose.
753            self.token = self.next_token();
754        }
755
756        self.expected_tokens.clear();
757    }
758
759    /// Advances the internal `tokens` iterator, without updating the parser state.
760    ///
761    /// Use [`bump`](Self::bump) and [`token`](Self::token) instead.
762    #[inline(always)]
763    fn next_token(&mut self) -> Token {
764        self.tokens.next().unwrap_or(Token::new(TokenKind::Eof, self.token.span))
765    }
766
767    /// Returns the token `dist` tokens ahead of the current one.
768    ///
769    /// [`Eof`](Token::EOF) will be returned if the look-ahead is any distance past the end of the
770    /// tokens.
771    #[inline]
772    pub fn look_ahead(&self, dist: usize) -> Token {
773        // Specialize for the common `dist` cases.
774        match dist {
775            0 => self.token,
776            1 => self.look_ahead_full(1),
777            2 => self.look_ahead_full(2),
778            dist => self.look_ahead_full(dist),
779        }
780    }
781
782    fn look_ahead_full(&self, dist: usize) -> Token {
783        self.tokens
784            .as_slice()
785            .iter()
786            .copied()
787            .filter(|t| !t.is_comment_or_doc())
788            .nth(dist - 1)
789            .unwrap_or(Token::EOF)
790    }
791
792    /// Calls `f` with the token `dist` tokens ahead of the current one.
793    ///
794    /// See [`look_ahead`](Self::look_ahead) for more information.
795    #[inline]
796    pub fn look_ahead_with<R>(&self, dist: usize, f: impl FnOnce(Token) -> R) -> R {
797        f(self.look_ahead(dist))
798    }
799
800    /// Runs `f` with the parser in a contract context.
801    #[inline]
802    fn in_contract<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
803        let old = std::mem::replace(&mut self.in_contract, true);
804        let res = f(self);
805        self.in_contract = old;
806        res
807    }
808
809    /// Runs `f` with the parser in a Yul context.
810    #[inline]
811    fn in_yul<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
812        let old = std::mem::replace(&mut self.in_yul, true);
813        let res = f(self);
814        self.in_yul = old;
815        res
816    }
817
818    /// Runs `f` with recursion depth tracking and limit enforcement.
819    #[inline]
820    pub fn with_recursion_limit<T>(
821        &mut self,
822        context: &str,
823        f: impl FnOnce(&mut Self) -> PResult<'sess, T>,
824    ) -> PResult<'sess, T> {
825        self.recursion_depth += 1;
826        let res = if self.recursion_depth > PARSER_RECURSION_LIMIT {
827            Err(self.recursion_limit_reached(context))
828        } else {
829            f(self)
830        };
831        self.recursion_depth -= 1;
832        res
833    }
834
835    #[cold]
836    fn recursion_limit_reached(&mut self, context: &str) -> PErr<'sess> {
837        let mut err = self.dcx().err("recursion limit reached").span(self.token.span);
838        if !self.prev_token.span.is_dummy() {
839            err = err.span_label(self.prev_token.span, format!("while parsing {context}"));
840        }
841        err
842    }
843}
844
845/// Common parsing methods.
846impl<'sess, 'ast> Parser<'sess, 'ast> {
847    /// Provides a spanned parser.
848    #[track_caller]
849    pub fn parse_spanned<T>(
850        &mut self,
851        f: impl FnOnce(&mut Self) -> PResult<'sess, T>,
852    ) -> PResult<'sess, (Span, T)> {
853        let lo = self.token.span;
854        let res = f(self);
855        let span = lo.to(self.prev_token.span);
856        match res {
857            Ok(t) => Ok((span, t)),
858            Err(e) if e.span.is_dummy() => Err(e.span(span)),
859            Err(e) => Err(e),
860        }
861    }
862
863    /// Parses contiguous doc comments. Can be empty.
864    #[inline]
865    pub fn parse_doc_comments(&mut self) -> DocComments<'ast> {
866        if !self.docs.is_empty() { self.parse_doc_comments_inner() } else { Default::default() }
867    }
868
869    #[cold]
870    fn parse_doc_comments_inner(&mut self) -> DocComments<'ast> {
871        let docs = self.arena.alloc_thin_slice_copy((), &self.docs);
872        self.docs.clear();
873        docs.into()
874    }
875
876    /// Parses a qualified identifier: `foo.bar.baz`.
877    #[track_caller]
878    pub fn parse_path(&mut self) -> PResult<'sess, AstPath<'ast>> {
879        let first = self.parse_ident()?;
880        self.parse_path_with(first)
881    }
882
883    /// Parses a qualified identifier starting with the given identifier.
884    #[track_caller]
885    pub fn parse_path_with(&mut self, first: Ident) -> PResult<'sess, AstPath<'ast>> {
886        if self.in_yul {
887            self.parse_path_with_f(first, Self::parse_yul_path_ident)
888        } else {
889            self.parse_path_with_f(first, Self::parse_ident)
890        }
891    }
892
893    /// Parses either an identifier or a Yul EVM builtin.
894    fn parse_yul_path_ident(&mut self) -> PResult<'sess, Ident> {
895        let ident = self.ident_or_err(true)?;
896        if !ident.is_yul_evm_builtin() && ident.is_reserved(true) {
897            self.expected_ident_found_err().emit();
898        }
899        self.bump();
900        Ok(ident)
901    }
902
903    /// Parses a qualified identifier: `foo.bar.baz`.
904    #[track_caller]
905    pub fn parse_path_any(&mut self) -> PResult<'sess, AstPath<'ast>> {
906        let first = self.parse_ident_any()?;
907        self.parse_path_with_f(first, Self::parse_ident_any)
908    }
909
910    /// Parses a qualified identifier starting with the given identifier.
911    #[track_caller]
912    fn parse_path_with_f(
913        &mut self,
914        first: Ident,
915        mut f: impl FnMut(&mut Self) -> PResult<'sess, Ident>,
916    ) -> PResult<'sess, AstPath<'ast>> {
917        if !self.check_noexpect(TokenKind::Dot) {
918            return Ok(self.alloc_path(&[first]));
919        }
920
921        let mut path = SmallVec::<[_; 4]>::new();
922        path.push(first);
923        while self.eat(TokenKind::Dot) {
924            path.push(f(self)?);
925        }
926        Ok(self.alloc_path(&path))
927    }
928
929    /// Parses an identifier.
930    #[track_caller]
931    pub fn parse_ident(&mut self) -> PResult<'sess, Ident> {
932        self.parse_ident_common(true)
933    }
934
935    /// Parses an identifier. Does not check if the identifier is a reserved keyword.
936    #[track_caller]
937    pub fn parse_ident_any(&mut self) -> PResult<'sess, Ident> {
938        let ident = self.ident_or_err(true)?;
939        self.bump();
940        Ok(ident)
941    }
942
943    /// Parses an optional identifier.
944    #[track_caller]
945    pub fn parse_ident_opt(&mut self) -> PResult<'sess, Option<Ident>> {
946        if self.check_ident() { self.parse_ident().map(Some) } else { Ok(None) }
947    }
948
949    #[track_caller]
950    fn parse_ident_common(&mut self, recover: bool) -> PResult<'sess, Ident> {
951        let ident = self.ident_or_err(recover)?;
952        if ident.is_reserved(self.in_yul) {
953            let err = self.expected_ident_found_err();
954            if recover {
955                err.emit();
956            } else {
957                return Err(err);
958            }
959        }
960        self.bump();
961        Ok(ident)
962    }
963
964    /// Returns Ok if the current token is an identifier. Does not advance the parser.
965    #[track_caller]
966    fn ident_or_err(&mut self, recover: bool) -> PResult<'sess, Ident> {
967        match self.token.ident() {
968            Some(ident) => Ok(ident),
969            None => self.expected_ident_found(recover),
970        }
971    }
972
973    #[cold]
974    #[track_caller]
975    fn expected_ident_found(&mut self, recover: bool) -> PResult<'sess, Ident> {
976        self.expected_ident_found_other(self.token, recover)
977    }
978
979    #[cold]
980    #[track_caller]
981    fn expected_ident_found_other(&mut self, token: Token, recover: bool) -> PResult<'sess, Ident> {
982        let msg = format!("expected identifier, found {}", token.full_description());
983        let span = token.span;
984        let mut err = self.dcx().err(msg).span(span);
985
986        let mut recovered_ident = None;
987
988        let suggest_remove_comma = token.kind == TokenKind::Comma && self.look_ahead(1).is_ident();
989        if suggest_remove_comma {
990            if recover {
991                self.bump();
992                recovered_ident = self.ident_or_err(false).ok();
993            }
994            err = err.span_help(span, "remove this comma");
995        }
996
997        if recover && let Some(ident) = recovered_ident {
998            err.emit();
999            return Ok(ident);
1000        }
1001        Err(err)
1002    }
1003
1004    #[cold]
1005    #[track_caller]
1006    fn expected_ident_found_err(&mut self) -> PErr<'sess> {
1007        self.expected_ident_found(false).unwrap_err()
1008    }
1009}
solar_parse/parser/mod.rs

solar_parse/parser/
mod.rs