solar_parse/parser/
mod.rs

1use crate::{Lexer, PErr, PResult};
2use smallvec::SmallVec;
3use solar_ast::{
4    self as ast, AstPath, Box, DocComment, DocComments, PathSlice,
5    token::{Delimiter, Token, TokenKind},
6};
7use solar_data_structures::{BumpExt, fmt::or_list};
8use solar_interface::{
9    Ident, Result, Session, Span, Symbol,
10    diagnostics::DiagCtxt,
11    source_map::{FileName, SourceFile},
12};
13use std::{fmt, path::Path};
14
15mod expr;
16mod item;
17mod lit;
18mod stmt;
19mod ty;
20mod yul;
21
/// Maximum allowed recursive descent depth for selected parser entry points.
///
/// This limit is applied to `parse_expr`, `parse_stmt`, and `parse_yul_stmt`.
///
/// It is enforced by `Parser::with_recursion_limit`, which returns a
/// "recursion limit reached" error once the tracked depth exceeds this value.
const PARSER_RECURSION_LIMIT: usize = 128;
26
/// Solidity and Yul parser.
///
/// The parser owns the remaining token stream together with the diagnostic bookkeeping
/// (expected tokens, pending doc-comments) that is reset whenever it advances.
///
/// # Examples
///
/// ```
/// # mod solar { pub use {solar_ast as ast, solar_interface as interface, solar_parse as parse}; }
/// # fn main() {}
#[doc = include_str!("../../doc-examples/parser.rs")]
/// ```
pub struct Parser<'sess, 'ast> {
    /// The parser session.
    pub sess: &'sess Session,
    /// The arena where the AST nodes are allocated.
    pub arena: &'ast ast::Arena,

    /// The current token.
    pub token: Token,
    /// The previous token.
    pub prev_token: Token,
    /// List of expected tokens. Cleared after each `bump` call.
    expected_tokens: Vec<ExpectedToken>,
    /// The span of the last unexpected token.
    ///
    /// Used by `expect_one_of` to detect a second "unexpected token" report on the same token.
    last_unexpected_token_span: Option<Span>,
    /// The current doc-comments.
    ///
    /// Collected while skipping comment tokens and cleared on every `bump`.
    docs: Vec<DocComment>,

    /// The token stream.
    ///
    /// Holds only the tokens that have not been consumed yet.
    tokens: std::vec::IntoIter<Token>,

    /// Whether the parser is in Yul mode.
    ///
    /// Currently, this can only happen when parsing a Yul "assembly" block.
    in_yul: bool,
    /// Whether the parser is currently parsing a contract block.
    in_contract: bool,

    /// Current recursion depth for recursive parsing operations.
    ///
    /// Maintained by `with_recursion_limit` and compared against `PARSER_RECURSION_LIMIT`.
    recursion_depth: usize,
}
66
/// A token, or class of tokens, that the parser would have accepted at the current position.
///
/// Values are accumulated in `Parser::expected_tokens` and rendered into
/// "expected ..., found ..." diagnostics.
#[derive(Clone, Debug, PartialEq, Eq)]
enum ExpectedToken {
    /// An exact token kind; displayed wrapped in backticks.
    Token(TokenKind),
    /// A keyword; displayed wrapped in backticks.
    Keyword(Symbol),
    /// Displayed as "literal".
    Lit,
    /// Displayed as "string literal".
    StrLit,
    /// Displayed as "identifier".
    Ident,
    /// Displayed as "path".
    Path,
    /// Displayed as "elementary type name".
    ElementaryType,
}
77
78impl fmt::Display for ExpectedToken {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80        f.write_str(match self {
81            Self::Token(t) => return write!(f, "`{t}`"),
82            Self::Keyword(kw) => return write!(f, "`{kw}`"),
83            Self::StrLit => "string literal",
84            Self::Lit => "literal",
85            Self::Ident => "identifier",
86            Self::Path => "path",
87            Self::ElementaryType => "elementary type name",
88        })
89    }
90}
91
92impl ExpectedToken {
93    fn to_string_many(tokens: &[Self]) -> String {
94        or_list(tokens).to_string()
95    }
96
97    fn eq_kind(&self, other: TokenKind) -> bool {
98        match *self {
99            Self::Token(kind) => kind == other,
100            _ => false,
101        }
102    }
103}
104
/// A sequence separator.
///
/// Describes which token (if any) separates the elements of a parsed sequence and how a
/// separator directly before the closing token is treated.
#[derive(Debug)]
struct SeqSep {
    /// The separator token.
    ///
    /// `None` means the elements are not separated by any token.
    sep: Option<TokenKind>,
    /// `true` if a trailing separator is allowed.
    trailing_sep_allowed: bool,
    /// `true` if a trailing separator is required.
    trailing_sep_required: bool,
}
115
116impl SeqSep {
117    fn trailing_enforced(t: TokenKind) -> Self {
118        Self { sep: Some(t), trailing_sep_required: true, trailing_sep_allowed: true }
119    }
120
121    #[allow(dead_code)]
122    fn trailing_allowed(t: TokenKind) -> Self {
123        Self { sep: Some(t), trailing_sep_required: false, trailing_sep_allowed: true }
124    }
125
126    fn trailing_disallowed(t: TokenKind) -> Self {
127        Self { sep: Some(t), trailing_sep_required: false, trailing_sep_allowed: false }
128    }
129
130    fn none() -> Self {
131        Self { sep: None, trailing_sep_required: false, trailing_sep_allowed: false }
132    }
133}
134
135impl<'sess, 'ast> Parser<'sess, 'ast> {
136    /// Creates a new parser.
137    pub fn new(sess: &'sess Session, arena: &'ast ast::Arena, tokens: Vec<Token>) -> Self {
138        let mut parser = Self {
139            sess,
140            arena,
141            token: Token::DUMMY,
142            prev_token: Token::DUMMY,
143            expected_tokens: Vec::with_capacity(8),
144            last_unexpected_token_span: None,
145            docs: Vec::with_capacity(4),
146            tokens: tokens.into_iter(),
147            in_yul: false,
148            in_contract: false,
149            recursion_depth: 0,
150        };
151        parser.bump();
152        parser
153    }
154
155    /// Creates a new parser from a source code string.
156    pub fn from_source_code(
157        sess: &'sess Session,
158        arena: &'ast ast::Arena,
159        filename: FileName,
160        src: impl Into<String>,
161    ) -> Result<Self> {
162        Self::from_lazy_source_code(sess, arena, filename, || Ok(src.into()))
163    }
164
165    /// Creates a new parser from a file.
166    ///
167    /// The file will not be read if it has already been added into the source map.
168    pub fn from_file(sess: &'sess Session, arena: &'ast ast::Arena, path: &Path) -> Result<Self> {
169        Self::from_lazy_source_code(sess, arena, FileName::Real(path.to_path_buf()), || {
170            sess.source_map().file_loader().load_file(path)
171        })
172    }
173
174    /// Creates a new parser from a source code closure.
175    ///
176    /// The closure will not be called if the file name has already been added into the source map.
177    pub fn from_lazy_source_code(
178        sess: &'sess Session,
179        arena: &'ast ast::Arena,
180        filename: FileName,
181        get_src: impl FnOnce() -> std::io::Result<String>,
182    ) -> Result<Self> {
183        let file = sess
184            .source_map()
185            .new_source_file_with(filename, get_src)
186            .map_err(|e| sess.dcx.err(e.to_string()).emit())?;
187        Ok(Self::from_source_file(sess, arena, &file))
188    }
189
190    /// Creates a new parser from a source file.
191    ///
192    /// Note that the source file must be added to the source map before calling this function.
193    /// Prefer using [`from_source_code`](Self::from_source_code) or [`from_file`](Self::from_file)
194    /// instead.
195    pub fn from_source_file(
196        sess: &'sess Session,
197        arena: &'ast ast::Arena,
198        file: &SourceFile,
199    ) -> Self {
200        Self::from_lexer(arena, Lexer::from_source_file(sess, file))
201    }
202
203    /// Creates a new parser from a lexer.
204    pub fn from_lexer(arena: &'ast ast::Arena, lexer: Lexer<'sess, '_>) -> Self {
205        Self::new(lexer.sess, arena, lexer.into_tokens())
206    }
207
208    /// Returns the diagnostic context.
209    #[inline]
210    pub fn dcx(&self) -> &'sess DiagCtxt {
211        &self.sess.dcx
212    }
213
214    /// Allocates an object on the AST arena.
215    pub fn alloc<T>(&self, value: T) -> Box<'ast, T> {
216        self.arena.alloc(value)
217    }
218
219    /// Allocates a list of objects on the AST arena.
220    ///
221    /// # Panics
222    ///
223    /// Panics if the list is empty.
224    pub fn alloc_path(&self, values: &[Ident]) -> AstPath<'ast> {
225        PathSlice::from_mut_slice(self.arena.alloc_slice_copy(values))
226    }
227
228    /// Allocates a list of objects on the AST arena.
229    pub fn alloc_vec<T>(&self, values: Vec<T>) -> Box<'ast, [T]> {
230        self.arena.alloc_vec(values)
231    }
232
233    /// Allocates a list of objects on the AST arena.
234    pub fn alloc_smallvec<A: smallvec::Array>(&self, values: SmallVec<A>) -> Box<'ast, [A::Item]> {
235        self.arena.alloc_smallvec(values)
236    }
237
238    /// Returns an "unexpected token" error in a [`PResult`] for the current token.
239    #[inline]
240    #[track_caller]
241    pub fn unexpected<T>(&mut self) -> PResult<'sess, T> {
242        Err(self.unexpected_error())
243    }
244
245    /// Returns an "unexpected token" error for the current token.
246    #[inline]
247    #[track_caller]
248    pub fn unexpected_error(&mut self) -> PErr<'sess> {
249        #[cold]
250        #[inline(never)]
251        #[track_caller]
252        fn unexpected_ok(b: bool) -> ! {
253            unreachable!("`unexpected()` returned Ok({b})")
254        }
255        match self.expect_one_of(&[], &[]) {
256            Ok(b) => unexpected_ok(b),
257            Err(e) => e,
258        }
259    }
260
261    /// Expects and consumes the token `t`. Signals an error if the next token is not `t`.
262    #[track_caller]
263    pub fn expect(&mut self, tok: TokenKind) -> PResult<'sess, bool /* recovered */> {
264        if self.expected_tokens.is_empty() {
265            if self.check_noexpect(tok) {
266                self.bump();
267                Ok(false)
268            } else {
269                Err(self.unexpected_error_with(tok))
270            }
271        } else {
272            self.expect_one_of(&[tok], &[])
273        }
274    }
275
276    /// Creates a [`PErr`] for an unexpected token `t`.
277    #[track_caller]
278    fn unexpected_error_with(&mut self, t: TokenKind) -> PErr<'sess> {
279        let prev_span = if self.prev_token.span.is_dummy() {
280            // We don't want to point at the following span after a dummy span.
281            // This happens when the parser finds an empty token stream.
282            self.token.span
283        } else if self.token.is_eof() {
284            // EOF, don't want to point at the following char, but rather the last token.
285            self.prev_token.span
286        } else {
287            self.prev_token.span.shrink_to_hi()
288        };
289        let span = self.token.span;
290
291        let this_token_str = self.token.full_description();
292        let label_exp = format!("expected `{t}`");
293        let msg = format!("{label_exp}, found {this_token_str}");
294        let mut err = self.dcx().err(msg).span(span);
295        if !self.sess.source_map().is_multiline(prev_span.until(span)) {
296            // When the spans are in the same line, it means that the only content
297            // between them is whitespace, point only at the found token.
298            err = err.span_label(span, label_exp);
299        } else {
300            err = err.span_label(prev_span, label_exp);
301            err = err.span_label(span, "unexpected token");
302        }
303        err
304    }
305
306    /// Expect next token to be edible or inedible token. If edible,
307    /// then consume it; if inedible, then return without consuming
308    /// anything. Signal a fatal error if next token is unexpected.
309    #[track_caller]
310    pub fn expect_one_of(
311        &mut self,
312        edible: &[TokenKind],
313        inedible: &[TokenKind],
314    ) -> PResult<'sess, bool /* recovered */> {
315        if edible.contains(&self.token.kind) {
316            self.bump();
317            Ok(false)
318        } else if inedible.contains(&self.token.kind) {
319            // leave it in the input
320            Ok(false)
321        } else if self.token.kind != TokenKind::Eof
322            && self.last_unexpected_token_span == Some(self.token.span)
323        {
324            panic!("called unexpected twice on the same token");
325        } else {
326            self.expected_one_of_not_found(edible, inedible)
327        }
328    }
329
330    #[track_caller]
331    fn expected_one_of_not_found(
332        &mut self,
333        edible: &[TokenKind],
334        inedible: &[TokenKind],
335    ) -> PResult<'sess, bool> {
336        let mut expected = edible
337            .iter()
338            .chain(inedible)
339            .cloned()
340            .map(ExpectedToken::Token)
341            .chain(self.expected_tokens.iter().cloned())
342            .filter(|token| {
343                // Filter out suggestions that suggest the same token
344                // which was found and deemed incorrect.
345                fn is_ident_eq_keyword(found: TokenKind, expected: &ExpectedToken) -> bool {
346                    if let TokenKind::Ident(current_sym) = found
347                        && let ExpectedToken::Keyword(suggested_sym) = expected
348                    {
349                        return current_sym == *suggested_sym;
350                    }
351                    false
352                }
353
354                if !token.eq_kind(self.token.kind) {
355                    let eq = is_ident_eq_keyword(self.token.kind, token);
356                    // If the suggestion is a keyword and the found token is an ident,
357                    // the content of which are equal to the suggestion's content,
358                    // we can remove that suggestion (see the `return false` below).
359
360                    // If this isn't the case however, and the suggestion is a token the
361                    // content of which is the same as the found token's, we remove it as well.
362                    if !eq {
363                        if let ExpectedToken::Token(kind) = token
364                            && *kind == self.token.kind
365                        {
366                            return false;
367                        }
368                        return true;
369                    }
370                }
371                false
372            })
373            .collect::<Vec<_>>();
374        expected.sort_by_cached_key(ToString::to_string);
375        expected.dedup();
376
377        let expect = ExpectedToken::to_string_many(&expected);
378        let actual = self.token.full_description();
379        let (msg_exp, (mut label_span, label_exp)) = match expected.len() {
380            0 => (
381                format!("unexpected token: {actual}"),
382                (self.prev_token.span, "unexpected token after this".to_string()),
383            ),
384            1 => (
385                format!("expected {expect}, found {actual}"),
386                (self.prev_token.span.shrink_to_hi(), format!("expected {expect}")),
387            ),
388            len => {
389                let fmt = format!("expected one of {expect}, found {actual}");
390                let short_expect = if len > 6 { format!("{len} possible tokens") } else { expect };
391                let s = self.prev_token.span.shrink_to_hi();
392                (fmt, (s, format!("expected one of {short_expect}")))
393            }
394        };
395        if self.token.is_eof() {
396            // This is EOF; don't want to point at the following char, but rather the last token.
397            label_span = self.prev_token.span;
398        };
399
400        self.last_unexpected_token_span = Some(self.token.span);
401        let mut err = self.dcx().err(msg_exp).span(self.token.span);
402
403        if self.prev_token.span.is_dummy()
404            || !self
405                .sess
406                .source_map()
407                .is_multiline(self.token.span.shrink_to_hi().until(label_span.shrink_to_lo()))
408        {
409            // When the spans are in the same line, it means that the only content between
410            // them is whitespace, point at the found token in that case.
411            err = err.span_label(self.token.span, label_exp);
412        } else {
413            err = err.span_label(label_span, label_exp);
414            err = err.span_label(self.token.span, "unexpected token");
415        }
416
417        Err(err)
418    }
419
420    /// Expects and consumes a semicolon.
421    #[track_caller]
422    fn expect_semi(&mut self) -> PResult<'sess, ()> {
423        self.expect(TokenKind::Semi).map(drop)
424    }
425
426    /// Checks if the next token is `tok`, and returns `true` if so.
427    ///
428    /// This method will automatically add `tok` to `expected_tokens` if `tok` is not
429    /// encountered.
430    #[inline]
431    #[must_use]
432    fn check(&mut self, tok: TokenKind) -> bool {
433        let is_present = self.check_noexpect(tok);
434        if !is_present {
435            self.expected_tokens.push(ExpectedToken::Token(tok));
436        }
437        is_present
438    }
439
440    #[inline]
441    #[must_use]
442    fn check_noexpect(&self, tok: TokenKind) -> bool {
443        self.token.kind == tok
444    }
445
446    /// Consumes a token 'tok' if it exists. Returns whether the given token was present.
447    ///
448    /// the main purpose of this function is to reduce the cluttering of the suggestions list
449    /// which using the normal eat method could introduce in some cases.
450    #[must_use]
451    pub fn eat_noexpect(&mut self, tok: TokenKind) -> bool {
452        let is_present = self.check_noexpect(tok);
453        if is_present {
454            self.bump()
455        }
456        is_present
457    }
458
459    /// Consumes a token 'tok' if it exists. Returns whether the given token was present.
460    #[must_use]
461    pub fn eat(&mut self, tok: TokenKind) -> bool {
462        let is_present = self.check(tok);
463        if is_present {
464            self.bump()
465        }
466        is_present
467    }
468
469    /// If the next token is the given keyword, returns `true` without eating it.
470    /// An expectation is also added for diagnostics purposes.
471    #[must_use]
472    fn check_keyword(&mut self, kw: Symbol) -> bool {
473        self.expected_tokens.push(ExpectedToken::Keyword(kw));
474        self.token.is_keyword(kw)
475    }
476
477    /// If the next token is the given keyword, eats it and returns `true`.
478    /// Otherwise, returns `false`. An expectation is also added for diagnostics purposes.
479    #[must_use]
480    pub fn eat_keyword(&mut self, kw: Symbol) -> bool {
481        if self.check_keyword(kw) {
482            self.bump();
483            true
484        } else {
485            false
486        }
487    }
488
489    /// If the given word is not a keyword, signals an error.
490    /// If the next token is not the given word, signals an error.
491    /// Otherwise, eats it.
492    #[track_caller]
493    fn expect_keyword(&mut self, kw: Symbol) -> PResult<'sess, ()> {
494        if !self.eat_keyword(kw) { self.unexpected() } else { Ok(()) }
495    }
496
497    #[must_use]
498    fn check_ident(&mut self) -> bool {
499        self.check_or_expected(self.token.is_ident(), ExpectedToken::Ident)
500    }
501
502    #[must_use]
503    fn check_nr_ident(&mut self) -> bool {
504        self.check_or_expected(self.token.is_non_reserved_ident(self.in_yul), ExpectedToken::Ident)
505    }
506
507    #[must_use]
508    fn check_path(&mut self) -> bool {
509        self.check_or_expected(self.token.is_ident(), ExpectedToken::Path)
510    }
511
512    #[must_use]
513    fn check_lit(&mut self) -> bool {
514        self.check_or_expected(self.token.is_lit(), ExpectedToken::Lit)
515    }
516
517    #[must_use]
518    fn check_str_lit(&mut self) -> bool {
519        self.check_or_expected(self.token.is_str_lit(), ExpectedToken::StrLit)
520    }
521
522    #[must_use]
523    fn check_elementary_type(&mut self) -> bool {
524        self.check_or_expected(self.token.is_elementary_type(), ExpectedToken::ElementaryType)
525    }
526
527    #[must_use]
528    fn check_or_expected(&mut self, ok: bool, t: ExpectedToken) -> bool {
529        if !ok {
530            self.expected_tokens.push(t);
531        }
532        ok
533    }
534
535    /// Parses a comma-separated sequence delimited by parentheses (e.g. `(x, y)`).
536    /// The function `f` must consume tokens until reaching the next separator or
537    /// closing bracket.
538    #[track_caller]
539    #[inline]
540    fn parse_paren_comma_seq<T>(
541        &mut self,
542        allow_empty: bool,
543        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
544    ) -> PResult<'sess, Box<'ast, [T]>> {
545        self.parse_delim_comma_seq(Delimiter::Parenthesis, allow_empty, f)
546    }
547
548    /// Parses a comma-separated sequence, including both delimiters.
549    /// The function `f` must consume tokens until reaching the next separator or
550    /// closing bracket.
551    #[track_caller]
552    #[inline]
553    fn parse_delim_comma_seq<T>(
554        &mut self,
555        delim: Delimiter,
556        allow_empty: bool,
557        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
558    ) -> PResult<'sess, Box<'ast, [T]>> {
559        self.parse_delim_seq(delim, SeqSep::trailing_disallowed(TokenKind::Comma), allow_empty, f)
560    }
561
562    /// Parses a comma-separated sequence.
563    /// The function `f` must consume tokens until reaching the next separator.
564    #[track_caller]
565    #[inline]
566    fn parse_nodelim_comma_seq<T>(
567        &mut self,
568        stop: TokenKind,
569        allow_empty: bool,
570        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
571    ) -> PResult<'sess, Box<'ast, [T]>> {
572        self.parse_seq_to_before_end(
573            stop,
574            SeqSep::trailing_disallowed(TokenKind::Comma),
575            allow_empty,
576            f,
577        )
578        .map(|(v, _recovered)| v)
579    }
580
581    /// Parses a `sep`-separated sequence, including both delimiters.
582    /// The function `f` must consume tokens until reaching the next separator or
583    /// closing bracket.
584    #[track_caller]
585    #[inline]
586    fn parse_delim_seq<T>(
587        &mut self,
588        delim: Delimiter,
589        sep: SeqSep,
590        allow_empty: bool,
591        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
592    ) -> PResult<'sess, Box<'ast, [T]>> {
593        self.parse_unspanned_seq(
594            TokenKind::OpenDelim(delim),
595            TokenKind::CloseDelim(delim),
596            sep,
597            allow_empty,
598            f,
599        )
600    }
601
602    /// Parses a sequence, including both delimiters. The function
603    /// `f` must consume tokens until reaching the next separator or
604    /// closing bracket.
605    #[track_caller]
606    #[inline]
607    fn parse_unspanned_seq<T>(
608        &mut self,
609        bra: TokenKind,
610        ket: TokenKind,
611        sep: SeqSep,
612        allow_empty: bool,
613        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
614    ) -> PResult<'sess, Box<'ast, [T]>> {
615        self.expect(bra)?;
616        self.parse_seq_to_end(ket, sep, allow_empty, f)
617    }
618
619    /// Parses a sequence, including only the closing delimiter. The function
620    /// `f` must consume tokens until reaching the next separator or
621    /// closing bracket.
622    #[track_caller]
623    #[inline]
624    fn parse_seq_to_end<T>(
625        &mut self,
626        ket: TokenKind,
627        sep: SeqSep,
628        allow_empty: bool,
629        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
630    ) -> PResult<'sess, Box<'ast, [T]>> {
631        let (val, recovered) = self.parse_seq_to_before_end(ket, sep, allow_empty, f)?;
632        if !recovered {
633            self.expect(ket)?;
634        }
635        Ok(val)
636    }
637
638    /// Parses a sequence, not including the delimiters. The function
639    /// `f` must consume tokens until reaching the next separator or
640    /// closing bracket.
641    #[track_caller]
642    #[inline]
643    fn parse_seq_to_before_end<T>(
644        &mut self,
645        ket: TokenKind,
646        sep: SeqSep,
647        allow_empty: bool,
648        f: impl FnMut(&mut Self) -> PResult<'sess, T>,
649    ) -> PResult<'sess, (Box<'ast, [T]>, bool /* recovered */)> {
650        self.parse_seq_to_before_tokens(&[ket], sep, allow_empty, f)
651    }
652
653    /// Checks if the next token is contained within `kets`, and returns `true` if so.
654    fn check_any(&mut self, kets: &[TokenKind]) -> bool {
655        kets.iter().any(|&k| self.check(k))
656    }
657
658    /// Parses a sequence until the specified delimiters. The function
659    /// `f` must consume tokens until reaching the next separator or
660    /// closing bracket.
661    #[track_caller]
662    fn parse_seq_to_before_tokens<T>(
663        &mut self,
664        kets: &[TokenKind],
665        sep: SeqSep,
666        allow_empty: bool,
667        mut f: impl FnMut(&mut Self) -> PResult<'sess, T>,
668    ) -> PResult<'sess, (Box<'ast, [T]>, bool /* recovered */)> {
669        let mut first = true;
670        let mut recovered = false;
671        let mut trailing = false;
672        let mut v = SmallVec::<[T; 8]>::new();
673
674        if !allow_empty {
675            v.push(f(self)?);
676            first = false;
677        }
678
679        while !self.check_any(kets) {
680            if let TokenKind::CloseDelim(..) | TokenKind::Eof = self.token.kind {
681                break;
682            }
683
684            if let Some(sep_kind) = sep.sep {
685                if first {
686                    // no separator for the first element
687                    first = false;
688                } else {
689                    // check for separator
690                    match self.expect(sep_kind) {
691                        Ok(recovered_) => {
692                            if recovered_ {
693                                recovered = true;
694                                break;
695                            }
696                        }
697                        Err(e) => return Err(e),
698                    }
699
700                    if self.check_any(kets) {
701                        trailing = true;
702                        break;
703                    }
704                }
705            }
706
707            v.push(f(self)?);
708        }
709
710        if let Some(sep_kind) = sep.sep {
711            let open_close_delim = first && allow_empty;
712            if !open_close_delim
713                && sep.trailing_sep_required
714                && !trailing
715                && let Err(e) = self.expect(sep_kind)
716            {
717                e.emit();
718            }
719            if !sep.trailing_sep_allowed && trailing {
720                let msg = format!("trailing `{sep_kind}` separator is not allowed");
721                self.dcx().err(msg).span(self.prev_token.span).emit();
722            }
723        }
724
725        Ok((self.alloc_smallvec(v), recovered))
726    }
727
728    /// Advance the parser by one token.
729    pub fn bump(&mut self) {
730        let next = self.next_token();
731        if next.is_comment_or_doc() {
732            return self.bump_trivia(next);
733        }
734        self.inlined_bump_with(next);
735    }
736
737    /// Advance the parser by one token using provided token as the next one.
738    ///
739    /// # Panics
740    ///
741    /// Panics if the provided token is a comment.
742    pub fn bump_with(&mut self, next: Token) {
743        self.inlined_bump_with(next);
744    }
745
746    /// This always-inlined version should only be used on hot code paths.
747    #[inline(always)]
748    fn inlined_bump_with(&mut self, next: Token) {
749        #[cfg(debug_assertions)]
750        if next.is_comment_or_doc() {
751            self.dcx().bug("`bump_with` should not be used with comments").span(next.span).emit();
752        }
753        self.prev_token = std::mem::replace(&mut self.token, next);
754        self.expected_tokens.clear();
755        self.docs.clear();
756    }
757
758    /// Bumps comments and docs.
759    ///
760    /// Pushes docs to `self.docs`. Retrieve them with `parse_doc_comments`.
761    #[cold]
762    fn bump_trivia(&mut self, next: Token) {
763        self.docs.clear();
764
765        debug_assert!(next.is_comment_or_doc());
766        self.prev_token = std::mem::replace(&mut self.token, next);
767        while let Some((is_doc, doc)) = self.token.comment() {
768            if is_doc {
769                self.docs.push(doc);
770            }
771            // Don't set `prev_token` on purpose.
772            self.token = self.next_token();
773        }
774
775        self.expected_tokens.clear();
776    }
777
778    /// Advances the internal `tokens` iterator, without updating the parser state.
779    ///
780    /// Use [`bump`](Self::bump) and [`token`](Self::token) instead.
781    #[inline(always)]
782    fn next_token(&mut self) -> Token {
783        self.tokens.next().unwrap_or(Token { kind: TokenKind::Eof, span: self.token.span })
784    }
785
786    /// Returns the token `dist` tokens ahead of the current one.
787    ///
788    /// [`Eof`](Token::EOF) will be returned if the look-ahead is any distance past the end of the
789    /// tokens.
790    #[inline]
791    pub fn look_ahead(&self, dist: usize) -> Token {
792        // Specialize for the common `dist` cases.
793        match dist {
794            0 => self.token,
795            1 => self.look_ahead_full(1),
796            2 => self.look_ahead_full(2),
797            dist => self.look_ahead_full(dist),
798        }
799    }
800
801    fn look_ahead_full(&self, dist: usize) -> Token {
802        self.tokens
803            .as_slice()
804            .iter()
805            .copied()
806            .filter(|t| !t.is_comment_or_doc())
807            .nth(dist - 1)
808            .unwrap_or(Token::EOF)
809    }
810
811    /// Calls `f` with the token `dist` tokens ahead of the current one.
812    ///
813    /// See [`look_ahead`](Self::look_ahead) for more information.
814    #[inline]
815    pub fn look_ahead_with<R>(&self, dist: usize, f: impl FnOnce(Token) -> R) -> R {
816        f(self.look_ahead(dist))
817    }
818
819    /// Runs `f` with the parser in a contract context.
820    #[inline]
821    fn in_contract<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
822        let old = std::mem::replace(&mut self.in_contract, true);
823        let res = f(self);
824        self.in_contract = old;
825        res
826    }
827
828    /// Runs `f` with the parser in a Yul context.
829    #[inline]
830    fn in_yul<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
831        let old = std::mem::replace(&mut self.in_yul, true);
832        let res = f(self);
833        self.in_yul = old;
834        res
835    }
836
837    /// Runs `f` with recursion depth tracking and limit enforcement.
838    #[inline]
839    pub fn with_recursion_limit<T>(
840        &mut self,
841        context: &str,
842        f: impl FnOnce(&mut Self) -> PResult<'sess, T>,
843    ) -> PResult<'sess, T> {
844        // Increment recursion depth and enforce limit.
845        self.recursion_depth = self.recursion_depth.saturating_add(1);
846        if self.recursion_depth > PARSER_RECURSION_LIMIT {
847            let mut err = self.dcx().err("recursion limit reached").span(self.token.span);
848            // Try to point at a larger span if we have a previous token.
849            if !self.prev_token.span.is_dummy() {
850                err = err.span_label(self.prev_token.span, format!("while parsing {context}"));
851            }
852            // Decrement depth before returning to keep counters consistent if caller continues.
853            self.recursion_depth = self.recursion_depth.saturating_sub(1);
854            return Err(err);
855        }
856
857        let res = f(self);
858        self.recursion_depth = self.recursion_depth.saturating_sub(1);
859        res
860    }
861}
862
863/// Common parsing methods.
864impl<'sess, 'ast> Parser<'sess, 'ast> {
865    /// Provides a spanned parser.
866    #[track_caller]
867    pub fn parse_spanned<T>(
868        &mut self,
869        f: impl FnOnce(&mut Self) -> PResult<'sess, T>,
870    ) -> PResult<'sess, (Span, T)> {
871        let lo = self.token.span;
872        let res = f(self);
873        let span = lo.to(self.prev_token.span);
874        match res {
875            Ok(t) => Ok((span, t)),
876            Err(e) if e.span.is_dummy() => Err(e.span(span)),
877            Err(e) => Err(e),
878        }
879    }
880
881    /// Parses contiguous doc comments. Can be empty.
882    #[inline]
883    pub fn parse_doc_comments(&mut self) -> DocComments<'ast> {
884        if !self.docs.is_empty() { self.parse_doc_comments_inner() } else { Default::default() }
885    }
886
887    #[cold]
888    fn parse_doc_comments_inner(&mut self) -> DocComments<'ast> {
889        let docs = self.arena.alloc_slice_copy(&self.docs);
890        self.docs.clear();
891        docs.into()
892    }
893
894    /// Parses a qualified identifier: `foo.bar.baz`.
895    #[track_caller]
896    pub fn parse_path(&mut self) -> PResult<'sess, AstPath<'ast>> {
897        let first = self.parse_ident()?;
898        self.parse_path_with(first)
899    }
900
901    /// Parses a qualified identifier starting with the given identifier.
902    #[track_caller]
903    pub fn parse_path_with(&mut self, first: Ident) -> PResult<'sess, AstPath<'ast>> {
904        if self.in_yul {
905            self.parse_path_with_f(first, Self::parse_yul_path_ident)
906        } else {
907            self.parse_path_with_f(first, Self::parse_ident)
908        }
909    }
910
911    /// Parses either an identifier or a Yul EVM builtin.
912    fn parse_yul_path_ident(&mut self) -> PResult<'sess, Ident> {
913        let ident = self.ident_or_err(true)?;
914        if !ident.is_yul_evm_builtin() && ident.is_reserved(true) {
915            self.expected_ident_found_err().emit();
916        }
917        self.bump();
918        Ok(ident)
919    }
920
921    /// Parses a qualified identifier: `foo.bar.baz`.
922    #[track_caller]
923    pub fn parse_path_any(&mut self) -> PResult<'sess, AstPath<'ast>> {
924        let first = self.parse_ident_any()?;
925        self.parse_path_with_f(first, Self::parse_ident_any)
926    }
927
928    /// Parses a qualified identifier starting with the given identifier.
929    #[track_caller]
930    fn parse_path_with_f(
931        &mut self,
932        first: Ident,
933        mut f: impl FnMut(&mut Self) -> PResult<'sess, Ident>,
934    ) -> PResult<'sess, AstPath<'ast>> {
935        if !self.check_noexpect(TokenKind::Dot) {
936            return Ok(self.alloc_path(&[first]));
937        }
938
939        let mut path = SmallVec::<[_; 4]>::new();
940        path.push(first);
941        while self.eat(TokenKind::Dot) {
942            path.push(f(self)?);
943        }
944        Ok(self.alloc_path(&path))
945    }
946
947    /// Parses an identifier.
948    #[track_caller]
949    pub fn parse_ident(&mut self) -> PResult<'sess, Ident> {
950        self.parse_ident_common(true)
951    }
952
953    /// Parses an identifier. Does not check if the identifier is a reserved keyword.
954    #[track_caller]
955    pub fn parse_ident_any(&mut self) -> PResult<'sess, Ident> {
956        let ident = self.ident_or_err(true)?;
957        self.bump();
958        Ok(ident)
959    }
960
961    /// Parses an optional identifier.
962    #[track_caller]
963    pub fn parse_ident_opt(&mut self) -> PResult<'sess, Option<Ident>> {
964        if self.check_ident() { self.parse_ident().map(Some) } else { Ok(None) }
965    }
966
967    #[track_caller]
968    fn parse_ident_common(&mut self, recover: bool) -> PResult<'sess, Ident> {
969        let ident = self.ident_or_err(recover)?;
970        if ident.is_reserved(self.in_yul) {
971            let err = self.expected_ident_found_err();
972            if recover {
973                err.emit();
974            } else {
975                return Err(err);
976            }
977        }
978        self.bump();
979        Ok(ident)
980    }
981
982    /// Returns Ok if the current token is an identifier. Does not advance the parser.
983    #[track_caller]
984    fn ident_or_err(&mut self, recover: bool) -> PResult<'sess, Ident> {
985        match self.token.ident() {
986            Some(ident) => Ok(ident),
987            None => self.expected_ident_found(recover),
988        }
989    }
990
991    #[track_caller]
992    fn expected_ident_found(&mut self, recover: bool) -> PResult<'sess, Ident> {
993        self.expected_ident_found_other(self.token, recover)
994    }
995
996    #[track_caller]
997    fn expected_ident_found_other(&mut self, token: Token, recover: bool) -> PResult<'sess, Ident> {
998        let msg = format!("expected identifier, found {}", token.full_description());
999        let span = token.span;
1000        let mut err = self.dcx().err(msg).span(span);
1001
1002        let mut recovered_ident = None;
1003
1004        let suggest_remove_comma = token.kind == TokenKind::Comma && self.look_ahead(1).is_ident();
1005        if suggest_remove_comma {
1006            if recover {
1007                self.bump();
1008                recovered_ident = self.ident_or_err(false).ok();
1009            }
1010            err = err.span_help(span, "remove this comma");
1011        }
1012
1013        if recover && let Some(ident) = recovered_ident {
1014            err.emit();
1015            return Ok(ident);
1016        }
1017        Err(err)
1018    }
1019
1020    #[track_caller]
1021    fn expected_ident_found_err(&mut self) -> PErr<'sess> {
1022        self.expected_ident_found(false).unwrap_err()
1023    }
1024}
solar_parse/parser/mod.rs

solar_parse/parser/
mod.rs