leo_parser_rowan/parser/
mod.rs

1// Copyright (C) 2019-2026 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17//! Parser infrastructure for the rowan-based Leo parser.
18//!
19//! This module provides the core parser state and helper methods for building
20//! a rowan syntax tree from a token stream. The parser uses a hand-written
21//! recursive descent approach with support for error recovery.
22
23mod expressions;
24mod grammar;
25mod items;
26mod statements;
27mod types;
28
29use crate::{
30    SyntaxNode,
31    lexer::{LexError, Token},
32    syntax_kind::{SyntaxKind, SyntaxKind::*},
33};
34pub use grammar::{parse_expression_entry, parse_file, parse_module_entry, parse_statement_entry};
35use rowan::{Checkpoint, GreenNode, GreenNodeBuilder, TextRange, TextSize};
36
37// =============================================================================
38// Parse Result
39// =============================================================================
40
41/// An error encountered during parsing.
42#[derive(Debug, Clone, PartialEq, Eq)]
43pub struct ParseError {
44    /// A description of the error.
45    pub message: String,
46    /// The source range where the error occurred.
47    ///
48    /// For "expected X" errors, this covers the found token so that
49    /// diagnostic carets underline it. For recovery errors, this spans the
50    /// tokens wrapped in the ERROR node.
51    pub range: TextRange,
52    /// The token that was found (for "expected X, found Y" errors).
53    pub found: Option<String>,
54    /// The list of expected tokens (for structured error messages).
55    pub expected: Vec<String>,
56}
57
58/// The result of a parse operation.
59///
60/// A parse result always contains a syntax tree, even if errors occurred.
61/// This is essential for IDE use cases where we need to provide feedback
62/// even on incomplete or invalid code.
63pub struct Parse {
64    /// The green tree (immutable, cheap to clone).
65    green: GreenNode,
66    /// Errors encountered during parsing.
67    errors: Vec<ParseError>,
68    /// Errors encountered during lexing.
69    lex_errors: Vec<LexError>,
70}
71
72impl Parse {
73    /// Get the root syntax node.
74    pub fn syntax(&self) -> SyntaxNode {
75        SyntaxNode::new_root(self.green.clone())
76    }
77
78    /// Get the parse errors.
79    pub fn errors(&self) -> &[ParseError] {
80        &self.errors
81    }
82
83    /// Get the lexer errors.
84    pub fn lex_errors(&self) -> &[LexError] {
85        &self.lex_errors
86    }
87
88    /// Check if the parse was successful (no errors).
89    pub fn is_ok(&self) -> bool {
90        self.errors.is_empty() && self.lex_errors.is_empty()
91    }
92
93    /// Convert to a Result, returning the syntax node on success or errors on failure.
94    pub fn ok(self) -> Result<SyntaxNode, Vec<ParseError>> {
95        if self.errors.is_empty() && self.lex_errors.is_empty() { Ok(self.syntax()) } else { Err(self.errors) }
96    }
97}
98
99// =============================================================================
100// Marker Types
101// =============================================================================
102
103/// A marker for a node that is currently being parsed.
104///
105/// Call `complete()` to finish the node with a specific kind, or `abandon()`
106/// to cancel without creating a node. Dropping a `Marker` without calling
107/// either method will panic in debug builds.
108pub struct Marker {
109    /// The checkpoint position where this node started.
110    checkpoint: Checkpoint,
111    /// Debug bomb to catch forgotten markers.
112    #[cfg(debug_assertions)]
113    completed: bool,
114}
115
116impl Marker {
117    fn new(checkpoint: Checkpoint) -> Self {
118        Self {
119            checkpoint,
120            #[cfg(debug_assertions)]
121            completed: false,
122        }
123    }
124
125    /// Finish this node with the given kind.
126    #[allow(unused_mut)]
127    pub fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
128        #[cfg(debug_assertions)]
129        {
130            self.completed = true;
131        }
132        p.builder.start_node_at(self.checkpoint, kind.into());
133        p.builder.finish_node();
134        CompletedMarker { _checkpoint: self.checkpoint, kind }
135    }
136
137    /// Abandon this marker without creating a node.
138    #[allow(unused_mut)] // `mut` needed for debug_assertions cfg
139    pub fn abandon(mut self, _p: &mut Parser) {
140        #[cfg(debug_assertions)]
141        {
142            self.completed = true;
143        }
144        // Nothing to do - we just don't create the node
145    }
146}
147
148#[cfg(debug_assertions)]
149impl Drop for Marker {
150    fn drop(&mut self) {
151        if !self.completed && !std::thread::panicking() {
152            panic!("Marker was dropped without being completed or abandoned");
153        }
154    }
155}
156
157/// A marker for a completed node.
158///
159/// Can be used with `precede()` to wrap the node in an outer node retroactively.
160/// This is essential for left-associative operators in Pratt parsing.
161#[derive(Clone, Copy)]
162pub struct CompletedMarker {
163    /// The checkpoint where this node started (for precede).
164    _checkpoint: Checkpoint,
165    /// The kind of the completed node.
166    kind: SyntaxKind,
167}
168
169impl CompletedMarker {
170    /// Create a new marker that will wrap this completed node.
171    ///
172    /// Used for left-associative operators: parse LHS, see operator, then
173    /// wrap LHS in a binary expression node.
174    pub fn precede(self, _p: &mut Parser) -> Marker {
175        // We need a new checkpoint at the same position as the completed node.
176        // Since GreenNodeBuilder doesn't expose the old checkpoint position,
177        // we use start_node_at with the saved checkpoint.
178        Marker::new(self._checkpoint)
179    }
180
181    /// Get the kind of the completed node.
182    pub fn kind(&self) -> SyntaxKind {
183        self.kind
184    }
185}
186
187// =============================================================================
188// Parser
189// =============================================================================
190
191/// The parser state.
192///
193/// This struct maintains the state needed to build a rowan syntax tree from
194/// a token stream. It provides methods for token inspection, consumption,
195/// and tree building.
196pub struct Parser<'t, 's> {
197    /// The source text being parsed.
198    source: &'s str,
199    /// The token stream (from the lexer).
200    tokens: &'t [Token],
201    /// Current position in the token stream.
202    pos: usize,
203    /// Current byte offset into the source text.
204    byte_offset: usize,
205    /// The green tree builder.
206    builder: GreenNodeBuilder<'static>,
207    /// Accumulated parse errors.
208    errors: Vec<ParseError>,
209    /// Whether the parser is currently in an error state.
210    ///
211    /// When `true`, subsequent errors are suppressed until recovery completes.
212    /// This prevents cascading errors from a single syntactic mistake.
213    erroring: bool,
214}
215
216impl<'t, 's> Parser<'t, 's> {
217    /// Create a new parser for the given source and tokens.
218    pub fn new(source: &'s str, tokens: &'t [Token]) -> Self {
219        Self {
220            source,
221            tokens,
222            pos: 0,
223            byte_offset: 0,
224            builder: GreenNodeBuilder::new(),
225            errors: Vec::new(),
226            erroring: false,
227        }
228    }
229
230    // =========================================================================
231    // Marker-based Node Building
232    // =========================================================================
233
234    /// Start a new node, returning a Marker.
235    ///
236    /// The marker must be completed with `complete()` or abandoned with
237    /// `abandon()`. Dropping it without doing either will panic.
238    pub fn start(&mut self) -> Marker {
239        Marker::new(self.builder.checkpoint())
240    }
241
242    // =========================================================================
243    // Token Inspection
244    // =========================================================================
245
246    /// Get the kind of the current token (or EOF if at end).
247    pub fn current(&self) -> SyntaxKind {
248        self.nth(0)
249    }
250
251    /// Look ahead by `n` tokens and return the kind (skipping trivia).
252    pub fn nth(&self, n: usize) -> SyntaxKind {
253        self.nth_non_trivia(n)
254    }
255
256    /// Look ahead by `n` non-trivia tokens.
257    fn nth_non_trivia(&self, n: usize) -> SyntaxKind {
258        let mut pos = self.pos;
259        let mut count = 0;
260        while pos < self.tokens.len() {
261            let kind = self.tokens[pos].kind;
262            if !kind.is_trivia() {
263                if count == n {
264                    return kind;
265                }
266                count += 1;
267            }
268            pos += 1;
269        }
270        EOF
271    }
272
273    /// Get the kind of the current token including trivia.
274    pub fn current_including_trivia(&self) -> SyntaxKind {
275        self.tokens.get(self.pos).map(|t| t.kind).unwrap_or(EOF)
276    }
277
278    /// Check if the current token matches the given kind.
279    pub fn at(&self, kind: SyntaxKind) -> bool {
280        self.current() == kind
281    }
282
283    /// Check if the current token matches any of the given kinds.
284    pub fn at_any(&self, kinds: &[SyntaxKind]) -> bool {
285        kinds.contains(&self.current())
286    }
287
288    /// Check if we're at the end of the token stream.
289    pub fn at_eof(&self) -> bool {
290        self.at(EOF)
291    }
292
293    /// Get the text of the current token.
294    pub fn current_text(&self) -> &'s str {
295        if self.pos >= self.tokens.len() {
296            return "";
297        }
298        let len = self.tokens[self.pos].len as usize;
299        &self.source[self.byte_offset..self.byte_offset + len]
300    }
301
302    /// Get the `TextRange` covering the current non-trivia token.
303    ///
304    /// Walks past trivia to find the next meaningful token and returns its
305    /// range. At EOF, returns a zero-length range at the current byte offset.
306    fn current_token_range(&self) -> TextRange {
307        let mut pos = self.pos;
308        let mut offset = self.byte_offset;
309        while pos < self.tokens.len() {
310            let token = &self.tokens[pos];
311            if !token.kind.is_trivia() {
312                let start = TextSize::new(offset as u32);
313                let end = TextSize::new(offset as u32 + token.len);
314                return TextRange::new(start, end);
315            }
316            offset += token.len as usize;
317            pos += 1;
318        }
319        let pos = TextSize::new(offset as u32);
320        TextRange::empty(pos)
321    }
322
323    // =========================================================================
324    // Token Consumption
325    // =========================================================================
326
327    /// Consume the current token and add it to the tree.
328    pub fn bump(&mut self) {
329        self.bump_raw();
330    }
331
332    /// Consume the current token without skipping trivia first.
333    fn bump_raw(&mut self) {
334        if self.pos >= self.tokens.len() {
335            return;
336        }
337        let token = &self.tokens[self.pos];
338
339        // Lex error tokens enter error state to suppress cascade parse errors.
340        if token.kind == ERROR {
341            self.erroring = true;
342        }
343
344        let text = &self.source[self.byte_offset..self.byte_offset + token.len as usize];
345        self.builder.token(token.kind.into(), text);
346        self.byte_offset += token.len as usize;
347        self.pos += 1;
348    }
349
350    /// Skip trivia tokens, adding them to the tree.
351    pub fn skip_trivia(&mut self) {
352        while self.current_including_trivia().is_trivia() {
353            self.bump_raw();
354        }
355    }
356
357    /// Consume the current token if it matches the given kind.
358    /// Returns true if consumed.
359    pub fn eat(&mut self, kind: SyntaxKind) -> bool {
360        self.skip_trivia();
361        if self.current_including_trivia() == kind {
362            self.bump_raw();
363            true
364        } else {
365            false
366        }
367    }
368
369    /// Consume any trivia and then the next token, regardless of kind.
370    pub fn bump_any(&mut self) {
371        self.skip_trivia();
372        self.bump_raw();
373    }
374
375    // =========================================================================
376    // Direct Node Building
377    // =========================================================================
378
379    /// Start a new node of the given kind.
380    ///
381    /// Used by error recovery. For general parsing, prefer `start()` and
382    /// `Marker::complete()`.
383    pub fn start_node(&mut self, kind: SyntaxKind) {
384        self.builder.start_node(kind.into());
385    }
386
387    /// Finish the current node.
388    pub fn finish_node(&mut self) {
389        self.builder.finish_node();
390    }
391
392    /// Create a checkpoint for later wrapping.
393    pub fn checkpoint(&self) -> Checkpoint {
394        self.builder.checkpoint()
395    }
396
397    /// Start a node at a previous checkpoint.
398    pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
399        self.builder.start_node_at(checkpoint, kind.into());
400    }
401
402    // =========================================================================
403    // Error Handling
404    // =========================================================================
405
406    /// Expect a token of the given kind.
407    ///
408    /// If the current token matches, it is consumed. Otherwise, an error
409    /// is recorded but no token is consumed.
410    pub fn expect(&mut self, kind: SyntaxKind) -> bool {
411        if self.eat(kind) {
412            true
413        } else {
414            if !self.erroring {
415                let found_text = self.current_text().to_string();
416                let expected_name = kind.user_friendly_name();
417                self.errors.push(ParseError {
418                    message: format!("expected {}", expected_name),
419                    range: self.current_token_range(),
420                    found: Some(found_text),
421                    expected: vec![expected_name.to_string()],
422                });
423                self.erroring = true;
424            }
425            false
426        }
427    }
428
429    /// Record a parse error at the current position.
430    pub fn error(&mut self, message: impl std::fmt::Display) {
431        if self.erroring {
432            return;
433        }
434        let found_text = if !self.at_eof() { Some(self.current_text().to_string()) } else { None };
435        self.errors.push(ParseError {
436            message: message.to_string(),
437            range: self.current_token_range(),
438            found: found_text,
439            expected: vec![],
440        });
441        self.erroring = true;
442    }
443
444    /// Record an "unexpected token" error with explicit expected tokens.
445    ///
446    /// This sets both `found` and `expected` fields, allowing rowan.rs to emit
447    /// a ParserError::unexpected instead of ParserError::custom.
448    /// The `found_kind` parameter should be the SyntaxKind of the unexpected token.
449    pub fn error_unexpected(&mut self, found_kind: SyntaxKind, expected: &[&str]) {
450        if self.erroring {
451            return;
452        }
453        let range = self.current_token_range();
454        // For 'found', use the actual token text for identifiers and numbers
455        // (since "an identifier" is less helpful than showing the actual name).
456        // For other tokens, use user_friendly_name with quotes stripped.
457        let found_text = if matches!(found_kind, IDENT | INTEGER) {
458            let start = u32::from(range.start()) as usize;
459            let end = u32::from(range.end()) as usize;
460            if start < end && end <= self.source.len() {
461                self.source[start..end].to_string()
462            } else {
463                found_kind.user_friendly_name().trim_matches('\'').to_string()
464            }
465        } else {
466            found_kind.user_friendly_name().trim_matches('\'').to_string()
467        };
468        self.errors.push(ParseError {
469            message: format!("expected {}", expected.join(", ")),
470            range,
471            found: Some(found_text),
472            expected: expected.iter().map(|s| s.to_string()).collect(),
473        });
474        self.erroring = true;
475    }
476
477    /// Record a parse error spanning from `start` to the current position.
478    fn error_span(&mut self, message: impl std::fmt::Display, start: usize) {
479        if self.erroring {
480            return;
481        }
482        self.errors.push(ParseError {
483            message: message.to_string(),
484            range: TextRange::new(TextSize::new(start as u32), TextSize::new(self.byte_offset as u32)),
485            found: None,
486            expected: vec![],
487        });
488        self.erroring = true;
489    }
490
491    /// Wrap unexpected tokens in an ERROR node until we reach a recovery point.
492    ///
493    /// This consumes tokens until we see one of the recovery tokens or EOF.
494    /// If already at a recovery token, consumes it to ensure progress is made.
495    /// The error range spans the tokens wrapped in the ERROR node.
496    pub fn error_recover(&mut self, message: &str, recovery: &[SyntaxKind]) {
497        let start = self.byte_offset;
498        self.start_node(ERROR);
499        self.skip_to_recovery(recovery);
500        self.finish_node();
501
502        // Record error with the span of consumed tokens
503        self.error_span(message, start);
504
505        // Recovery complete — allow new errors for the next construct.
506        self.erroring = false;
507    }
508
509    /// Skip tokens until a recovery point, wrapping them in an ERROR node.
510    /// Unlike `error_recover`, this does not emit an error message (useful when
511    /// the caller has already reported the error).
512    ///
513    /// Balanced brace pairs (`{ ... }`) encountered during recovery are
514    /// consumed as a unit so that we don't stop at an inner `}` that belongs
515    /// to a nested block.
516    pub fn recover(&mut self, recovery: &[SyntaxKind]) {
517        self.start_node(ERROR);
518        self.skip_to_recovery(recovery);
519        self.finish_node();
520
521        // Recovery complete — allow new errors for the next construct.
522        self.erroring = false;
523    }
524
525    /// Skip tokens until we reach a recovery point or EOF.
526    ///
527    /// If already at a recovery token (other than `}`), consumes it to ensure
528    /// progress. Balanced brace pairs are consumed as a unit so that we don't
529    /// stop at an inner `}` that belongs to a nested block.
530    fn skip_to_recovery(&mut self, recovery: &[SyntaxKind]) {
531        if self.at_any(recovery) && !self.at(R_BRACE) && !self.at_eof() {
532            self.bump_any();
533        }
534        let mut brace_depth: u32 = 0;
535        while !self.at_eof() {
536            match self.current() {
537                L_BRACE => brace_depth += 1,
538                R_BRACE if brace_depth > 0 => brace_depth -= 1,
539                kind if brace_depth == 0 && recovery.contains(&kind) => break,
540                _ => {}
541            }
542            self.bump_any();
543        }
544    }
545
546    /// Create an ERROR node containing the current token.
547    /// The error range spans the single consumed token.
548    pub fn error_and_bump(&mut self, message: &str) {
549        let start = self.byte_offset;
550        self.start_node(ERROR);
551        self.bump_any();
552        self.finish_node();
553        self.error_span(message, start);
554
555        // Consumed the bad token — allow new errors for the next construct.
556        self.erroring = false;
557    }
558
559    // =========================================================================
560    // Completion
561    // =========================================================================
562
563    /// Get the current number of accumulated parse errors.
564    pub fn error_count(&self) -> usize {
565        self.errors.len()
566    }
567
568    /// Finish parsing and return the parse result.
569    pub fn finish(self, lex_errors: Vec<LexError>) -> Parse {
570        Parse { green: self.builder.finish(), errors: self.errors, lex_errors }
571    }
572}
573
574// =============================================================================
575// Recovery Token Sets
576// =============================================================================
577
578/// Tokens that can start a statement (for recovery).
579pub(crate) const STMT_RECOVERY: &[SyntaxKind] =
580    &[KW_LET, KW_CONST, KW_RETURN, KW_IF, KW_FOR, KW_ASSERT, KW_ASSERT_EQ, KW_ASSERT_NEQ, L_BRACE, R_BRACE, SEMICOLON];
581
582/// Tokens that can start a top-level item (for recovery).
583pub(crate) const ITEM_RECOVERY: &[SyntaxKind] =
584    &[KW_IMPORT, KW_PROGRAM, KW_FN, KW_STRUCT, KW_RECORD, KW_MAPPING, KW_STORAGE, KW_CONST, KW_FINAL, AT, R_BRACE];
585
586/// Tokens that indicate we should stop expression recovery.
587pub(crate) const EXPR_RECOVERY: &[SyntaxKind] = &[
588    SEMICOLON,
589    COMMA,
590    R_PAREN,
591    R_BRACKET,
592    R_BRACE,
593    KW_LET,
594    KW_CONST,
595    KW_RETURN,
596    KW_IF,
597    KW_FOR,
598    KW_ASSERT,
599    KW_ASSERT_EQ,
600    KW_ASSERT_NEQ,
601];
602
603/// Tokens for recovering inside type parsing.
604pub(crate) const TYPE_RECOVERY: &[SyntaxKind] =
605    &[COMMA, GT, R_BRACKET, R_PAREN, L_BRACE, R_BRACE, EQ, SEMICOLON, ARROW];
606
607/// Tokens for recovering inside parameter lists.
608pub(crate) const PARAM_RECOVERY: &[SyntaxKind] = &[COMMA, R_PAREN, L_BRACE, ARROW];
609
610#[cfg(test)]
611mod tests {
612    use super::*;
613    use crate::lexer::lex;
614    use expect_test::{Expect, expect};
615
616    /// Helper to parse source and format the tree for snapshot testing.
617    fn check_parse(input: &str, parse_fn: impl FnOnce(&mut Parser), expect: Expect) {
618        let (tokens, _) = lex(input);
619        let mut parser = Parser::new(input, &tokens);
620        parse_fn(&mut parser);
621        let parse = parser.finish(vec![]);
622        let output = format!("{:#?}", parse.syntax());
623        expect.assert_eq(&output);
624    }
625
626    /// Helper to check parse errors.
627    fn check_parse_errors(input: &str, parse_fn: impl FnOnce(&mut Parser), expect: Expect) {
628        let (tokens, _) = lex(input);
629        let mut parser = Parser::new(input, &tokens);
630        parse_fn(&mut parser);
631        let parse = parser.finish(vec![]);
632        let output = parse
633            .errors()
634            .iter()
635            .map(|e| format!("{}..{}:{}", u32::from(e.range.start()), u32::from(e.range.end()), e.message))
636            .collect::<Vec<_>>()
637            .join("\n");
638        expect.assert_eq(&output);
639    }
640
641    // =========================================================================
642    // Legacy tests (using start_node/finish_node)
643    // =========================================================================
644
645    #[test]
646    fn parse_empty() {
647        check_parse(
648            "",
649            |p| {
650                p.start_node(ROOT);
651                p.finish_node();
652            },
653            expect![[r#"
654            ROOT@0..0
655        "#]],
656        );
657    }
658
659    #[test]
660    fn parse_whitespace_only() {
661        check_parse(
662            "   ",
663            |p| {
664                p.start_node(ROOT);
665                p.skip_trivia();
666                p.finish_node();
667            },
668            expect![[r#"
669            ROOT@0..3
670              WHITESPACE@0..3 "   "
671        "#]],
672        );
673    }
674
675    #[test]
676    fn parse_single_token() {
677        check_parse(
678            "foo",
679            |p| {
680                p.start_node(ROOT);
681                p.skip_trivia();
682                p.bump();
683                p.finish_node();
684            },
685            expect![[r#"
686            ROOT@0..3
687              IDENT@0..3 "foo"
688        "#]],
689        );
690    }
691
692    #[test]
693    fn parse_token_with_trivia() {
694        check_parse(
695            "  foo  ",
696            |p| {
697                p.start_node(ROOT);
698                p.skip_trivia();
699                p.bump();
700                p.skip_trivia();
701                p.finish_node();
702            },
703            expect![[r#"
704            ROOT@0..7
705              WHITESPACE@0..2 "  "
706              IDENT@2..5 "foo"
707              WHITESPACE@5..7 "  "
708        "#]],
709        );
710    }
711
712    #[test]
713    fn parse_nested_nodes() {
714        check_parse(
715            "(a)",
716            |p| {
717                p.start_node(ROOT);
718                p.start_node(PAREN_EXPR);
719                p.eat(L_PAREN);
720                p.skip_trivia();
721                p.start_node(PATH_EXPR);
722                p.bump();
723                p.finish_node();
724                p.eat(R_PAREN);
725                p.finish_node();
726                p.finish_node();
727            },
728            expect![[r#"
729            ROOT@0..3
730              PAREN_EXPR@0..3
731                L_PAREN@0..1 "("
732                PATH_EXPR@1..2
733                  IDENT@1..2 "a"
734                R_PAREN@2..3 ")"
735        "#]],
736        );
737    }
738
739    #[test]
740    fn parse_with_checkpoint() {
741        check_parse(
742            "1 + 2",
743            |p| {
744                p.start_node(ROOT);
745                p.skip_trivia();
746                let checkpoint = p.checkpoint();
747                p.start_node(LITERAL_INT);
748                p.bump();
749                p.finish_node();
750                p.skip_trivia();
751                p.start_node_at(checkpoint, BINARY_EXPR);
752                p.bump();
753                p.skip_trivia();
754                p.start_node(LITERAL_INT);
755                p.bump();
756                p.finish_node();
757                p.finish_node();
758                p.finish_node();
759            },
760            expect![[r#"
761                ROOT@0..5
762                  BINARY_EXPR@0..5
763                    LITERAL_INT@0..1
764                      INTEGER@0..1 "1"
765                    WHITESPACE@1..2 " "
766                    PLUS@2..3 "+"
767                    WHITESPACE@3..4 " "
768                    LITERAL_INT@4..5
769                      INTEGER@4..5 "2"
770            "#]],
771        );
772    }
773
774    #[test]
775    fn parse_expect_success() {
776        check_parse(
777            ";",
778            |p| {
779                p.start_node(ROOT);
780                p.expect(SEMICOLON);
781                p.finish_node();
782            },
783            expect![[r#"
784            ROOT@0..1
785              SEMICOLON@0..1 ";"
786        "#]],
787        );
788    }
789
790    #[test]
791    fn parse_expect_failure() {
792        check_parse_errors(
793            "foo",
794            |p| {
795                p.start_node(ROOT);
796                p.expect(SEMICOLON);
797                p.finish_node();
798            },
799            expect![[r#"0..3:expected ';'"#]],
800        );
801    }
802
803    #[test]
804    fn parse_error_recover() {
805        check_parse(
806            "garbage ; ok",
807            |p| {
808                p.start_node(ROOT);
809                p.error_recover("unexpected tokens", &[SEMICOLON]);
810                p.eat(SEMICOLON);
811                p.skip_trivia();
812                p.bump();
813                p.finish_node();
814            },
815            expect![[r#"
816            ROOT@0..12
817              ERROR@0..7
818                IDENT@0..7 "garbage"
819              WHITESPACE@7..8 " "
820              SEMICOLON@8..9 ";"
821              WHITESPACE@9..10 " "
822              IDENT@10..12 "ok"
823        "#]],
824        );
825    }
826
827    #[test]
828    fn parse_error_and_bump() {
829        check_parse(
830            "$",
831            |p| {
832                p.start_node(ROOT);
833                p.skip_trivia();
834                p.error_and_bump("unexpected token");
835                p.finish_node();
836            },
837            expect![[r#"
838            ROOT@0..1
839              ERROR@0..1
840                ERROR@0..1 "$"
841        "#]],
842        );
843    }
844
845    #[test]
846    fn parse_at_and_eat() {
847        check_parse(
848            "let x",
849            |p| {
850                p.start_node(ROOT);
851                assert!(p.at(KW_LET));
852                assert!(!p.at(KW_CONST));
853                p.eat(KW_LET);
854                assert!(p.at(IDENT));
855                p.skip_trivia();
856                p.bump();
857                p.finish_node();
858            },
859            expect![[r#"
860            ROOT@0..5
861              KW_LET@0..3 "let"
862              WHITESPACE@3..4 " "
863              IDENT@4..5 "x"
864        "#]],
865        );
866    }
867
868    #[test]
869    fn parse_nth_lookahead() {
870        check_parse(
871            "a + b * c",
872            |p| {
873                p.start_node(ROOT);
874                assert_eq!(p.nth(0), IDENT);
875                assert_eq!(p.nth(1), PLUS);
876                assert_eq!(p.nth(2), IDENT);
877                assert_eq!(p.nth(3), STAR);
878                assert_eq!(p.nth(4), IDENT);
879                assert_eq!(p.nth(5), EOF);
880                while !p.at_eof() {
881                    p.bump_any();
882                }
883                p.finish_node();
884            },
885            expect![[r#"
886            ROOT@0..9
887              IDENT@0..1 "a"
888              WHITESPACE@1..2 " "
889              PLUS@2..3 "+"
890              WHITESPACE@3..4 " "
891              IDENT@4..5 "b"
892              WHITESPACE@5..6 " "
893              STAR@6..7 "*"
894              WHITESPACE@7..8 " "
895              IDENT@8..9 "c"
896        "#]],
897        );
898    }
899
900    // =========================================================================
901    // Marker tests
902    // =========================================================================
903
904    #[test]
905    fn marker_complete() {
906        check_parse(
907            "foo",
908            |p| {
909                let m = p.start();
910                p.skip_trivia();
911                p.bump();
912                m.complete(p, ROOT);
913            },
914            expect![[r#"
915            ROOT@0..3
916              IDENT@0..3 "foo"
917        "#]],
918        );
919    }
920
921    #[test]
922    fn marker_precede() {
923        // Test precede for binary expression: parse "1 + 2"
924        check_parse(
925            "1 + 2",
926            |p| {
927                let root = p.start();
928                p.skip_trivia();
929
930                // Parse LHS
931                let lhs_m = p.start();
932                p.bump(); // 1
933                let lhs = lhs_m.complete(p, LITERAL_INT);
934
935                p.skip_trivia();
936
937                // See operator - wrap LHS in binary expr
938                let bin_m = lhs.precede(p);
939                p.bump(); // +
940
941                p.skip_trivia();
942
943                // Parse RHS
944                let rhs_m = p.start();
945                p.bump(); // 2
946                rhs_m.complete(p, LITERAL_INT);
947
948                bin_m.complete(p, BINARY_EXPR);
949                root.complete(p, ROOT);
950            },
951            expect![[r#"
952                ROOT@0..5
953                  BINARY_EXPR@0..5
954                    LITERAL_INT@0..1
955                      INTEGER@0..1 "1"
956                    WHITESPACE@1..2 " "
957                    PLUS@2..3 "+"
958                    WHITESPACE@3..4 " "
959                    LITERAL_INT@4..5
960                      INTEGER@4..5 "2"
961            "#]],
962        );
963    }
964
965    #[test]
966    fn marker_nested() {
967        check_parse(
968            "(a)",
969            |p| {
970                let root = p.start();
971                let paren = p.start();
972                p.eat(L_PAREN);
973                p.skip_trivia();
974                let name = p.start();
975                p.bump();
976                name.complete(p, PATH_EXPR);
977                p.eat(R_PAREN);
978                paren.complete(p, PAREN_EXPR);
979                root.complete(p, ROOT);
980            },
981            expect![[r#"
982            ROOT@0..3
983              PAREN_EXPR@0..3
984                L_PAREN@0..1 "("
985                PATH_EXPR@1..2
986                  IDENT@1..2 "a"
987                R_PAREN@2..3 ")"
988        "#]],
989        );
990    }
991
992    // =========================================================================
993    // Error Recovery Tests
994    // =========================================================================
995    //
996    // These tests verify that the parser can recover from various error
997    // conditions and continue parsing. The key invariants are:
998    // 1. Parser never panics
999    // 2. ERROR nodes contain malformed content
1000    // 3. Valid portions are parsed correctly
1001    // 4. Reasonable error messages are generated
1002
1003    /// Helper to parse a file and return both tree and errors.
1004    fn parse_file(input: &str) -> Parse {
1005        let (tokens, _) = lex(input);
1006        let mut parser = Parser::new(input, &tokens);
1007        let root = parser.start();
1008        parser.parse_file_items();
1009        root.complete(&mut parser, ROOT);
1010        parser.finish(vec![])
1011    }
1012
1013    /// Helper to parse a statement and return both tree and errors.
1014    fn parse_stmt_result(input: &str) -> Parse {
1015        let (tokens, _) = lex(input);
1016        let mut parser = Parser::new(input, &tokens);
1017        let root = parser.start();
1018        parser.parse_stmt();
1019        parser.skip_trivia();
1020        root.complete(&mut parser, ROOT);
1021        parser.finish(vec![])
1022    }
1023
1024    #[test]
1025    fn recover_missing_semicolon_let() {
1026        let parse = parse_stmt_result("let x = 1");
1027        // Parser should complete but report an error
1028        assert!(!parse.errors().is_empty(), "should have errors");
1029        // Tree should still have a LET_STMT
1030        let tree = format!("{:#?}", parse.syntax());
1031        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1032    }
1033
1034    #[test]
1035    fn recover_missing_semicolon_return() {
1036        let parse = parse_stmt_result("return 42");
1037        assert!(!parse.errors().is_empty(), "should have errors");
1038        let tree = format!("{:#?}", parse.syntax());
1039        assert!(tree.contains("RETURN_STMT"), "tree should have RETURN_STMT");
1040    }
1041
1042    #[test]
1043    fn recover_missing_expr_in_let() {
1044        let parse = parse_stmt_result("let x = ;");
1045        assert!(!parse.errors().is_empty(), "should have errors");
1046        let tree = format!("{:#?}", parse.syntax());
1047        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1048    }
1049
1050    #[test]
1051    fn recover_missing_type_in_let() {
1052        let parse = parse_stmt_result("let x: = 1;");
1053        assert!(!parse.errors().is_empty(), "should have errors");
1054        let tree = format!("{:#?}", parse.syntax());
1055        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1056    }
1057
1058    #[test]
1059    fn recover_missing_condition_in_if() {
1060        // When the condition is missing and we have `if { }`, the `{` is parsed
1061        // as an empty block expression in the condition position. The parser
1062        // should still recover and produce an IF_STMT.
1063        let parse = parse_stmt_result("if { }");
1064        // May or may not have errors depending on interpretation
1065        let tree = format!("{:#?}", parse.syntax());
1066        assert!(tree.contains("IF_STMT"), "tree should have IF_STMT");
1067    }
1068
1069    #[test]
1070    fn recover_missing_block_in_if() {
1071        let parse = parse_stmt_result("if x");
1072        assert!(!parse.errors().is_empty(), "should have errors");
1073        let tree = format!("{:#?}", parse.syntax());
1074        assert!(tree.contains("IF_STMT"), "tree should have IF_STMT");
1075    }
1076
1077    #[test]
1078    fn recover_missing_range_in_for() {
1079        let parse = parse_stmt_result("for i in { }");
1080        assert!(!parse.errors().is_empty(), "should have errors");
1081        let tree = format!("{:#?}", parse.syntax());
1082        assert!(tree.contains("FOR_STMT"), "tree should have FOR_STMT");
1083    }
1084
1085    #[test]
1086    fn recover_missing_operand_binary() {
1087        let parse = parse_stmt_result("let x = 1 + ;");
1088        assert!(!parse.errors().is_empty(), "should have errors");
1089        let tree = format!("{:#?}", parse.syntax());
1090        assert!(tree.contains("BINARY_EXPR"), "tree should have BINARY_EXPR");
1091    }
1092
1093    #[test]
1094    fn recover_missing_operand_unary() {
1095        let parse = parse_stmt_result("let x = -;");
1096        assert!(!parse.errors().is_empty(), "should have errors");
1097        let tree = format!("{:#?}", parse.syntax());
1098        assert!(tree.contains("UNARY_EXPR"), "tree should have UNARY_EXPR");
1099    }
1100
1101    #[test]
1102    fn recover_unclosed_paren() {
1103        let parse = parse_stmt_result("let x = (1 + 2;");
1104        assert!(!parse.errors().is_empty(), "should have errors");
1105        let tree = format!("{:#?}", parse.syntax());
1106        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1107    }
1108
1109    #[test]
1110    fn recover_unclosed_bracket() {
1111        let parse = parse_stmt_result("let x = [1, 2;");
1112        assert!(!parse.errors().is_empty(), "should have errors");
1113        let tree = format!("{:#?}", parse.syntax());
1114        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1115    }
1116
1117    #[test]
1118    fn recover_invalid_token_in_expr() {
1119        let parse = parse_stmt_result("let x = 1 @ 2;");
1120        assert!(!parse.errors().is_empty(), "should have errors");
1121        let tree = format!("{:#?}", parse.syntax());
1122        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1123    }
1124
1125    #[test]
1126    fn recover_malformed_assert() {
1127        let parse = parse_stmt_result("assert();");
1128        assert!(!parse.errors().is_empty(), "should have errors");
1129        let tree = format!("{:#?}", parse.syntax());
1130        assert!(tree.contains("ASSERT_STMT"), "tree should have ASSERT_STMT");
1131    }
1132
1133    #[test]
1134    fn recover_malformed_assert_eq() {
1135        let parse = parse_stmt_result("assert_eq(1);");
1136        assert!(!parse.errors().is_empty(), "should have errors");
1137        let tree = format!("{:#?}", parse.syntax());
1138        assert!(tree.contains("ASSERT_EQ_STMT"), "tree should have ASSERT_EQ_STMT");
1139    }
1140
1141    #[test]
1142    fn recover_valid_statement_after_malformed_assert_eq_in_block() {
1143        let parse = parse_file(
1144            r#"program test.aleo {
1145                fn main(a: bool) -> bool {
1146                    assert_eq(a == 1u8);
1147                    assert(1u8);
1148                    return a;
1149                }
1150            }"#,
1151        );
1152        assert!(!parse.errors().is_empty(), "should have errors");
1153        let tree = format!("{:#?}", parse.syntax());
1154        assert!(tree.contains("ASSERT_EQ_STMT"), "tree should have ASSERT_EQ_STMT");
1155        assert_eq!(tree.matches("ASSERT_STMT@").count(), 1, "should preserve the valid assert statement");
1156        assert!(tree.contains("RETURN_STMT"), "tree should preserve the following return statement");
1157    }
1158
1159    #[test]
1160    fn recover_trailing_tokens_inside_expression_statement() {
1161        let parse = parse_stmt_result("final finalize_foo(a);");
1162        assert!(!parse.errors().is_empty(), "should have errors");
1163        let tree = format!("{:#?}", parse.syntax());
1164        assert!(tree.contains("EXPR_STMT"), "tree should have EXPR_STMT");
1165        assert!(tree.contains("ERROR"), "tree should capture trailing junk in an ERROR node");
1166        assert!(tree.contains("\"finalize_foo\""), "tree should preserve the trailing call tokens");
1167    }
1168
1169    #[test]
1170    fn recover_missing_assignment_rhs() {
1171        let parse = parse_stmt_result("x = ;");
1172        assert!(!parse.errors().is_empty(), "should have errors");
1173        let tree = format!("{:#?}", parse.syntax());
1174        assert!(tree.contains("ASSIGN_STMT"), "tree should have ASSIGN_STMT");
1175    }
1176
1177    #[test]
1178    fn recover_malformed_function_missing_params() {
1179        let parse = parse_file("program test.aleo { fn foo { } }");
1180        assert!(!parse.errors().is_empty(), "should have errors");
1181        let tree = format!("{:#?}", parse.syntax());
1182        assert!(tree.contains("PROGRAM_DECL"), "tree should have PROGRAM_DECL");
1183        assert!(tree.contains("FUNCTION_DEF"), "tree should have FUNCTION_DEF");
1184    }
1185
1186    #[test]
1187    fn recover_malformed_function_missing_body() {
1188        let parse = parse_file("program test.aleo { fn foo() }");
1189        assert!(!parse.errors().is_empty(), "should have errors");
1190        let tree = format!("{:#?}", parse.syntax());
1191        assert!(tree.contains("PROGRAM_DECL"), "tree should have PROGRAM_DECL");
1192        assert!(tree.contains("FUNCTION_DEF"), "tree should have FUNCTION_DEF");
1193    }
1194
1195    #[test]
1196    fn recover_malformed_struct_field() {
1197        let parse = parse_file("program test.aleo { struct Foo { x, y: u32 } }");
1198        assert!(!parse.errors().is_empty(), "should have errors");
1199        let tree = format!("{:#?}", parse.syntax());
1200        assert!(tree.contains("STRUCT_DEF"), "tree should have STRUCT_DEF");
1201    }
1202
1203    #[test]
1204    fn recover_malformed_mapping() {
1205        let parse = parse_file("program test.aleo { mapping balances: => u64; }");
1206        assert!(!parse.errors().is_empty(), "should have errors");
1207        let tree = format!("{:#?}", parse.syntax());
1208        assert!(tree.contains("MAPPING_DEF"), "tree should have MAPPING_DEF");
1209    }
1210
1211    #[test]
1212    fn recover_multiple_errors_in_function() {
1213        let parse = parse_file(
1214            r#"program test.aleo {
1215                fn foo() {
1216                    let x = ;
1217                    let y: = 1;
1218                    return x +;
1219                }
1220            }"#,
1221        );
1222        // Should have multiple errors but still parse the structure
1223        assert!(parse.errors().len() >= 3, "should have at least 3 errors");
1224        let tree = format!("{:#?}", parse.syntax());
1225        assert!(tree.contains("FUNCTION_DEF"), "tree should have FUNCTION_DEF");
1226        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1227        assert!(tree.contains("RETURN_STMT"), "tree should have RETURN_STMT");
1228    }
1229
1230    #[test]
1231    fn recover_valid_items_after_error() {
1232        let parse = parse_file(
1233            r#"program test.aleo {
1234                struct Invalid { x }
1235                struct Valid { y: u32 }
1236            }"#,
1237        );
1238        // Should have errors but parse both structs
1239        assert!(!parse.errors().is_empty(), "should have errors");
1240        let tree = format!("{:#?}", parse.syntax());
1241        // Both structs should be present
1242        let struct_count = tree.matches("STRUCT_DEF").count();
1243        assert_eq!(struct_count, 2, "should have 2 struct definitions");
1244    }
1245
1246    #[test]
1247    fn recover_empty_tuple_pattern() {
1248        let parse = parse_stmt_result("let () = foo;");
1249        // Empty tuple pattern should work
1250        let tree = format!("{:#?}", parse.syntax());
1251        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1252        assert!(tree.contains("TUPLE_PATTERN"), "tree should have TUPLE_PATTERN");
1253    }
1254
1255    #[test]
1256    fn recover_nested_errors() {
1257        // Errors within nested expressions
1258        let parse = parse_stmt_result("let x = ((1 + ) + 2);");
1259        assert!(!parse.errors().is_empty(), "should have errors");
1260        let tree = format!("{:#?}", parse.syntax());
1261        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1262        assert!(tree.contains("PAREN_EXPR"), "tree should have PAREN_EXPR");
1263    }
1264
1265    #[test]
1266    fn recover_ternary_missing_then() {
1267        let parse = parse_stmt_result("let x = cond ? : 2;");
1268        assert!(!parse.errors().is_empty(), "should have errors");
1269        let tree = format!("{:#?}", parse.syntax());
1270        assert!(tree.contains("TERNARY_EXPR"), "tree should have TERNARY_EXPR");
1271    }
1272
1273    #[test]
1274    fn recover_ternary_missing_else() {
1275        let parse = parse_stmt_result("let x = cond ? 1 :;");
1276        assert!(!parse.errors().is_empty(), "should have errors");
1277        let tree = format!("{:#?}", parse.syntax());
1278        assert!(tree.contains("TERNARY_EXPR"), "tree should have TERNARY_EXPR");
1279    }
1280
1281    #[test]
1282    fn never_panics_on_garbage() {
1283        // Feed various garbage inputs - parser should never panic
1284        let garbage_inputs = [
1285            "@#$%^&*()",
1286            "{{{{{{",
1287            "}}}}}}",
1288            "let let let",
1289            "program { program { program",
1290            "struct struct struct",
1291            ";;;;;;",
1292            "++ -- ** //",
1293            "",
1294            "   \n\t\r  ",
1295            "🦀🦀🦀", // Unicode
1296        ];
1297
1298        for input in garbage_inputs {
1299            // This should never panic
1300            let _ = parse_file(input);
1301        }
1302    }
1303}
leo_parser_rowan/parser/mod.rs

leo_parser_rowan/parser/
mod.rs