leo_parser_rowan/parser/
mod.rs

1// Copyright (C) 2019-2026 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17//! Parser infrastructure for the rowan-based Leo parser.
18//!
19//! This module provides the core parser state and helper methods for building
20//! a rowan syntax tree from a token stream. The parser uses a hand-written
21//! recursive descent approach with support for error recovery.
22
23mod expressions;
24mod grammar;
25mod items;
26mod statements;
27mod types;
28
29use crate::{
30    SyntaxNode,
31    lexer::Token,
32    syntax_kind::{SyntaxKind, SyntaxKind::*},
33};
34pub use grammar::{parse_expression_entry, parse_file, parse_module_entry, parse_statement_entry};
35use rowan::{Checkpoint, GreenNode, GreenNodeBuilder, TextRange, TextSize};
36
37// =============================================================================
38// Parse Result
39// =============================================================================
40
41/// An error encountered during parsing.
42#[derive(Debug, Clone, PartialEq, Eq)]
43pub struct ParseError {
44    /// A description of the error.
45    pub message: String,
46    /// The source range where the error occurred.
47    ///
48    /// For "expected X" errors, this is typically a zero-length range at the
49    /// error position. For recovery errors, this spans the tokens wrapped in
50    /// the ERROR node.
51    pub range: TextRange,
52}
53
54/// The result of a parse operation.
55///
56/// A parse result always contains a syntax tree, even if errors occurred.
57/// This is essential for IDE use cases where we need to provide feedback
58/// even on incomplete or invalid code.
59pub struct Parse {
60    /// The green tree (immutable, cheap to clone).
61    green: GreenNode,
62    /// Errors encountered during parsing.
63    errors: Vec<ParseError>,
64}
65
66impl Parse {
67    /// Get the root syntax node.
68    pub fn syntax(&self) -> SyntaxNode {
69        SyntaxNode::new_root(self.green.clone())
70    }
71
72    /// Get the parse errors.
73    pub fn errors(&self) -> &[ParseError] {
74        &self.errors
75    }
76
77    /// Check if the parse was successful (no errors).
78    pub fn is_ok(&self) -> bool {
79        self.errors.is_empty()
80    }
81
82    /// Convert to a Result, returning the syntax node on success or errors on failure.
83    pub fn ok(self) -> Result<SyntaxNode, Vec<ParseError>> {
84        if self.errors.is_empty() { Ok(self.syntax()) } else { Err(self.errors) }
85    }
86}
87
88// =============================================================================
89// Marker Types
90// =============================================================================
91
92/// A marker for a node that is currently being parsed.
93///
94/// Call `complete()` to finish the node with a specific kind, or `abandon()`
95/// to cancel without creating a node. Dropping a `Marker` without calling
96/// either method will panic in debug builds.
97pub struct Marker {
98    /// The checkpoint position where this node started.
99    checkpoint: Checkpoint,
100    /// Debug bomb to catch forgotten markers.
101    #[cfg(debug_assertions)]
102    completed: bool,
103}
104
105impl Marker {
106    fn new(checkpoint: Checkpoint) -> Self {
107        Self {
108            checkpoint,
109            #[cfg(debug_assertions)]
110            completed: false,
111        }
112    }
113
114    /// Finish this node with the given kind.
115    #[allow(unused_mut)]
116    pub fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
117        #[cfg(debug_assertions)]
118        {
119            self.completed = true;
120        }
121        p.builder.start_node_at(self.checkpoint, kind.into());
122        p.builder.finish_node();
123        CompletedMarker { _checkpoint: self.checkpoint, kind }
124    }
125
126    /// Abandon this marker without creating a node.
127    #[allow(unused_mut)] // `mut` needed for debug_assertions cfg
128    pub fn abandon(mut self, _p: &mut Parser) {
129        #[cfg(debug_assertions)]
130        {
131            self.completed = true;
132        }
133        // Nothing to do - we just don't create the node
134    }
135}
136
137#[cfg(debug_assertions)]
138impl Drop for Marker {
139    fn drop(&mut self) {
140        if !self.completed && !std::thread::panicking() {
141            panic!("Marker was dropped without being completed or abandoned");
142        }
143    }
144}
145
146/// A marker for a completed node.
147///
148/// Can be used with `precede()` to wrap the node in an outer node retroactively.
149/// This is essential for left-associative operators in Pratt parsing.
150#[derive(Clone, Copy)]
151pub struct CompletedMarker {
152    /// The checkpoint where this node started (for precede).
153    _checkpoint: Checkpoint,
154    /// The kind of the completed node.
155    kind: SyntaxKind,
156}
157
158impl CompletedMarker {
159    /// Create a new marker that will wrap this completed node.
160    ///
161    /// Used for left-associative operators: parse LHS, see operator, then
162    /// wrap LHS in a binary expression node.
163    pub fn precede(self, _p: &mut Parser) -> Marker {
164        // We need a new checkpoint at the same position as the completed node.
165        // Since GreenNodeBuilder doesn't expose the old checkpoint position,
166        // we use start_node_at with the saved checkpoint.
167        Marker::new(self._checkpoint)
168    }
169
170    /// Get the kind of the completed node.
171    #[allow(unused)]
172    pub fn kind(&self) -> SyntaxKind {
173        self.kind
174    }
175}
176
177// =============================================================================
178// Parser
179// =============================================================================
180
181/// The parser state.
182///
183/// This struct maintains the state needed to build a rowan syntax tree from
184/// a token stream. It provides methods for token inspection, consumption,
185/// and tree building.
186pub struct Parser<'t, 's> {
187    /// The source text being parsed.
188    source: &'s str,
189    /// The token stream (from the lexer).
190    tokens: &'t [Token],
191    /// Current position in the token stream.
192    pos: usize,
193    /// Current byte offset into the source text.
194    byte_offset: usize,
195    /// The green tree builder.
196    builder: GreenNodeBuilder<'static>,
197    /// Accumulated parse errors.
198    errors: Vec<ParseError>,
199    /// Nesting depth of parentheses (for delimiter recovery).
200    paren_depth: u32,
201    /// Nesting depth of braces (for delimiter recovery).
202    brace_depth: u32,
203    /// Nesting depth of brackets (for delimiter recovery).
204    bracket_depth: u32,
205}
206
207impl<'t, 's> Parser<'t, 's> {
208    /// Create a new parser for the given source and tokens.
209    pub fn new(source: &'s str, tokens: &'t [Token]) -> Self {
210        Self {
211            source,
212            tokens,
213            pos: 0,
214            byte_offset: 0,
215            builder: GreenNodeBuilder::new(),
216            errors: Vec::new(),
217            paren_depth: 0,
218            brace_depth: 0,
219            bracket_depth: 0,
220        }
221    }
222
223    // =========================================================================
224    // Marker-based Node Building
225    // =========================================================================
226
227    /// Start a new node, returning a Marker.
228    ///
229    /// The marker must be completed with `complete()` or abandoned with
230    /// `abandon()`. Dropping it without doing either will panic.
231    pub fn start(&mut self) -> Marker {
232        Marker::new(self.builder.checkpoint())
233    }
234
235    // =========================================================================
236    // Token Inspection
237    // =========================================================================
238
239    /// Get the kind of the current token (or EOF if at end).
240    pub fn current(&self) -> SyntaxKind {
241        self.nth(0)
242    }
243
244    /// Look ahead by `n` tokens and return the kind (skipping trivia).
245    pub fn nth(&self, n: usize) -> SyntaxKind {
246        self.nth_non_trivia(n)
247    }
248
249    /// Look ahead by `n` non-trivia tokens.
250    fn nth_non_trivia(&self, n: usize) -> SyntaxKind {
251        let mut pos = self.pos;
252        let mut count = 0;
253        while pos < self.tokens.len() {
254            let kind = self.tokens[pos].kind;
255            if !kind.is_trivia() {
256                if count == n {
257                    return kind;
258                }
259                count += 1;
260            }
261            pos += 1;
262        }
263        EOF
264    }
265
266    /// Get the kind of the current token including trivia.
267    pub fn current_including_trivia(&self) -> SyntaxKind {
268        self.tokens.get(self.pos).map(|t| t.kind).unwrap_or(EOF)
269    }
270
271    /// Check if the current token matches the given kind.
272    pub fn at(&self, kind: SyntaxKind) -> bool {
273        self.current() == kind
274    }
275
276    /// Check if the current token matches any of the given kinds.
277    pub fn at_any(&self, kinds: &[SyntaxKind]) -> bool {
278        kinds.contains(&self.current())
279    }
280
281    /// Check if we're at the end of the token stream.
282    pub fn at_eof(&self) -> bool {
283        self.at(EOF)
284    }
285
286    /// Get the text of the current token.
287    #[allow(unused)]
288    pub fn current_text(&self) -> &'s str {
289        if self.pos >= self.tokens.len() {
290            return "";
291        }
292        let len = self.tokens[self.pos].len as usize;
293        &self.source[self.byte_offset..self.byte_offset + len]
294    }
295
296    /// Get the text of the nth token (skipping trivia).
297    #[allow(unused)]
298    pub fn nth_text(&self, n: usize) -> &'s str {
299        let mut pos = self.pos;
300        let mut offset = self.byte_offset;
301        let mut count = 0;
302
303        while pos < self.tokens.len() {
304            let token = &self.tokens[pos];
305            if !token.kind.is_trivia() {
306                if count == n {
307                    return &self.source[offset..offset + token.len as usize];
308                }
309                count += 1;
310            }
311            offset += token.len as usize;
312            pos += 1;
313        }
314        ""
315    }
316
317    // =========================================================================
318    // Token Consumption
319    // =========================================================================
320
321    /// Consume the current token and add it to the tree.
322    pub fn bump(&mut self) {
323        self.bump_raw();
324    }
325
326    /// Consume the current token without skipping trivia first.
327    ///
328    /// Also tracks delimiter depth for recovery purposes.
329    fn bump_raw(&mut self) {
330        if self.pos >= self.tokens.len() {
331            return;
332        }
333        let token = &self.tokens[self.pos];
334
335        // Track delimiter depth for recovery
336        match token.kind {
337            L_PAREN => self.paren_depth += 1,
338            R_PAREN => self.paren_depth = self.paren_depth.saturating_sub(1),
339            L_BRACE => self.brace_depth += 1,
340            R_BRACE => self.brace_depth = self.brace_depth.saturating_sub(1),
341            L_BRACKET => self.bracket_depth += 1,
342            R_BRACKET => self.bracket_depth = self.bracket_depth.saturating_sub(1),
343            _ => {}
344        }
345
346        let text = &self.source[self.byte_offset..self.byte_offset + token.len as usize];
347        self.builder.token(token.kind.into(), text);
348        self.byte_offset += token.len as usize;
349        self.pos += 1;
350    }
351
352    /// Skip trivia tokens, adding them to the tree.
353    pub fn skip_trivia(&mut self) {
354        while self.current_including_trivia().is_trivia() {
355            self.bump_raw();
356        }
357    }
358
359    /// Consume the current token if it matches the given kind.
360    /// Returns true if consumed.
361    pub fn eat(&mut self, kind: SyntaxKind) -> bool {
362        self.skip_trivia();
363        if self.current_including_trivia() == kind {
364            self.bump_raw();
365            true
366        } else {
367            false
368        }
369    }
370
371    /// Consume any trivia and then the next token, regardless of kind.
372    pub fn bump_any(&mut self) {
373        self.skip_trivia();
374        self.bump_raw();
375    }
376
377    // =========================================================================
378    // Direct Node Building
379    // =========================================================================
380
381    /// Start a new node of the given kind.
382    ///
383    /// Used by error recovery. For general parsing, prefer `start()` and
384    /// `Marker::complete()`.
385    pub fn start_node(&mut self, kind: SyntaxKind) {
386        self.builder.start_node(kind.into());
387    }
388
389    /// Finish the current node.
390    pub fn finish_node(&mut self) {
391        self.builder.finish_node();
392    }
393
394    /// Create a checkpoint for later wrapping.
395    pub fn checkpoint(&self) -> Checkpoint {
396        self.builder.checkpoint()
397    }
398
399    /// Start a node at a previous checkpoint.
400    pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
401        self.builder.start_node_at(checkpoint, kind.into());
402    }
403
404    // =========================================================================
405    // Error Handling
406    // =========================================================================
407
408    /// Expect a token of the given kind.
409    ///
410    /// If the current token matches, it is consumed. Otherwise, an error
411    /// is recorded but no token is consumed.
412    pub fn expect(&mut self, kind: SyntaxKind) -> bool {
413        if self.eat(kind) {
414            true
415        } else {
416            self.error(format!("expected {:?}", kind));
417            false
418        }
419    }
420
421    /// Record a parse error at the current position (zero-length range).
422    pub fn error(&mut self, message: String) {
423        let pos = TextSize::new(self.byte_offset as u32);
424        self.errors.push(ParseError { message, range: TextRange::empty(pos) });
425    }
426
427    /// Record a parse error spanning from `start` to the current position.
428    fn error_span(&mut self, message: String, start: usize) {
429        self.errors.push(ParseError {
430            message,
431            range: TextRange::new(TextSize::new(start as u32), TextSize::new(self.byte_offset as u32)),
432        });
433    }
434
435    /// Wrap unexpected tokens in an ERROR node until we reach a recovery point.
436    ///
437    /// This consumes tokens until we see one of the recovery tokens or EOF.
438    /// If already at a recovery token, consumes it to ensure progress is made.
439    /// The error range spans the tokens wrapped in the ERROR node.
440    pub fn error_recover(&mut self, message: &str, recovery: &[SyntaxKind]) {
441        let start = self.byte_offset;
442        self.start_node(ERROR);
443
444        // If we're already at a recovery token, consume it to ensure progress.
445        // This prevents infinite loops when a recovery token (like SEMICOLON)
446        // can't start the expected construct (like a statement).
447        if self.at_any(recovery) && !self.at(R_BRACE) && !self.at_eof() {
448            self.bump_any();
449        }
450
451        while !self.at_eof() && !self.at_any(recovery) {
452            self.bump_any();
453        }
454        self.finish_node();
455
456        // Record error with the span of consumed tokens
457        self.error_span(message.to_string(), start);
458    }
459
460    /// Create an ERROR node containing the current token.
461    /// The error range spans the single consumed token.
462    pub fn error_and_bump(&mut self, message: &str) {
463        let start = self.byte_offset;
464        self.start_node(ERROR);
465        self.bump_any();
466        self.finish_node();
467        self.error_span(message.to_string(), start);
468    }
469
470    // =========================================================================
471    // Delimiter Tracking
472    // =========================================================================
473
474    /// Get the current parenthesis nesting depth.
475    #[allow(unused)]
476    pub fn paren_depth(&self) -> u32 {
477        self.paren_depth
478    }
479
480    /// Get the current brace nesting depth.
481    #[allow(unused)]
482    pub fn brace_depth(&self) -> u32 {
483        self.brace_depth
484    }
485
486    /// Get the current bracket nesting depth.
487    #[allow(unused)]
488    pub fn bracket_depth(&self) -> u32 {
489        self.bracket_depth
490    }
491
492    /// Skip tokens until we reach a balanced closing delimiter at depth 0.
493    ///
494    /// This is useful for recovering from errors inside parenthesized expressions,
495    /// blocks, or array literals. It consumes tokens while tracking delimiter
496    /// depth, stopping when the matching close delimiter would bring us back to
497    /// the starting depth.
498    ///
499    /// Returns true if a balanced delimiter was found, false if EOF was reached.
500    #[allow(unused)]
501    pub fn skip_to_balanced(&mut self, open: SyntaxKind, close: SyntaxKind) -> bool {
502        let mut depth: u32 = 1;
503        while !self.at_eof() && depth > 0 {
504            if self.at(open) {
505                depth += 1;
506            } else if self.at(close) {
507                depth -= 1;
508                if depth == 0 {
509                    return true;
510                }
511            }
512            self.bump_any();
513        }
514        false
515    }
516
517    // =========================================================================
518    // Completion
519    // =========================================================================
520
521    /// Finish parsing and return the parse result.
522    pub fn finish(self) -> Parse {
523        Parse { green: self.builder.finish(), errors: self.errors }
524    }
525}
526
527// =============================================================================
528// Recovery Token Sets
529// =============================================================================
530
531/// Tokens that can start a statement (for recovery).
532pub const STMT_RECOVERY: &[SyntaxKind] =
533    &[KW_LET, KW_CONST, KW_RETURN, KW_IF, KW_FOR, KW_ASSERT, KW_ASSERT_EQ, KW_ASSERT_NEQ, L_BRACE, R_BRACE, SEMICOLON];
534
535/// Tokens that can start a top-level item (for recovery).
536pub const ITEM_RECOVERY: &[SyntaxKind] = &[
537    KW_IMPORT,
538    KW_PROGRAM,
539    KW_FUNCTION,
540    KW_TRANSITION,
541    KW_INLINE,
542    KW_STRUCT,
543    KW_RECORD,
544    KW_MAPPING,
545    KW_STORAGE,
546    KW_CONST,
547    KW_ASYNC,
548    AT,
549    R_BRACE,
550];
551
552/// Tokens that indicate we should stop expression recovery.
553pub const EXPR_RECOVERY: &[SyntaxKind] = &[
554    SEMICOLON,
555    COMMA,
556    R_PAREN,
557    R_BRACKET,
558    R_BRACE,
559    KW_LET,
560    KW_CONST,
561    KW_RETURN,
562    KW_IF,
563    KW_FOR,
564    KW_ASSERT,
565    KW_ASSERT_EQ,
566    KW_ASSERT_NEQ,
567];
568
569/// Tokens for recovering inside type parsing.
570pub const TYPE_RECOVERY: &[SyntaxKind] = &[COMMA, GT, R_BRACKET, R_PAREN, L_BRACE, R_BRACE, EQ, SEMICOLON, ARROW];
571
572/// Tokens for recovering inside parameter lists.
573pub const PARAM_RECOVERY: &[SyntaxKind] = &[COMMA, R_PAREN, L_BRACE, ARROW];
574
575#[cfg(test)]
576mod tests {
577    use super::*;
578    use crate::lexer::lex;
579    use expect_test::{Expect, expect};
580
581    /// Helper to parse source and format the tree for snapshot testing.
582    fn check_parse(input: &str, parse_fn: impl FnOnce(&mut Parser), expect: Expect) {
583        let (tokens, _) = lex(input);
584        let mut parser = Parser::new(input, &tokens);
585        parse_fn(&mut parser);
586        let parse = parser.finish();
587        let output = format!("{:#?}", parse.syntax());
588        expect.assert_eq(&output);
589    }
590
591    /// Helper to check parse errors.
592    fn check_parse_errors(input: &str, parse_fn: impl FnOnce(&mut Parser), expect: Expect) {
593        let (tokens, _) = lex(input);
594        let mut parser = Parser::new(input, &tokens);
595        parse_fn(&mut parser);
596        let parse = parser.finish();
597        let output = parse
598            .errors()
599            .iter()
600            .map(|e| format!("{}..{}:{}", u32::from(e.range.start()), u32::from(e.range.end()), e.message))
601            .collect::<Vec<_>>()
602            .join("\n");
603        expect.assert_eq(&output);
604    }
605
606    // =========================================================================
607    // Legacy tests (using start_node/finish_node)
608    // =========================================================================
609
610    #[test]
611    fn parse_empty() {
612        check_parse(
613            "",
614            |p| {
615                p.start_node(ROOT);
616                p.finish_node();
617            },
618            expect![[r#"
619            ROOT@0..0
620        "#]],
621        );
622    }
623
624    #[test]
625    fn parse_whitespace_only() {
626        check_parse(
627            "   ",
628            |p| {
629                p.start_node(ROOT);
630                p.skip_trivia();
631                p.finish_node();
632            },
633            expect![[r#"
634            ROOT@0..3
635              WHITESPACE@0..3 "   "
636        "#]],
637        );
638    }
639
640    #[test]
641    fn parse_single_token() {
642        check_parse(
643            "foo",
644            |p| {
645                p.start_node(ROOT);
646                p.skip_trivia();
647                p.bump();
648                p.finish_node();
649            },
650            expect![[r#"
651            ROOT@0..3
652              IDENT@0..3 "foo"
653        "#]],
654        );
655    }
656
657    #[test]
658    fn parse_token_with_trivia() {
659        check_parse(
660            "  foo  ",
661            |p| {
662                p.start_node(ROOT);
663                p.skip_trivia();
664                p.bump();
665                p.skip_trivia();
666                p.finish_node();
667            },
668            expect![[r#"
669            ROOT@0..7
670              WHITESPACE@0..2 "  "
671              IDENT@2..5 "foo"
672              WHITESPACE@5..7 "  "
673        "#]],
674        );
675    }
676
677    #[test]
678    fn parse_nested_nodes() {
679        check_parse(
680            "(a)",
681            |p| {
682                p.start_node(ROOT);
683                p.start_node(PAREN_EXPR);
684                p.eat(L_PAREN);
685                p.skip_trivia();
686                p.start_node(NAME_REF);
687                p.bump();
688                p.finish_node();
689                p.eat(R_PAREN);
690                p.finish_node();
691                p.finish_node();
692            },
693            expect![[r#"
694            ROOT@0..3
695              PAREN_EXPR@0..3
696                L_PAREN@0..1 "("
697                NAME_REF@1..2
698                  IDENT@1..2 "a"
699                R_PAREN@2..3 ")"
700        "#]],
701        );
702    }
703
704    #[test]
705    fn parse_with_checkpoint() {
706        check_parse(
707            "1 + 2",
708            |p| {
709                p.start_node(ROOT);
710                p.skip_trivia();
711                let checkpoint = p.checkpoint();
712                p.start_node(LITERAL);
713                p.bump();
714                p.finish_node();
715                p.skip_trivia();
716                p.start_node_at(checkpoint, BINARY_EXPR);
717                p.bump();
718                p.skip_trivia();
719                p.start_node(LITERAL);
720                p.bump();
721                p.finish_node();
722                p.finish_node();
723                p.finish_node();
724            },
725            expect![[r#"
726            ROOT@0..5
727              BINARY_EXPR@0..5
728                LITERAL@0..1
729                  INTEGER@0..1 "1"
730                WHITESPACE@1..2 " "
731                PLUS@2..3 "+"
732                WHITESPACE@3..4 " "
733                LITERAL@4..5
734                  INTEGER@4..5 "2"
735        "#]],
736        );
737    }
738
739    #[test]
740    fn parse_expect_success() {
741        check_parse(
742            ";",
743            |p| {
744                p.start_node(ROOT);
745                p.expect(SEMICOLON);
746                p.finish_node();
747            },
748            expect![[r#"
749            ROOT@0..1
750              SEMICOLON@0..1 ";"
751        "#]],
752        );
753    }
754
755    #[test]
756    fn parse_expect_failure() {
757        check_parse_errors(
758            "foo",
759            |p| {
760                p.start_node(ROOT);
761                p.expect(SEMICOLON);
762                p.finish_node();
763            },
764            expect![[r#"0..0:expected SEMICOLON"#]],
765        );
766    }
767
768    #[test]
769    fn parse_error_recover() {
770        check_parse(
771            "garbage ; ok",
772            |p| {
773                p.start_node(ROOT);
774                p.error_recover("unexpected tokens", &[SEMICOLON]);
775                p.eat(SEMICOLON);
776                p.skip_trivia();
777                p.bump();
778                p.finish_node();
779            },
780            expect![[r#"
781            ROOT@0..12
782              ERROR@0..7
783                IDENT@0..7 "garbage"
784              WHITESPACE@7..8 " "
785              SEMICOLON@8..9 ";"
786              WHITESPACE@9..10 " "
787              IDENT@10..12 "ok"
788        "#]],
789        );
790    }
791
792    #[test]
793    fn parse_error_and_bump() {
794        check_parse(
795            "$",
796            |p| {
797                p.start_node(ROOT);
798                p.skip_trivia();
799                p.error_and_bump("unexpected token");
800                p.finish_node();
801            },
802            expect![[r#"
803            ROOT@0..1
804              ERROR@0..1
805                ERROR@0..1 "$"
806        "#]],
807        );
808    }
809
810    #[test]
811    fn parse_at_and_eat() {
812        check_parse(
813            "let x",
814            |p| {
815                p.start_node(ROOT);
816                assert!(p.at(KW_LET));
817                assert!(!p.at(KW_CONST));
818                p.eat(KW_LET);
819                assert!(p.at(IDENT));
820                p.skip_trivia();
821                p.bump();
822                p.finish_node();
823            },
824            expect![[r#"
825            ROOT@0..5
826              KW_LET@0..3 "let"
827              WHITESPACE@3..4 " "
828              IDENT@4..5 "x"
829        "#]],
830        );
831    }
832
833    #[test]
834    fn parse_nth_lookahead() {
835        check_parse(
836            "a + b * c",
837            |p| {
838                p.start_node(ROOT);
839                assert_eq!(p.nth(0), IDENT);
840                assert_eq!(p.nth(1), PLUS);
841                assert_eq!(p.nth(2), IDENT);
842                assert_eq!(p.nth(3), STAR);
843                assert_eq!(p.nth(4), IDENT);
844                assert_eq!(p.nth(5), EOF);
845                while !p.at_eof() {
846                    p.bump_any();
847                }
848                p.finish_node();
849            },
850            expect![[r#"
851            ROOT@0..9
852              IDENT@0..1 "a"
853              WHITESPACE@1..2 " "
854              PLUS@2..3 "+"
855              WHITESPACE@3..4 " "
856              IDENT@4..5 "b"
857              WHITESPACE@5..6 " "
858              STAR@6..7 "*"
859              WHITESPACE@7..8 " "
860              IDENT@8..9 "c"
861        "#]],
862        );
863    }
864
865    // =========================================================================
866    // Marker tests
867    // =========================================================================
868
869    #[test]
870    fn marker_complete() {
871        check_parse(
872            "foo",
873            |p| {
874                let m = p.start();
875                p.skip_trivia();
876                p.bump();
877                m.complete(p, ROOT);
878            },
879            expect![[r#"
880            ROOT@0..3
881              IDENT@0..3 "foo"
882        "#]],
883        );
884    }
885
886    #[test]
887    fn marker_precede() {
888        // Test precede for binary expression: parse "1 + 2"
889        check_parse(
890            "1 + 2",
891            |p| {
892                let root = p.start();
893                p.skip_trivia();
894
895                // Parse LHS
896                let lhs_m = p.start();
897                p.bump(); // 1
898                let lhs = lhs_m.complete(p, LITERAL);
899
900                p.skip_trivia();
901
902                // See operator - wrap LHS in binary expr
903                let bin_m = lhs.precede(p);
904                p.bump(); // +
905
906                p.skip_trivia();
907
908                // Parse RHS
909                let rhs_m = p.start();
910                p.bump(); // 2
911                rhs_m.complete(p, LITERAL);
912
913                bin_m.complete(p, BINARY_EXPR);
914                root.complete(p, ROOT);
915            },
916            expect![[r#"
917            ROOT@0..5
918              BINARY_EXPR@0..5
919                LITERAL@0..1
920                  INTEGER@0..1 "1"
921                WHITESPACE@1..2 " "
922                PLUS@2..3 "+"
923                WHITESPACE@3..4 " "
924                LITERAL@4..5
925                  INTEGER@4..5 "2"
926        "#]],
927        );
928    }
929
930    #[test]
931    fn marker_nested() {
932        check_parse(
933            "(a)",
934            |p| {
935                let root = p.start();
936                let paren = p.start();
937                p.eat(L_PAREN);
938                p.skip_trivia();
939                let name = p.start();
940                p.bump();
941                name.complete(p, NAME_REF);
942                p.eat(R_PAREN);
943                paren.complete(p, PAREN_EXPR);
944                root.complete(p, ROOT);
945            },
946            expect![[r#"
947            ROOT@0..3
948              PAREN_EXPR@0..3
949                L_PAREN@0..1 "("
950                NAME_REF@1..2
951                  IDENT@1..2 "a"
952                R_PAREN@2..3 ")"
953        "#]],
954        );
955    }
956
957    // =========================================================================
958    // Error Recovery Tests
959    // =========================================================================
960    //
961    // These tests verify that the parser can recover from various error
962    // conditions and continue parsing. The key invariants are:
963    // 1. Parser never panics
964    // 2. ERROR nodes contain malformed content
965    // 3. Valid portions are parsed correctly
966    // 4. Reasonable error messages are generated
967
968    /// Helper to parse a file and return both tree and errors.
969    fn parse_file(input: &str) -> Parse {
970        let (tokens, _) = lex(input);
971        let mut parser = Parser::new(input, &tokens);
972        let root = parser.start();
973        parser.parse_file_items();
974        root.complete(&mut parser, ROOT);
975        parser.finish()
976    }
977
978    /// Helper to parse a statement and return both tree and errors.
979    fn parse_stmt_result(input: &str) -> Parse {
980        let (tokens, _) = lex(input);
981        let mut parser = Parser::new(input, &tokens);
982        let root = parser.start();
983        parser.parse_stmt();
984        parser.skip_trivia();
985        root.complete(&mut parser, ROOT);
986        parser.finish()
987    }
988
989    #[test]
990    fn recover_missing_semicolon_let() {
991        let parse = parse_stmt_result("let x = 1");
992        // Parser should complete but report an error
993        assert!(!parse.errors().is_empty(), "should have errors");
994        // Tree should still have a LET_STMT
995        let tree = format!("{:#?}", parse.syntax());
996        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
997    }
998
999    #[test]
1000    fn recover_missing_semicolon_return() {
1001        let parse = parse_stmt_result("return 42");
1002        assert!(!parse.errors().is_empty(), "should have errors");
1003        let tree = format!("{:#?}", parse.syntax());
1004        assert!(tree.contains("RETURN_STMT"), "tree should have RETURN_STMT");
1005    }
1006
1007    #[test]
1008    fn recover_missing_expr_in_let() {
1009        let parse = parse_stmt_result("let x = ;");
1010        assert!(!parse.errors().is_empty(), "should have errors");
1011        let tree = format!("{:#?}", parse.syntax());
1012        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1013    }
1014
1015    #[test]
1016    fn recover_missing_type_in_let() {
1017        let parse = parse_stmt_result("let x: = 1;");
1018        assert!(!parse.errors().is_empty(), "should have errors");
1019        let tree = format!("{:#?}", parse.syntax());
1020        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1021    }
1022
1023    #[test]
1024    fn recover_missing_condition_in_if() {
1025        // When the condition is missing and we have `if { }`, the `{` is parsed
1026        // as an empty block expression in the condition position. The parser
1027        // should still recover and produce an IF_STMT.
1028        let parse = parse_stmt_result("if { }");
1029        // May or may not have errors depending on interpretation
1030        let tree = format!("{:#?}", parse.syntax());
1031        assert!(tree.contains("IF_STMT"), "tree should have IF_STMT");
1032    }
1033
1034    #[test]
1035    fn recover_missing_block_in_if() {
1036        let parse = parse_stmt_result("if x");
1037        assert!(!parse.errors().is_empty(), "should have errors");
1038        let tree = format!("{:#?}", parse.syntax());
1039        assert!(tree.contains("IF_STMT"), "tree should have IF_STMT");
1040    }
1041
1042    #[test]
1043    fn recover_missing_range_in_for() {
1044        let parse = parse_stmt_result("for i in { }");
1045        assert!(!parse.errors().is_empty(), "should have errors");
1046        let tree = format!("{:#?}", parse.syntax());
1047        assert!(tree.contains("FOR_STMT"), "tree should have FOR_STMT");
1048    }
1049
1050    #[test]
1051    fn recover_missing_operand_binary() {
1052        let parse = parse_stmt_result("let x = 1 + ;");
1053        assert!(!parse.errors().is_empty(), "should have errors");
1054        let tree = format!("{:#?}", parse.syntax());
1055        assert!(tree.contains("BINARY_EXPR"), "tree should have BINARY_EXPR");
1056    }
1057
1058    #[test]
1059    fn recover_missing_operand_unary() {
1060        let parse = parse_stmt_result("let x = -;");
1061        assert!(!parse.errors().is_empty(), "should have errors");
1062        let tree = format!("{:#?}", parse.syntax());
1063        assert!(tree.contains("UNARY_EXPR"), "tree should have UNARY_EXPR");
1064    }
1065
1066    #[test]
1067    fn recover_unclosed_paren() {
1068        let parse = parse_stmt_result("let x = (1 + 2;");
1069        assert!(!parse.errors().is_empty(), "should have errors");
1070        let tree = format!("{:#?}", parse.syntax());
1071        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1072    }
1073
1074    #[test]
1075    fn recover_unclosed_bracket() {
1076        let parse = parse_stmt_result("let x = [1, 2;");
1077        assert!(!parse.errors().is_empty(), "should have errors");
1078        let tree = format!("{:#?}", parse.syntax());
1079        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1080    }
1081
1082    #[test]
1083    fn recover_invalid_token_in_expr() {
1084        let parse = parse_stmt_result("let x = 1 @ 2;");
1085        assert!(!parse.errors().is_empty(), "should have errors");
1086        let tree = format!("{:#?}", parse.syntax());
1087        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1088    }
1089
1090    #[test]
1091    fn recover_malformed_assert() {
1092        let parse = parse_stmt_result("assert();");
1093        assert!(!parse.errors().is_empty(), "should have errors");
1094        let tree = format!("{:#?}", parse.syntax());
1095        assert!(tree.contains("ASSERT_STMT"), "tree should have ASSERT_STMT");
1096    }
1097
1098    #[test]
1099    fn recover_malformed_assert_eq() {
1100        let parse = parse_stmt_result("assert_eq(1);");
1101        assert!(!parse.errors().is_empty(), "should have errors");
1102        let tree = format!("{:#?}", parse.syntax());
1103        assert!(tree.contains("ASSERT_EQ_STMT"), "tree should have ASSERT_EQ_STMT");
1104    }
1105
1106    #[test]
1107    fn recover_missing_assignment_rhs() {
1108        let parse = parse_stmt_result("x = ;");
1109        assert!(!parse.errors().is_empty(), "should have errors");
1110        let tree = format!("{:#?}", parse.syntax());
1111        assert!(tree.contains("ASSIGN_STMT"), "tree should have ASSIGN_STMT");
1112    }
1113
1114    #[test]
1115    fn recover_malformed_function_missing_params() {
1116        let parse = parse_file("program test.aleo { function foo { } }");
1117        assert!(!parse.errors().is_empty(), "should have errors");
1118        let tree = format!("{:#?}", parse.syntax());
1119        assert!(tree.contains("PROGRAM_DECL"), "tree should have PROGRAM_DECL");
1120        assert!(tree.contains("FUNCTION_DEF"), "tree should have FUNCTION_DEF");
1121    }
1122
1123    #[test]
1124    fn recover_malformed_function_missing_body() {
1125        let parse = parse_file("program test.aleo { function foo() }");
1126        assert!(!parse.errors().is_empty(), "should have errors");
1127        let tree = format!("{:#?}", parse.syntax());
1128        assert!(tree.contains("PROGRAM_DECL"), "tree should have PROGRAM_DECL");
1129        assert!(tree.contains("FUNCTION_DEF"), "tree should have FUNCTION_DEF");
1130    }
1131
1132    #[test]
1133    fn recover_malformed_struct_field() {
1134        let parse = parse_file("program test.aleo { struct Foo { x, y: u32 } }");
1135        assert!(!parse.errors().is_empty(), "should have errors");
1136        let tree = format!("{:#?}", parse.syntax());
1137        assert!(tree.contains("STRUCT_DEF"), "tree should have STRUCT_DEF");
1138    }
1139
1140    #[test]
1141    fn recover_malformed_mapping() {
1142        let parse = parse_file("program test.aleo { mapping balances: => u64; }");
1143        assert!(!parse.errors().is_empty(), "should have errors");
1144        let tree = format!("{:#?}", parse.syntax());
1145        assert!(tree.contains("MAPPING_DEF"), "tree should have MAPPING_DEF");
1146    }
1147
1148    #[test]
1149    fn recover_multiple_errors_in_function() {
1150        let parse = parse_file(
1151            r#"program test.aleo {
1152                function foo() {
1153                    let x = ;
1154                    let y: = 1;
1155                    return x +;
1156                }
1157            }"#,
1158        );
1159        // Should have multiple errors but still parse the structure
1160        assert!(parse.errors().len() >= 3, "should have at least 3 errors");
1161        let tree = format!("{:#?}", parse.syntax());
1162        assert!(tree.contains("FUNCTION_DEF"), "tree should have FUNCTION_DEF");
1163        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1164        assert!(tree.contains("RETURN_STMT"), "tree should have RETURN_STMT");
1165    }
1166
1167    #[test]
1168    fn recover_valid_items_after_error() {
1169        let parse = parse_file(
1170            r#"program test.aleo {
1171                struct Invalid { x }
1172                struct Valid { y: u32 }
1173            }"#,
1174        );
1175        // Should have errors but parse both structs
1176        assert!(!parse.errors().is_empty(), "should have errors");
1177        let tree = format!("{:#?}", parse.syntax());
1178        // Both structs should be present
1179        let struct_count = tree.matches("STRUCT_DEF").count();
1180        assert_eq!(struct_count, 2, "should have 2 struct definitions");
1181    }
1182
1183    #[test]
1184    fn recover_empty_tuple_pattern() {
1185        let parse = parse_stmt_result("let () = foo;");
1186        // Empty tuple pattern should work
1187        let tree = format!("{:#?}", parse.syntax());
1188        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1189        assert!(tree.contains("TUPLE_PATTERN"), "tree should have TUPLE_PATTERN");
1190    }
1191
1192    #[test]
1193    fn recover_nested_errors() {
1194        // Errors within nested expressions
1195        let parse = parse_stmt_result("let x = ((1 + ) + 2);");
1196        assert!(!parse.errors().is_empty(), "should have errors");
1197        let tree = format!("{:#?}", parse.syntax());
1198        assert!(tree.contains("LET_STMT"), "tree should have LET_STMT");
1199        assert!(tree.contains("PAREN_EXPR"), "tree should have PAREN_EXPR");
1200    }
1201
1202    #[test]
1203    fn recover_ternary_missing_then() {
1204        let parse = parse_stmt_result("let x = cond ? : 2;");
1205        assert!(!parse.errors().is_empty(), "should have errors");
1206        let tree = format!("{:#?}", parse.syntax());
1207        assert!(tree.contains("TERNARY_EXPR"), "tree should have TERNARY_EXPR");
1208    }
1209
1210    #[test]
1211    fn recover_ternary_missing_else() {
1212        let parse = parse_stmt_result("let x = cond ? 1 :;");
1213        assert!(!parse.errors().is_empty(), "should have errors");
1214        let tree = format!("{:#?}", parse.syntax());
1215        assert!(tree.contains("TERNARY_EXPR"), "tree should have TERNARY_EXPR");
1216    }
1217
1218    #[test]
1219    fn never_panics_on_garbage() {
1220        // Feed various garbage inputs - parser should never panic
1221        let garbage_inputs = [
1222            "@#$%^&*()",
1223            "{{{{{{",
1224            "}}}}}}",
1225            "let let let",
1226            "program { program { program",
1227            "struct struct struct",
1228            ";;;;;;",
1229            "++ -- ** //",
1230            "",
1231            "   \n\t\r  ",
1232            "🦀🦀🦀", // Unicode
1233        ];
1234
1235        for input in garbage_inputs {
1236            // This should never panic
1237            let _ = parse_file(input);
1238        }
1239    }
1240}
leo_parser_rowan/parser/mod.rs

leo_parser_rowan/parser/
mod.rs