Skip to main content

lex_syntax/
parser.rs

//! Recursive-descent parser for Lex. Pratt-style precedence climbing for
//! binary operators; everything else is straightforward LL(1)-with-lookahead.
3
4use crate::syntax::*;
5use crate::token::{Token, TokenKind};
6
7pub fn parse(tokens: Vec<Token>) -> Result<Program, ParseError> {
8    // Back-compat entry: no source available, so leading comments are
9    // not recovered. `parse_source` in `lib.rs` calls
10    // `parse_with_src` directly and is the path that preserves them.
11    parse_with_src("", tokens)
12}
13
14/// Parse + attach `#` line-comments to the AST. The source string is
15/// used only to scan the gaps between tokens (where the lexer skipped
16/// whitespace and comments); the parser itself still operates purely
17/// on tokens. See `Program::leading_comments` for the data model.
18pub fn parse_with_src(src: &str, tokens: Vec<Token>) -> Result<Program, ParseError> {
19    let mut p = Parser::new(src, tokens);
20    let program = p.parse_program()?;
21    p.skip_newlines();
22    if !p.at_eof() {
23        return Err(p.error("unexpected token after program"));
24    }
25    Ok(program)
26}
27
28#[derive(Debug, thiserror::Error)]
29#[error("parse error at byte {pos}: {msg}")]
30pub struct ParseError {
31    pub pos: usize,
32    pub msg: String,
33}
34
35struct Parser<'a> {
36    /// Source text — needed only for trivia recovery (line comments
37    /// in the gaps between tokens). Empty when called through the
38    /// legacy `parse(tokens)` entry; comments are silently dropped
39    /// in that path.
40    src: &'a str,
41    tokens: Vec<Token>,
42    idx: usize,
43    /// Recursion depth across `parse_expr`. Capped at `MAX_DEPTH`
44    /// to defend against adversarial input like a long sequence of
45    /// `[[[{{{...` that would otherwise blow the stack. Found by
46    /// the libFuzzer parser target — see `fuzz/fuzz_targets/parser.rs`.
47    depth: u32,
48    /// Counter for `let _ := ...` discard bindings (#200). Each
49    /// discard gets a unique synthetic name so multiple `let _`
50    /// in the same scope shadow rather than collide. The names
51    /// aren't expressible in user syntax (`__lex_discard_N`),
52    /// so user code can't reference them by accident.
53    discard_counter: u32,
54}
55
/// Deepest expression nesting the parser accepts; anything deeper is
/// refused with a parse error. Real Lex code rarely exceeds 30, so 96
/// leaves generous headroom for legitimate generated code.
///
/// One `parse_expr` level costs several stack frames (the
/// `parse_binary_expr → parse_unary → parse_postfix → parse_primary → ...`
/// chain), so the cap keeps the total frame count to a few hundred —
/// well below even a 2 MiB test stack.
const MAX_DEPTH: u32 = 96;
65
66impl<'a> Parser<'a> {
67    fn new(src: &'a str, tokens: Vec<Token>) -> Self {
68        Self { src, tokens, idx: 0, depth: 0, discard_counter: 0 }
69    }
70
71    /// Extract `#` line-comments from the source byte range
72    /// `start..end`. The range must be a "gap" between two tokens
73    /// (or between source-start/end and a token); by construction
74    /// such gaps contain only whitespace and `#` comments — string
75    /// contents never appear because the lexer's logos rules would
76    /// have produced a `Str(...)` token covering them.
77    ///
78    /// Each returned entry is a single source line, trimmed of leading
79    /// whitespace (the `#` and everything after it preserved) and of
80    /// trailing whitespace. Blank lines between consecutive comments
81    /// are dropped — preserving inter-comment blank lines is left to
82    /// a follow-up; the bug this addresses (#417) is about comments
83    /// disappearing entirely.
84    fn extract_comments(&self, start: usize, end: usize) -> Vec<String> {
85        if start >= end || end > self.src.len() {
86            return Vec::new();
87        }
88        self.src[start..end]
89            .lines()
90            .filter_map(|line| {
91                let trimmed = line.trim_start();
92                if trimmed.starts_with('#') {
93                    Some(trimmed.trim_end().to_string())
94                } else {
95                    None
96                }
97            })
98            .collect()
99    }
100
101    fn at_eof(&self) -> bool {
102        self.idx >= self.tokens.len()
103    }
104
105    fn peek(&self) -> Option<&TokenKind> {
106        self.tokens.get(self.idx).map(|t| &t.kind)
107    }
108
109    fn bump(&mut self) -> Option<Token> {
110        let t = self.tokens.get(self.idx).cloned();
111        if t.is_some() {
112            self.idx += 1;
113        }
114        t
115    }
116
117    fn current_pos(&self) -> usize {
118        self.tokens
119            .get(self.idx)
120            .map(|t| t.span.start)
121            .unwrap_or_else(|| self.tokens.last().map(|t| t.span.end).unwrap_or(0))
122    }
123
124    fn error(&self, msg: impl Into<String>) -> ParseError {
125        ParseError { pos: self.current_pos(), msg: msg.into() }
126    }
127
128    fn skip_newlines(&mut self) {
129        while matches!(self.peek(), Some(TokenKind::Newline) | Some(TokenKind::Semi)) {
130            self.idx += 1;
131        }
132    }
133
134    fn expect(&mut self, expected: &TokenKind, ctx: &str) -> Result<Token, ParseError> {
135        self.skip_newlines();
136        match self.peek() {
137            Some(k) if std::mem::discriminant(k) == std::mem::discriminant(expected) => {
138                Ok(self.bump().unwrap())
139            }
140            Some(other) => Err(self.error(format!(
141                "expected {expected:?} {ctx}, got {other:?}"
142            ))),
143            None => Err(self.error(format!("expected {expected:?} {ctx}, got EOF"))),
144        }
145    }
146
147    fn eat(&mut self, k: &TokenKind) -> bool {
148        self.skip_newlines();
149        if let Some(cur) = self.peek() {
150            if std::mem::discriminant(cur) == std::mem::discriminant(k) {
151                self.bump();
152                return true;
153            }
154        }
155        false
156    }
157
158    fn expect_ident(&mut self, ctx: &str) -> Result<String, ParseError> {
159        self.skip_newlines();
160        match self.peek() {
161            Some(TokenKind::Ident(_)) => match self.bump().unwrap().kind {
162                TokenKind::Ident(name) => Ok(name),
163                _ => unreachable!(),
164            },
165            other => Err(self.error(format!("expected identifier {ctx}, got {other:?}"))),
166        }
167    }
168
169    // --- top level ---
170
171    fn parse_program(&mut self) -> Result<Program, ParseError> {
172        let mut items = Vec::new();
173        let mut leading_comments: Vec<String> = Vec::new();
174        // Byte offset of the end of the last consumed token (or 0 at
175        // start of file). The next gap to scan for comments is from
176        // here up to the start of the upcoming item's first token.
177        let mut gap_start: usize = 0;
178        loop {
179            self.skip_newlines();
180            if self.at_eof() {
181                break;
182            }
183            let item_start = self.tokens[self.idx].span.start;
184            let gap_comments = self.extract_comments(gap_start, item_start);
185            let pending_comments = if items.is_empty() {
186                leading_comments = gap_comments;
187                Vec::new()
188            } else {
189                gap_comments
190            };
191            let mut item = self.parse_item()?;
192            if !pending_comments.is_empty() {
193                attach_leading_comments(&mut item, pending_comments);
194            }
195            gap_start = self
196                .tokens
197                .get(self.idx.saturating_sub(1))
198                .map(|t| t.span.end)
199                .unwrap_or(gap_start);
200            items.push(item);
201        }
202        // Trailing comments live after the last consumed token (or
203        // span the whole file when there are no items).
204        let trailing_comments = self.extract_comments(gap_start, self.src.len());
205        Ok(Program { items, leading_comments, trailing_comments })
206    }
207
208    fn parse_item(&mut self) -> Result<Item, ParseError> {
209        match self.peek() {
210            Some(TokenKind::Import) => self.parse_import().map(Item::Import),
211            Some(TokenKind::Type) => self.parse_type_decl().map(Item::TypeDecl),
212            Some(TokenKind::Fn) => self.parse_fn_decl().map(Item::FnDecl),
213            other => Err(self.error(format!(
214                "expected `import`, `type`, or `fn` at top level, got {other:?}"
215            ))),
216        }
217    }
218
219    fn parse_import(&mut self) -> Result<Import, ParseError> {
220        self.expect(&TokenKind::Import, "in import")?;
221        let reference = match self.bump().map(|t| t.kind) {
222            Some(TokenKind::Str(s)) => s,
223            other => return Err(self.error(format!("expected string after `import`, got {other:?}"))),
224        };
225        self.expect(&TokenKind::As, "in import")?;
226        let alias = self.expect_ident("for import alias")?;
227        Ok(Import { reference, alias, leading_comments: Vec::new() })
228    }
229
230    fn parse_type_decl(&mut self) -> Result<TypeDecl, ParseError> {
231        self.expect(&TokenKind::Type, "in type decl")?;
232        let name = self.expect_ident("for type name")?;
233        let params = if self.eat(&TokenKind::LBracket) {
234            let ps = self.parse_ident_list()?;
235            self.expect(&TokenKind::RBracket, "after type params")?;
236            ps
237        } else {
238            Vec::new()
239        };
240        self.expect(&TokenKind::Eq, "in type decl")?;
241        let definition = self.parse_type_decl_rhs()?;
242        Ok(TypeDecl { name, params, definition, leading_comments: Vec::new() })
243    }
244
245    fn parse_ident_list(&mut self) -> Result<Vec<String>, ParseError> {
246        let mut out = Vec::new();
247        out.push(self.expect_ident("in identifier list")?);
248        while self.eat(&TokenKind::Comma) {
249            if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
250            out.push(self.expect_ident("in identifier list")?);
251        }
252        Ok(out)
253    }
254
255    /// `type Foo = Variant1 | Variant2(Payload)` is a union; otherwise a plain type expression.
256    fn parse_type_decl_rhs(&mut self) -> Result<TypeExpr, ParseError> {
257        let first = self.parse_type_expr()?;
258        // Detect union: PascalCase ident (or named type w/ optional payload) followed by `|`.
259        if matches!(self.peek_skip_newlines(), Some(TokenKind::Bar)) {
260            let mut variants = Vec::new();
261            variants.push(type_to_variant(first)?);
262            while self.eat(&TokenKind::Bar) {
263                let next = self.parse_type_expr()?;
264                variants.push(type_to_variant(next)?);
265            }
266            Ok(TypeExpr::Union(variants))
267        } else {
268            // Single-variant union without `|`: `type Msg = Execute(Str)`.
269            // The `(...)` constructor-payload syntax is distinguishable from
270            // `[...]` type-application by checking the last consumed token.
271            // `Execute(Str)` ends with `)`, `List[Int]` ends with `]`,
272            // `AnotherType` ends with the ident token.
273            let last_was_rparen = self.idx > 0 && matches!(
274                self.tokens.get(self.idx - 1).map(|t| &t.kind),
275                Some(TokenKind::RParen)
276            );
277            if last_was_rparen {
278                if let TypeExpr::Named { ref name, .. } = first {
279                    let unqual = name.split('.').next_back().unwrap_or(name.as_str());
280                    if unqual.chars().next().map(|c| c.is_ascii_uppercase()).unwrap_or(false) {
281                        return Ok(TypeExpr::Union(vec![type_to_variant(first)?]));
282                    }
283                }
284            }
285            Ok(first)
286        }
287    }
288
289    fn peek_skip_newlines(&mut self) -> Option<TokenKind> {
290        let saved = self.idx;
291        self.skip_newlines();
292        let out = self.peek().cloned();
293        self.idx = saved;
294        out
295    }
296
297    fn parse_type_expr(&mut self) -> Result<TypeExpr, ParseError> {
298        let base = self.parse_type_expr_base()?;
299        self.maybe_wrap_refinement(base)
300    }
301
302    fn parse_type_expr_base(&mut self) -> Result<TypeExpr, ParseError> {
303        self.skip_newlines();
304        match self.peek() {
305            Some(TokenKind::LBrace) => self.parse_record_type(),
306            Some(TokenKind::LParen) => self.parse_paren_type_or_function(),
307            Some(TokenKind::Ident(_)) => {
308                let mut name = self.expect_ident("in type expr")?;
309                // Module-qualified type: `m.Type` or `m.n.Type`. We accept
310                // dotted names here and let the loader rewrite them to the
311                // file-local mangled form. After the loader pass, all type
312                // names referenced by the type checker are single segments.
313                while matches!(self.peek(), Some(TokenKind::Dot)) {
314                    self.bump();
315                    let next = self.expect_ident("after `.` in qualified type")?;
316                    name.push('.');
317                    name.push_str(&next);
318                }
319                let args = if matches!(self.peek(), Some(TokenKind::LBracket)) {
320                    self.bump();
321                    let mut args = Vec::new();
322                    args.push(self.parse_type_expr()?);
323                    while self.eat(&TokenKind::Comma) {
324                        if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
325                        args.push(self.parse_type_expr()?);
326                    }
327                    self.expect(&TokenKind::RBracket, "after type args")?;
328                    args
329                } else if matches!(self.peek(), Some(TokenKind::LParen)) {
330                    // Constructor type with payload: `Name(T)` or `Name(T1, T2)`.
331                    self.bump();
332                    let mut args = Vec::new();
333                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
334                        args.push(self.parse_type_expr()?);
335                        while self.eat(&TokenKind::Comma) {
336                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
337                            args.push(self.parse_type_expr()?);
338                        }
339                    }
340                    self.expect(&TokenKind::RParen, "after constructor payload")?;
341                    args
342                } else {
343                    Vec::new()
344                };
345                Ok(TypeExpr::Named { name, args })
346            }
347            other => Err(self.error(format!("expected type expression, got {other:?}"))),
348        }
349    }
350
351    /// Refinement type postfix (#209): `BaseType{binding | predicate}`.
352    ///
353    /// Disambiguates from a function body's opening brace by peeking
354    /// three tokens ahead — refinement requires `{ Ident |`, a body
355    /// begins with `{ <expr-starting-token>`. This means a refinement
356    /// binding name can't start with `|`, but that's fine since
357    /// identifiers don't.
358    fn maybe_wrap_refinement(&mut self, base: TypeExpr) -> Result<TypeExpr, ParseError> {
359        let next0 = self.tokens.get(self.idx).map(|t| &t.kind);
360        let next1 = self.tokens.get(self.idx + 1).map(|t| &t.kind);
361        let next2 = self.tokens.get(self.idx + 2).map(|t| &t.kind);
362        let is_refinement_lookahead = matches!(next0, Some(TokenKind::LBrace))
363            && matches!(next1, Some(TokenKind::Ident(_)))
364            && matches!(next2, Some(TokenKind::Bar));
365        if !is_refinement_lookahead {
366            return Ok(base);
367        }
368        self.bump(); // `{`
369        let binding = self.expect_ident("for refinement binding")?;
370        self.expect(&TokenKind::Bar, "after refinement binding")?;
371        let predicate = self.parse_expr()?;
372        self.expect(&TokenKind::RBrace, "to close refinement")?;
373        Ok(TypeExpr::Refined {
374            base: Box::new(base),
375            binding,
376            predicate: Box::new(predicate),
377        })
378    }
379
380    fn parse_record_type(&mut self) -> Result<TypeExpr, ParseError> {
381        self.expect(&TokenKind::LBrace, "in record type")?;
382        let mut fields = Vec::new();
383        let mut spreads = Vec::new();
384        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
385            loop {
386                self.skip_newlines();
387                if matches!(self.peek(), Some(TokenKind::DotDotDot)) {
388                    self.bump(); // consume `...`
389                    let name = self.expect_ident("after `...` in record type spread")?;
390                    spreads.push(name);
391                } else {
392                    let name = self.expect_ident("in record field")?;
393                    self.expect(&TokenKind::ColonColon, "after record field name")?;
394                    let ty = self.parse_type_expr()?;
395                    fields.push(TypeField { name, ty });
396                }
397                self.skip_newlines();
398                if !self.eat(&TokenKind::Comma) { break; }
399                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
400            }
401        }
402        self.expect(&TokenKind::RBrace, "in record type")?;
403        if spreads.is_empty() {
404            Ok(TypeExpr::Record(fields))
405        } else {
406            Ok(TypeExpr::RecordWithSpreads { spreads, fields })
407        }
408    }
409
410    fn parse_paren_type_or_function(&mut self) -> Result<TypeExpr, ParseError> {
411        self.expect(&TokenKind::LParen, "in type")?;
412        let mut args = Vec::new();
413        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
414            args.push(self.parse_type_expr()?);
415            while self.eat(&TokenKind::Comma) {
416                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
417                args.push(self.parse_type_expr()?);
418            }
419        }
420        self.expect(&TokenKind::RParen, "in type")?;
421        // Function type if followed by `->`.
422        if matches!(self.peek_skip_newlines(), Some(TokenKind::Arrow)) {
423            self.skip_newlines();
424            self.bump();
425            let effects = self.parse_effects()?;
426            let ret = self.parse_type_expr()?;
427            Ok(TypeExpr::Function {
428                params: args,
429                effects,
430                ret: Box::new(ret),
431            })
432        } else if args.len() == 1 {
433            // Parenthesized type expression.
434            Ok(args.into_iter().next().unwrap())
435        } else {
436            Ok(TypeExpr::Tuple(args))
437        }
438    }
439
440    fn parse_fn_decl(&mut self) -> Result<FnDecl, ParseError> {
441        self.expect(&TokenKind::Fn, "in fn decl")?;
442        let name = self.expect_ident("for function name")?;
443        let type_params = if self.eat(&TokenKind::LBracket) {
444            let ps = self.parse_ident_list()?;
445            self.expect(&TokenKind::RBracket, "after type params")?;
446            ps
447        } else {
448            Vec::new()
449        };
450        self.expect(&TokenKind::LParen, "before params")?;
451        let mut params = Vec::new();
452        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
453            params.push(self.parse_param()?);
454            while self.eat(&TokenKind::Comma) {
455                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
456                params.push(self.parse_param()?);
457            }
458        }
459        self.expect(&TokenKind::RParen, "after params")?;
460        self.expect(&TokenKind::Arrow, "before return type")?;
461        let effects = self.parse_effects()?;
462        let return_type = self.parse_type_expr()?;
463        let examples = self.parse_examples_block()?;
464        let body = self.parse_block()?;
465        Ok(FnDecl { name, type_params, params, effects, return_type, body, examples, leading_comments: Vec::new() })
466    }
467
468    /// Parse an optional `examples { call(a, b) => expected, ... }` block
469    /// sitting between the return type and the body (#369). Returns an
470    /// empty vec when no block is present.
471    fn parse_examples_block(&mut self) -> Result<Vec<Example>, ParseError> {
472        // Contextual: not a reserved keyword. Peek for the literal
473        // identifier `examples` followed by `{`; otherwise no block.
474        let is_examples_kw = matches!(
475            self.peek_skip_newlines(),
476            Some(TokenKind::Ident(s)) if s == "examples"
477        );
478        if !is_examples_kw {
479            return Ok(Vec::new());
480        }
481        self.skip_newlines();
482        self.bump(); // consume `examples`
483        self.expect(&TokenKind::LBrace, "after `examples`")?;
484        let mut cases = Vec::new();
485        loop {
486            self.skip_newlines();
487            if matches!(self.peek(), Some(TokenKind::RBrace)) { break; }
488            let call = self.parse_expr()?;
489            self.expect(&TokenKind::FatArrow, "in example case (between call and expected)")?;
490            let expected = self.parse_expr()?;
491            let (args, _) = match call {
492                Expr::Call { callee: _, args } => (args, ()),
493                other => return Err(self.error(
494                    format!("example case must be a call to the function under definition; got {other:?}")
495                )),
496            };
497            cases.push(Example { args, expected });
498            self.skip_newlines();
499            if !self.eat(&TokenKind::Comma) {
500                self.skip_newlines();
501                break;
502            }
503        }
504        self.expect(&TokenKind::RBrace, "to close examples block")?;
505        Ok(cases)
506    }
507
508    fn parse_param(&mut self) -> Result<Param, ParseError> {
509        let name = if matches!(self.peek_skip_newlines(), Some(TokenKind::Underscore)) {
510            self.skip_newlines();
511            self.bump();
512            self.discard_counter += 1;
513            format!("__lex_discard_{}", self.discard_counter)
514        } else {
515            self.expect_ident("for parameter name")?
516        };
517        self.expect(&TokenKind::ColonColon, "after parameter name")?;
518        let ty = self.parse_type_expr()?;
519        Ok(Param { name, ty })
520    }
521
522    fn parse_effects(&mut self) -> Result<Vec<Effect>, ParseError> {
523        if !self.eat(&TokenKind::LBracket) {
524            return Ok(Vec::new());
525        }
526        let mut out = Vec::new();
527        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) {
528            out.push(self.parse_effect()?);
529            while self.eat(&TokenKind::Comma) {
530                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
531                out.push(self.parse_effect()?);
532            }
533        }
534        self.expect(&TokenKind::RBracket, "after effects")?;
535        Ok(out)
536    }
537
538    fn parse_effect(&mut self) -> Result<Effect, ParseError> {
539        let name = self.expect_ident("for effect name")?;
540        let arg = if self.eat(&TokenKind::LParen) {
541            let arg = match self.bump().map(|t| t.kind) {
542                Some(TokenKind::Str(s)) => EffectArg::Str(s),
543                Some(TokenKind::Int(n)) => EffectArg::Int(n),
544                Some(TokenKind::Ident(s)) => EffectArg::Ident(s),
545                other => return Err(self.error(format!("invalid effect arg: {other:?}"))),
546            };
547            self.expect(&TokenKind::RParen, "after effect arg")?;
548            Some(arg)
549        } else {
550            None
551        };
552        Ok(Effect { name, arg })
553    }
554
555    // --- blocks and statements ---
556
557    fn parse_block(&mut self) -> Result<Block, ParseError> {
558        self.expect(&TokenKind::LBrace, "before block")?;
559        let mut statements = Vec::new();
560        let result;
561        loop {
562            self.skip_newlines();
563            if matches!(self.peek(), Some(TokenKind::RBrace)) {
564                // Empty block: synthesize Unit literal.
565                result = Box::new(Expr::Lit(Literal::Unit));
566                break;
567            }
568            // Try parsing a let; otherwise an expression.
569            if matches!(self.peek(), Some(TokenKind::Let)) {
570                let stmt = self.parse_let_statement()?;
571                statements.push(stmt);
572                self.skip_newlines();
573                continue;
574            }
575            let expr = self.parse_expr()?;
576            self.skip_newlines();
577            // If the next token is `}`, this expression is the block's result.
578            if matches!(self.peek(), Some(TokenKind::RBrace)) {
579                result = Box::new(expr);
580                break;
581            }
582            statements.push(Statement::Expr(expr));
583        }
584        self.expect(&TokenKind::RBrace, "to close block")?;
585        Ok(Block { statements, result })
586    }
587
588    fn parse_let_statement(&mut self) -> Result<Statement, ParseError> {
589        self.expect(&TokenKind::Let, "in let")?;
590        // `let _ := expr` is the discard idiom (#200). The RHS is
591        // still evaluated for its effect, but the result is bound
592        // to a synthetic name nothing else references — so the
593        // type-checker / VM treat it like a normal let, but user
594        // code can't accidentally reach it.
595        let name = if matches!(self.peek_skip_newlines(), Some(TokenKind::Underscore)) {
596            self.skip_newlines();
597            self.bump();
598            self.discard_counter += 1;
599            format!("__lex_discard_{}", self.discard_counter)
600        } else {
601            self.expect_ident("after `let`")?
602        };
603        let ty = if self.eat(&TokenKind::ColonColon) {
604            Some(self.parse_type_expr()?)
605        } else {
606            None
607        };
608        self.expect(&TokenKind::ColonEq, "in let")?;
609        let value = self.parse_expr()?;
610        Ok(Statement::Let { name, ty, value })
611    }
612
613    // --- expressions ---
614
615    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
616        // Recursion gate: every nested expression — match arms,
617        // tuple/list/record/block elements, function args, etc. —
618        // enters here, so this is the right place to bound depth.
619        // Decrement happens whether the inner call succeeds or fails.
620        if self.depth >= MAX_DEPTH {
621            return Err(ParseError {
622                pos: self.current_pos(),
623                msg: format!(
624                    "expression nests too deeply (max {MAX_DEPTH}); \
625                     malformed or hand-crafted input?"),
626            });
627        }
628        self.depth += 1;
629        let r = self.parse_expr_inner();
630        self.depth -= 1;
631        r
632    }
633
634    fn parse_expr_inner(&mut self) -> Result<Expr, ParseError> {
635        // Pipes are left-associative and bind less tightly than binary ops.
636        let mut left = self.parse_binary_expr(0)?;
637        while matches!(self.peek_skip_newlines(), Some(TokenKind::Pipe)) {
638            self.skip_newlines();
639            self.bump();
640            let right = self.parse_binary_expr(0)?;
641            left = Expr::Pipe { left: Box::new(left), right: Box::new(right) };
642        }
643        Ok(left)
644    }
645
646    fn parse_binary_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
647        let mut lhs = self.parse_unary()?;
648        loop {
649            let op = match self.peek_binop() {
650                Some(op) if op.precedence() >= min_prec => op,
651                _ => break,
652            };
653            self.skip_newlines();
654            self.bump();
655            let rhs = self.parse_binary_expr(op.precedence() + 1)?;
656            lhs = Expr::BinOp { op, lhs: Box::new(lhs), rhs: Box::new(rhs) };
657        }
658        Ok(lhs)
659    }
660
661    fn peek_binop(&mut self) -> Option<BinOp> {
662        match self.peek_skip_newlines()? {
663            TokenKind::Plus => Some(BinOp::Add),
664            TokenKind::Minus => Some(BinOp::Sub),
665            TokenKind::Star => Some(BinOp::Mul),
666            TokenKind::Slash => Some(BinOp::Div),
667            TokenKind::Percent => Some(BinOp::Mod),
668            TokenKind::EqEq => Some(BinOp::Eq),
669            TokenKind::BangEq => Some(BinOp::Neq),
670            TokenKind::Lt => Some(BinOp::Lt),
671            TokenKind::LtEq => Some(BinOp::Lte),
672            TokenKind::Gt => Some(BinOp::Gt),
673            TokenKind::GtEq => Some(BinOp::Gte),
674            TokenKind::And => Some(BinOp::And),
675            TokenKind::Or => Some(BinOp::Or),
676            _ => None,
677        }
678    }
679
680    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
681        self.skip_newlines();
682        match self.peek() {
683            Some(TokenKind::Not) => {
684                self.bump();
685                let inner = self.parse_unary()?;
686                Ok(Expr::UnaryOp { op: UnaryOp::Not, expr: Box::new(inner) })
687            }
688            Some(TokenKind::Minus) => {
689                self.bump();
690                let inner = self.parse_unary()?;
691                Ok(Expr::UnaryOp { op: UnaryOp::Neg, expr: Box::new(inner) })
692            }
693            _ => self.parse_postfix(),
694        }
695    }
696
697    fn parse_postfix(&mut self) -> Result<Expr, ParseError> {
698        let mut expr = self.parse_primary()?;
699        loop {
700            // Postfix operations don't cross newlines (they bind tightly).
701            match self.peek() {
702                Some(TokenKind::Dot) => {
703                    self.bump();
704                    let field = self.expect_ident("after `.`")?;
705                    expr = Expr::Field { value: Box::new(expr), field };
706                }
707                Some(TokenKind::LParen) => {
708                    self.bump();
709                    let mut args = Vec::new();
710                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
711                        args.push(self.parse_expr()?);
712                        while self.eat(&TokenKind::Comma) {
713                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
714                            args.push(self.parse_expr()?);
715                        }
716                    }
717                    self.expect(&TokenKind::RParen, "in call")?;
718                    expr = Expr::Call { callee: Box::new(expr), args };
719                }
720                Some(TokenKind::Question) => {
721                    self.bump();
722                    expr = Expr::Try(Box::new(expr));
723                }
724                _ => break,
725            }
726        }
727        Ok(expr)
728    }
729
730    fn parse_primary(&mut self) -> Result<Expr, ParseError> {
731        self.skip_newlines();
732        match self.peek() {
733            Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
734                TokenKind::Int(n) => Ok(Expr::Lit(Literal::Int(n))),
735                _ => unreachable!(),
736            },
737            Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
738                TokenKind::Float(n) => Ok(Expr::Lit(Literal::Float(n))),
739                _ => unreachable!(),
740            },
741            Some(TokenKind::Str(_)) => match self.bump().unwrap().kind {
742                TokenKind::Str(s) => Ok(Expr::Lit(Literal::Str(s))),
743                _ => unreachable!(),
744            },
745            Some(TokenKind::Bytes(_)) => match self.bump().unwrap().kind {
746                TokenKind::Bytes(b) => Ok(Expr::Lit(Literal::Bytes(b))),
747                _ => unreachable!(),
748            },
749            Some(TokenKind::True) => { self.bump(); Ok(Expr::Lit(Literal::Bool(true))) }
750            Some(TokenKind::False) => { self.bump(); Ok(Expr::Lit(Literal::Bool(false))) }
751            Some(TokenKind::If) => self.parse_if(),
752            Some(TokenKind::Match) => self.parse_match(),
753            Some(TokenKind::Fn) => self.parse_lambda(),
754            Some(TokenKind::LBrace) => self.parse_brace_expr(),
755            Some(TokenKind::LBracket) => self.parse_list_literal(),
756            Some(TokenKind::LParen) => self.parse_paren_or_tuple(),
757            Some(TokenKind::Ident(_)) => self.parse_ident_or_record(),
758            other => Err(self.error(format!("expected expression, got {other:?}"))),
759        }
760    }
761
762    /// Disambiguate `{` between record literal and block.
763    /// Lookahead: `{ Ident :` is a record literal; `{ }` is also a record
764    /// (empty block has no use). Anything else is a block.
765    fn parse_brace_expr(&mut self) -> Result<Expr, ParseError> {
766        // Save position; peek 2-3 tokens past `{` (skipping newlines).
767        let saved = self.idx;
768        self.bump(); // `{`
769        // Skip newlines.
770        while matches!(self.peek(), Some(TokenKind::Newline) | Some(TokenKind::Semi)) {
771            self.idx += 1;
772        }
773        let is_record = matches!(self.peek(), Some(TokenKind::RBrace))
774            || (matches!(self.peek(), Some(TokenKind::Ident(_)))
775                && matches!(self.tokens.get(self.idx + 1).map(|t| &t.kind), Some(TokenKind::Colon) | Some(TokenKind::Comma) | Some(TokenKind::RBrace)));
776        self.idx = saved;
777        if is_record {
778            self.parse_record_literal()
779        } else {
780            Ok(Expr::Block(self.parse_block()?))
781        }
782    }
783
784    fn parse_record_literal(&mut self) -> Result<Expr, ParseError> {
785        self.expect(&TokenKind::LBrace, "in record literal")?;
786        let mut fields = Vec::new();
787        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
788            loop {
789                self.skip_newlines();
790                let name = self.expect_ident("in record literal")?;
791                let value = if self.eat(&TokenKind::Colon) {
792                    self.parse_expr()?
793                } else {
794                    // shorthand: `{ name }` => `{ name: name }`
795                    Expr::Var(name.clone())
796                };
797                fields.push(RecordLitField { name, value });
798                self.skip_newlines();
799                if !self.eat(&TokenKind::Comma) { break; }
800                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
801            }
802        }
803        self.expect(&TokenKind::RBrace, "after record literal")?;
804        Ok(Expr::RecordLit(fields))
805    }
806
807    fn parse_if(&mut self) -> Result<Expr, ParseError> {
808        self.expect(&TokenKind::If, "in if")?;
809        let cond = self.parse_expr()?;
810        let then_block = self.parse_block()?;
811        self.expect(&TokenKind::Else, "expected `else`")?;
812        let else_block = self.parse_block()?;
813        Ok(Expr::If { cond: Box::new(cond), then_block, else_block })
814    }
815
816    fn parse_match(&mut self) -> Result<Expr, ParseError> {
817        self.expect(&TokenKind::Match, "in match")?;
818        let scrutinee = self.parse_expr()?;
819        self.expect(&TokenKind::LBrace, "before match arms")?;
820        let mut arms = Vec::new();
821        loop {
822            self.skip_newlines();
823            if matches!(self.peek(), Some(TokenKind::RBrace)) { break; }
824            let pattern = self.parse_pattern()?;
825            self.expect(&TokenKind::FatArrow, "in match arm")?;
826            let body = self.parse_expr()?;
827            arms.push(Arm { pattern, body });
828            self.skip_newlines();
829            if !self.eat(&TokenKind::Comma) { break; }
830        }
831        self.expect(&TokenKind::RBrace, "after match arms")?;
832        Ok(Expr::Match { scrutinee: Box::new(scrutinee), arms })
833    }
834
835    fn parse_lambda(&mut self) -> Result<Expr, ParseError> {
836        self.expect(&TokenKind::Fn, "in lambda")?;
837        self.expect(&TokenKind::LParen, "before lambda params")?;
838        let mut params = Vec::new();
839        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
840            params.push(self.parse_param()?);
841            while self.eat(&TokenKind::Comma) {
842                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
843                params.push(self.parse_param()?);
844            }
845        }
846        self.expect(&TokenKind::RParen, "after lambda params")?;
847        self.expect(&TokenKind::Arrow, "before lambda return type")?;
848        let effects = self.parse_effects()?;
849        let return_type = self.parse_type_expr()?;
850        let body = self.parse_block()?;
851        Ok(Expr::Lambda(Box::new(Lambda { params, effects, return_type, body })))
852    }
853
854    fn parse_list_literal(&mut self) -> Result<Expr, ParseError> {
855        self.expect(&TokenKind::LBracket, "before list literal")?;
856        let mut items = Vec::new();
857        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) {
858            items.push(self.parse_expr()?);
859            while self.eat(&TokenKind::Comma) {
860                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
861                items.push(self.parse_expr()?);
862            }
863        }
864        self.expect(&TokenKind::RBracket, "after list literal")?;
865        Ok(Expr::ListLit(items))
866    }
867
868    fn parse_paren_or_tuple(&mut self) -> Result<Expr, ParseError> {
869        self.expect(&TokenKind::LParen, "")?;
870        if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
871            self.bump();
872            return Ok(Expr::Lit(Literal::Unit));
873        }
874        let first = self.parse_expr()?;
875        // Inline type ascription: `(expr :: Type)` — peek for `::` before
876        // deciding whether this is a tuple, a grouping, or an ascription.
877        if self.eat(&TokenKind::ColonColon) {
878            let ty = self.parse_type_expr()?;
879            self.expect(&TokenKind::RParen, "after type ascription")?;
880            return Ok(Expr::Ascription { value: Box::new(first), ty });
881        }
882        if self.eat(&TokenKind::Comma) {
883            let mut items = vec![first];
884            if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
885                items.push(self.parse_expr()?);
886                while self.eat(&TokenKind::Comma) {
887                    if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
888                    items.push(self.parse_expr()?);
889                }
890            }
891            self.expect(&TokenKind::RParen, "after tuple")?;
892            Ok(Expr::TupleLit(items))
893        } else {
894            self.expect(&TokenKind::RParen, "after parenthesized expression")?;
895            Ok(first)
896        }
897    }
898
899    fn parse_ident_or_record(&mut self) -> Result<Expr, ParseError> {
900        // Ident is parsed as a Var; later postfix (`(`, `.`, `?`) attach.
901        let name = self.expect_ident("")?;
902        Ok(Expr::Var(name))
903    }
904
905    // --- patterns ---
906
907    fn parse_pattern(&mut self) -> Result<Pattern, ParseError> {
908        self.skip_newlines();
909        match self.peek() {
910            Some(TokenKind::Minus) => {
911                self.bump();
912                self.skip_newlines();
913                match self.peek() {
914                    Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
915                        TokenKind::Int(n) => Ok(Pattern::Lit(Literal::Int(-n))),
916                        _ => unreachable!(),
917                    },
918                    Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
919                        TokenKind::Float(n) => Ok(Pattern::Lit(Literal::Float(-n))),
920                        _ => unreachable!(),
921                    },
922                    other => Err(self.error(format!("expected Int or Float after `-` in pattern, got {other:?}"))),
923                }
924            }
925            Some(TokenKind::Underscore) => { self.bump(); Ok(Pattern::Wild) }
926            Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
927                TokenKind::Int(n) => Ok(Pattern::Lit(Literal::Int(n))),
928                _ => unreachable!(),
929            },
930            Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
931                TokenKind::Float(n) => Ok(Pattern::Lit(Literal::Float(n))),
932                _ => unreachable!(),
933            },
934            Some(TokenKind::Str(_)) => match self.bump().unwrap().kind {
935                TokenKind::Str(s) => Ok(Pattern::Lit(Literal::Str(s))),
936                _ => unreachable!(),
937            },
938            Some(TokenKind::True) => { self.bump(); Ok(Pattern::Lit(Literal::Bool(true))) }
939            Some(TokenKind::False) => { self.bump(); Ok(Pattern::Lit(Literal::Bool(false))) }
940            Some(TokenKind::LBrace) => self.parse_record_pattern(),
941            Some(TokenKind::LParen) => self.parse_tuple_pattern(),
942            Some(TokenKind::Ident(_)) => {
943                let mut name = self.expect_ident("")?;
944                // Handle module-qualified constructor patterns: `module.Constructor(args)`.
945                // Strip the qualifier and keep only the final name, matching how the
946                // compiler emits MakeVariant with the unqualified constructor name.
947                while matches!(self.peek(), Some(TokenKind::Dot)) {
948                    self.bump();
949                    name = self.expect_ident("after `.` in qualified pattern")?;
950                }
951                if matches!(self.peek(), Some(TokenKind::LParen)) {
952                    self.bump();
953                    let mut args = Vec::new();
954                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
955                        args.push(self.parse_pattern()?);
956                        while self.eat(&TokenKind::Comma) {
957                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
958                            args.push(self.parse_pattern()?);
959                        }
960                    }
961                    self.expect(&TokenKind::RParen, "after constructor pattern")?;
962                    Ok(Pattern::Constructor { name, args })
963                } else {
964                    Ok(Pattern::Var(name))
965                }
966            }
967            other => Err(self.error(format!("expected pattern, got {other:?}"))),
968        }
969    }
970
971    fn parse_record_pattern(&mut self) -> Result<Pattern, ParseError> {
972        self.expect(&TokenKind::LBrace, "")?;
973        let mut fields = Vec::new();
974        let rest = false;
975        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
976            loop {
977                self.skip_newlines();
978                let name = self.expect_ident("in record pattern")?;
979                let pattern = if self.eat(&TokenKind::Colon) {
980                    Some(self.parse_pattern()?)
981                } else {
982                    None
983                };
984                fields.push(RecordPatField { name, pattern });
985                self.skip_newlines();
986                if !self.eat(&TokenKind::Comma) { break; }
987                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
988            }
989        }
990        self.expect(&TokenKind::RBrace, "after record pattern")?;
991        Ok(Pattern::Record { fields, rest })
992    }
993
994    fn parse_tuple_pattern(&mut self) -> Result<Pattern, ParseError> {
995        self.expect(&TokenKind::LParen, "")?;
996        let mut items = Vec::new();
997        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
998            items.push(self.parse_pattern()?);
999            while self.eat(&TokenKind::Comma) {
1000                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
1001                items.push(self.parse_pattern()?);
1002            }
1003        }
1004        self.expect(&TokenKind::RParen, "after tuple pattern")?;
1005        if items.len() == 1 {
1006            Ok(items.into_iter().next().unwrap())
1007        } else {
1008            Ok(Pattern::Tuple(items))
1009        }
1010    }
1011}
1012
1013/// In a union RHS, every leaf must be a `Named` type expression — that is, a
1014/// PascalCase ident with optional payload via `Variant(payload_type)`.
1015fn type_to_variant(t: TypeExpr) -> Result<UnionVariant, ParseError> {
1016    match t {
1017        TypeExpr::Named { name, args } => {
1018            let payload = match args.len() {
1019                0 => None,
1020                1 => Some(args.into_iter().next().unwrap()),
1021                _ => Some(TypeExpr::Tuple(args)),
1022            };
1023            Ok(UnionVariant { name, payload })
1024        }
1025        // `Foo({ field :: T })` parses as Named with one arg = Record. handled above.
1026        _ => Err(ParseError {
1027            pos: 0,
1028            msg: "union variant must be a constructor name".into(),
1029        }),
1030    }
1031}
1032
1033/// Attach a collected list of `#` comments to whichever top-level
1034/// item variant carries them. Empty input is a no-op; the per-variant
1035/// `leading_comments: Vec<String>` field is always present.
1036fn attach_leading_comments(item: &mut Item, comments: Vec<String>) {
1037    if comments.is_empty() {
1038        return;
1039    }
1040    match item {
1041        Item::Import(i) => i.leading_comments = comments,
1042        Item::TypeDecl(t) => t.leading_comments = comments,
1043        Item::FnDecl(f) => f.leading_comments = comments,
1044    }
1045}