Skip to main content

lex_syntax/
parser.rs

1//! Recursive-descent parser for Lex. Pratt-style precedence climbing for
2//! binary operators; everything else is straightforward LL(1)-with-lookahead.
3
4use crate::syntax::*;
5use crate::token::{Token, TokenKind};
6
7pub fn parse(tokens: Vec<Token>) -> Result<Program, ParseError> {
8    // Back-compat entry: no source available, so leading comments are
9    // not recovered. `parse_source` in `lib.rs` calls
10    // `parse_with_src` directly and is the path that preserves them.
11    parse_with_src("", tokens)
12}
13
14/// Parse + attach `#` line-comments to the AST. The source string is
15/// used only to scan the gaps between tokens (where the lexer skipped
16/// whitespace and comments); the parser itself still operates purely
17/// on tokens. See `Program::leading_comments` for the data model.
18pub fn parse_with_src(src: &str, tokens: Vec<Token>) -> Result<Program, ParseError> {
19    let mut p = Parser::new(src, tokens);
20    let program = p.parse_program()?;
21    p.skip_newlines();
22    if !p.at_eof() {
23        return Err(p.error("unexpected token after program"));
24    }
25    Ok(program)
26}
27
/// Error produced when the token stream does not form a valid program.
///
/// Displays as `parse error at byte {pos}: {msg}`.
#[derive(Debug, thiserror::Error)]
#[error("parse error at byte {pos}: {msg}")]
pub struct ParseError {
    /// Byte offset the error is reported at, taken from token spans.
    pub pos: usize,
    /// Human-readable description of what was expected or found.
    pub msg: String,
}
34
/// Cursor-based parser state over an owned token vector.
struct Parser<'a> {
    /// Source text — needed only for trivia recovery (line comments
    /// in the gaps between tokens). Empty when called through the
    /// legacy `parse(tokens)` entry; comments are silently dropped
    /// in that path.
    src: &'a str,
    /// The full token stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed; equal to
    /// `tokens.len()` once the input is exhausted.
    idx: usize,
    /// Recursion depth across `parse_expr`. Capped at `MAX_DEPTH`
    /// to defend against adversarial input like a long sequence of
    /// `[[[{{{...` that would otherwise blow the stack. Found by
    /// the libFuzzer parser target — see `fuzz/fuzz_targets/parser.rs`.
    depth: u32,
    /// Counter for `let _ := ...` discard bindings (#200). Each
    /// discard gets a unique synthetic name so multiple `let _`
    /// in the same scope shadow rather than collide. The names
    /// aren't expressible in user syntax (`__lex_discard_N`),
    /// so user code can't reference them by accident.
    discard_counter: u32,
}
55
/// Maximum nesting depth the parser will accept before refusing
/// with a parse error. Real Lex code rarely exceeds 30; 96 leaves
/// generous headroom for legitimate generated code.
///
/// Each `parse_expr` level produces ~4-5 stack frames through the
/// `parse_binary_expr → parse_unary → parse_postfix →
/// parse_primary → ...` chain, so this caps the actual frame
/// count around 400-500 — well below even a 2 MiB test stack.
const MAX_DEPTH: u32 = 96;
65
66impl<'a> Parser<'a> {
67    fn new(src: &'a str, tokens: Vec<Token>) -> Self {
68        Self { src, tokens, idx: 0, depth: 0, discard_counter: 0 }
69    }
70
71    /// Extract `#` line-comments from the source byte range
72    /// `start..end`. The range must be a "gap" between two tokens
73    /// (or between source-start/end and a token); by construction
74    /// such gaps contain only whitespace and `#` comments — string
75    /// contents never appear because the lexer's logos rules would
76    /// have produced a `Str(...)` token covering them.
77    ///
78    /// Each returned entry is a single source line, trimmed of leading
79    /// whitespace (the `#` and everything after it preserved) and of
80    /// trailing whitespace. Blank lines between consecutive comments
81    /// are dropped — preserving inter-comment blank lines is left to
82    /// a follow-up; the bug this addresses (#417) is about comments
83    /// disappearing entirely.
84    fn extract_comments(&self, start: usize, end: usize) -> Vec<String> {
85        if start >= end || end > self.src.len() {
86            return Vec::new();
87        }
88        self.src[start..end]
89            .lines()
90            .filter_map(|line| {
91                let trimmed = line.trim_start();
92                if trimmed.starts_with('#') {
93                    Some(trimmed.trim_end().to_string())
94                } else {
95                    None
96                }
97            })
98            .collect()
99    }
100
101    fn at_eof(&self) -> bool {
102        self.idx >= self.tokens.len()
103    }
104
105    fn peek(&self) -> Option<&TokenKind> {
106        self.tokens.get(self.idx).map(|t| &t.kind)
107    }
108
109    fn bump(&mut self) -> Option<Token> {
110        let t = self.tokens.get(self.idx).cloned();
111        if t.is_some() {
112            self.idx += 1;
113        }
114        t
115    }
116
117    fn current_pos(&self) -> usize {
118        self.tokens
119            .get(self.idx)
120            .map(|t| t.span.start)
121            .unwrap_or_else(|| self.tokens.last().map(|t| t.span.end).unwrap_or(0))
122    }
123
124    fn error(&self, msg: impl Into<String>) -> ParseError {
125        ParseError { pos: self.current_pos(), msg: msg.into() }
126    }
127
128    fn skip_newlines(&mut self) {
129        while matches!(self.peek(), Some(TokenKind::Newline) | Some(TokenKind::Semi)) {
130            self.idx += 1;
131        }
132    }
133
134    fn expect(&mut self, expected: &TokenKind, ctx: &str) -> Result<Token, ParseError> {
135        self.skip_newlines();
136        match self.peek() {
137            Some(k) if std::mem::discriminant(k) == std::mem::discriminant(expected) => {
138                Ok(self.bump().unwrap())
139            }
140            Some(other) => Err(self.error(format!(
141                "expected {expected:?} {ctx}, got {other:?}"
142            ))),
143            None => Err(self.error(format!("expected {expected:?} {ctx}, got EOF"))),
144        }
145    }
146
147    fn eat(&mut self, k: &TokenKind) -> bool {
148        self.skip_newlines();
149        if let Some(cur) = self.peek() {
150            if std::mem::discriminant(cur) == std::mem::discriminant(k) {
151                self.bump();
152                return true;
153            }
154        }
155        false
156    }
157
158    fn expect_ident(&mut self, ctx: &str) -> Result<String, ParseError> {
159        self.skip_newlines();
160        match self.peek() {
161            Some(TokenKind::Ident(_)) => match self.bump().unwrap().kind {
162                TokenKind::Ident(name) => Ok(name),
163                _ => unreachable!(),
164            },
165            other => Err(self.error(format!("expected identifier {ctx}, got {other:?}"))),
166        }
167    }
168
169    // --- top level ---
170
171    fn parse_program(&mut self) -> Result<Program, ParseError> {
172        let mut items = Vec::new();
173        let mut leading_comments: Vec<String> = Vec::new();
174        // Byte offset of the end of the last consumed token (or 0 at
175        // start of file). The next gap to scan for comments is from
176        // here up to the start of the upcoming item's first token.
177        let mut gap_start: usize = 0;
178        loop {
179            self.skip_newlines();
180            if self.at_eof() {
181                break;
182            }
183            let item_start = self.tokens[self.idx].span.start;
184            let gap_comments = self.extract_comments(gap_start, item_start);
185            let pending_comments = if items.is_empty() {
186                leading_comments = gap_comments;
187                Vec::new()
188            } else {
189                gap_comments
190            };
191            let mut item = self.parse_item()?;
192            if !pending_comments.is_empty() {
193                attach_leading_comments(&mut item, pending_comments);
194            }
195            gap_start = self
196                .tokens
197                .get(self.idx.saturating_sub(1))
198                .map(|t| t.span.end)
199                .unwrap_or(gap_start);
200            items.push(item);
201        }
202        // Trailing comments live after the last consumed token (or
203        // span the whole file when there are no items).
204        let trailing_comments = self.extract_comments(gap_start, self.src.len());
205        Ok(Program { items, leading_comments, trailing_comments })
206    }
207
208    fn parse_item(&mut self) -> Result<Item, ParseError> {
209        match self.peek() {
210            Some(TokenKind::Import) => self.parse_import().map(Item::Import),
211            Some(TokenKind::Type) => self.parse_type_decl().map(Item::TypeDecl),
212            Some(TokenKind::Fn) => self.parse_fn_decl().map(Item::FnDecl),
213            other => Err(self.error(format!(
214                "expected `import`, `type`, or `fn` at top level, got {other:?}"
215            ))),
216        }
217    }
218
219    fn parse_import(&mut self) -> Result<Import, ParseError> {
220        self.expect(&TokenKind::Import, "in import")?;
221        let reference = match self.bump().map(|t| t.kind) {
222            Some(TokenKind::Str(s)) => s,
223            other => return Err(self.error(format!("expected string after `import`, got {other:?}"))),
224        };
225        self.expect(&TokenKind::As, "in import")?;
226        let alias = self.expect_ident("for import alias")?;
227        Ok(Import { reference, alias, leading_comments: Vec::new() })
228    }
229
230    fn parse_type_decl(&mut self) -> Result<TypeDecl, ParseError> {
231        self.expect(&TokenKind::Type, "in type decl")?;
232        let name = self.expect_ident("for type name")?;
233        let params = if self.eat(&TokenKind::LBracket) {
234            let ps = self.parse_ident_list()?;
235            self.expect(&TokenKind::RBracket, "after type params")?;
236            ps
237        } else {
238            Vec::new()
239        };
240        self.expect(&TokenKind::Eq, "in type decl")?;
241        let definition = self.parse_type_decl_rhs()?;
242        Ok(TypeDecl { name, params, definition, leading_comments: Vec::new() })
243    }
244
245    fn parse_ident_list(&mut self) -> Result<Vec<String>, ParseError> {
246        let mut out = Vec::new();
247        out.push(self.expect_ident("in identifier list")?);
248        while self.eat(&TokenKind::Comma) {
249            if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
250            out.push(self.expect_ident("in identifier list")?);
251        }
252        Ok(out)
253    }
254
255    /// `type Foo = Variant1 | Variant2(Payload)` is a union; otherwise a plain type expression.
256    fn parse_type_decl_rhs(&mut self) -> Result<TypeExpr, ParseError> {
257        let first = self.parse_type_expr()?;
258        // Detect union: PascalCase ident (or named type w/ optional payload) followed by `|`.
259        if matches!(self.peek_skip_newlines(), Some(TokenKind::Bar)) {
260            let mut variants = Vec::new();
261            variants.push(type_to_variant(first)?);
262            while self.eat(&TokenKind::Bar) {
263                let next = self.parse_type_expr()?;
264                variants.push(type_to_variant(next)?);
265            }
266            Ok(TypeExpr::Union(variants))
267        } else {
268            Ok(first)
269        }
270    }
271
272    fn peek_skip_newlines(&mut self) -> Option<TokenKind> {
273        let saved = self.idx;
274        self.skip_newlines();
275        let out = self.peek().cloned();
276        self.idx = saved;
277        out
278    }
279
280    fn parse_type_expr(&mut self) -> Result<TypeExpr, ParseError> {
281        let base = self.parse_type_expr_base()?;
282        self.maybe_wrap_refinement(base)
283    }
284
    /// Parse a type expression without any refinement postfix.
    ///
    /// After skipping newlines, dispatches on the next token:
    /// - `{` — record type (`parse_record_type`);
    /// - `(` — parenthesised, tuple, or function type
    ///   (`parse_paren_type_or_function`);
    /// - identifier — a possibly dotted name, optionally followed by
    ///   `[T, ...]` type arguments or a `(T, ...)` constructor payload,
    ///   producing `TypeExpr::Named`.
    ///
    /// Any other token (or end of input) is an error.
    fn parse_type_expr_base(&mut self) -> Result<TypeExpr, ParseError> {
        self.skip_newlines();
        match self.peek() {
            Some(TokenKind::LBrace) => self.parse_record_type(),
            Some(TokenKind::LParen) => self.parse_paren_type_or_function(),
            Some(TokenKind::Ident(_)) => {
                let mut name = self.expect_ident("in type expr")?;
                // Module-qualified type: `m.Type` or `m.n.Type`. We accept
                // dotted names here and let the loader rewrite them to the
                // file-local mangled form. After the loader pass, all type
                // names referenced by the type checker are single segments.
                while matches!(self.peek(), Some(TokenKind::Dot)) {
                    self.bump();
                    let next = self.expect_ident("after `.` in qualified type")?;
                    name.push('.');
                    name.push_str(&next);
                }
                // The `[` / `(` must directly follow the name: `peek` does
                // not skip newlines here.
                let args = if matches!(self.peek(), Some(TokenKind::LBracket)) {
                    // Type arguments: at least one is required; a trailing
                    // comma before `]` is accepted.
                    self.bump();
                    let mut args = Vec::new();
                    args.push(self.parse_type_expr()?);
                    while self.eat(&TokenKind::Comma) {
                        if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
                        args.push(self.parse_type_expr()?);
                    }
                    self.expect(&TokenKind::RBracket, "after type args")?;
                    args
                } else if matches!(self.peek(), Some(TokenKind::LParen)) {
                    // Constructor type with payload: `Name(T)` or `Name(T1, T2)`.
                    // An empty payload `Name()` is accepted, as is a trailing comma.
                    self.bump();
                    let mut args = Vec::new();
                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
                        args.push(self.parse_type_expr()?);
                        while self.eat(&TokenKind::Comma) {
                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
                            args.push(self.parse_type_expr()?);
                        }
                    }
                    self.expect(&TokenKind::RParen, "after constructor payload")?;
                    args
                } else {
                    Vec::new()
                };
                // Both bracketed arguments and a constructor payload end up
                // in the same `args` field.
                Ok(TypeExpr::Named { name, args })
            }
            other => Err(self.error(format!("expected type expression, got {other:?}"))),
        }
    }
333
334    /// Refinement type postfix (#209): `BaseType{binding | predicate}`.
335    ///
336    /// Disambiguates from a function body's opening brace by peeking
337    /// three tokens ahead — refinement requires `{ Ident |`, a body
338    /// begins with `{ <expr-starting-token>`. This means a refinement
339    /// binding name can't start with `|`, but that's fine since
340    /// identifiers don't.
341    fn maybe_wrap_refinement(&mut self, base: TypeExpr) -> Result<TypeExpr, ParseError> {
342        let next0 = self.tokens.get(self.idx).map(|t| &t.kind);
343        let next1 = self.tokens.get(self.idx + 1).map(|t| &t.kind);
344        let next2 = self.tokens.get(self.idx + 2).map(|t| &t.kind);
345        let is_refinement_lookahead = matches!(next0, Some(TokenKind::LBrace))
346            && matches!(next1, Some(TokenKind::Ident(_)))
347            && matches!(next2, Some(TokenKind::Bar));
348        if !is_refinement_lookahead {
349            return Ok(base);
350        }
351        self.bump(); // `{`
352        let binding = self.expect_ident("for refinement binding")?;
353        self.expect(&TokenKind::Bar, "after refinement binding")?;
354        let predicate = self.parse_expr()?;
355        self.expect(&TokenKind::RBrace, "to close refinement")?;
356        Ok(TypeExpr::Refined {
357            base: Box::new(base),
358            binding,
359            predicate: Box::new(predicate),
360        })
361    }
362
    /// Parse a record type: `{ name :: Type, ...Other, ... }`.
    ///
    /// Entries are comma-separated and may be either `name :: Type`
    /// fields or `...Name` spreads; a trailing comma and an empty `{}`
    /// are both accepted. Returns `TypeExpr::Record` when no spread was
    /// seen, otherwise `TypeExpr::RecordWithSpreads`. Fields and spreads
    /// are collected into separate lists, so their relative order is
    /// not retained.
    fn parse_record_type(&mut self) -> Result<TypeExpr, ParseError> {
        self.expect(&TokenKind::LBrace, "in record type")?;
        let mut fields = Vec::new();
        let mut spreads = Vec::new();
        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
            loop {
                self.skip_newlines();
                if matches!(self.peek(), Some(TokenKind::DotDotDot)) {
                    self.bump(); // consume `...`
                    let name = self.expect_ident("after `...` in record type spread")?;
                    spreads.push(name);
                } else {
                    let name = self.expect_ident("in record field")?;
                    self.expect(&TokenKind::ColonColon, "after record field name")?;
                    let ty = self.parse_type_expr()?;
                    fields.push(TypeField { name, ty });
                }
                self.skip_newlines();
                // No comma: the entry list is over.
                if !self.eat(&TokenKind::Comma) { break; }
                // Comma directly before `}`: trailing comma, also over.
                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
            }
        }
        self.expect(&TokenKind::RBrace, "in record type")?;
        if spreads.is_empty() {
            Ok(TypeExpr::Record(fields))
        } else {
            Ok(TypeExpr::RecordWithSpreads { spreads, fields })
        }
    }
392
393    fn parse_paren_type_or_function(&mut self) -> Result<TypeExpr, ParseError> {
394        self.expect(&TokenKind::LParen, "in type")?;
395        let mut args = Vec::new();
396        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
397            args.push(self.parse_type_expr()?);
398            while self.eat(&TokenKind::Comma) {
399                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
400                args.push(self.parse_type_expr()?);
401            }
402        }
403        self.expect(&TokenKind::RParen, "in type")?;
404        // Function type if followed by `->`.
405        if matches!(self.peek_skip_newlines(), Some(TokenKind::Arrow)) {
406            self.skip_newlines();
407            self.bump();
408            let effects = self.parse_effects()?;
409            let ret = self.parse_type_expr()?;
410            Ok(TypeExpr::Function {
411                params: args,
412                effects,
413                ret: Box::new(ret),
414            })
415        } else if args.len() == 1 {
416            // Parenthesized type expression.
417            Ok(args.into_iter().next().unwrap())
418        } else {
419            Ok(TypeExpr::Tuple(args))
420        }
421    }
422
423    fn parse_fn_decl(&mut self) -> Result<FnDecl, ParseError> {
424        self.expect(&TokenKind::Fn, "in fn decl")?;
425        let name = self.expect_ident("for function name")?;
426        let type_params = if self.eat(&TokenKind::LBracket) {
427            let ps = self.parse_ident_list()?;
428            self.expect(&TokenKind::RBracket, "after type params")?;
429            ps
430        } else {
431            Vec::new()
432        };
433        self.expect(&TokenKind::LParen, "before params")?;
434        let mut params = Vec::new();
435        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
436            params.push(self.parse_param()?);
437            while self.eat(&TokenKind::Comma) {
438                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
439                params.push(self.parse_param()?);
440            }
441        }
442        self.expect(&TokenKind::RParen, "after params")?;
443        self.expect(&TokenKind::Arrow, "before return type")?;
444        let effects = self.parse_effects()?;
445        let return_type = self.parse_type_expr()?;
446        let examples = self.parse_examples_block()?;
447        let body = self.parse_block()?;
448        Ok(FnDecl { name, type_params, params, effects, return_type, body, examples, leading_comments: Vec::new() })
449    }
450
451    /// Parse an optional `examples { call(a, b) => expected, ... }` block
452    /// sitting between the return type and the body (#369). Returns an
453    /// empty vec when no block is present.
454    fn parse_examples_block(&mut self) -> Result<Vec<Example>, ParseError> {
455        // Contextual: not a reserved keyword. Peek for the literal
456        // identifier `examples` followed by `{`; otherwise no block.
457        let is_examples_kw = matches!(
458            self.peek_skip_newlines(),
459            Some(TokenKind::Ident(s)) if s == "examples"
460        );
461        if !is_examples_kw {
462            return Ok(Vec::new());
463        }
464        self.skip_newlines();
465        self.bump(); // consume `examples`
466        self.expect(&TokenKind::LBrace, "after `examples`")?;
467        let mut cases = Vec::new();
468        loop {
469            self.skip_newlines();
470            if matches!(self.peek(), Some(TokenKind::RBrace)) { break; }
471            let call = self.parse_expr()?;
472            self.expect(&TokenKind::FatArrow, "in example case (between call and expected)")?;
473            let expected = self.parse_expr()?;
474            let (args, _) = match call {
475                Expr::Call { callee: _, args } => (args, ()),
476                other => return Err(self.error(
477                    format!("example case must be a call to the function under definition; got {other:?}")
478                )),
479            };
480            cases.push(Example { args, expected });
481            self.skip_newlines();
482            if !self.eat(&TokenKind::Comma) {
483                self.skip_newlines();
484                break;
485            }
486        }
487        self.expect(&TokenKind::RBrace, "to close examples block")?;
488        Ok(cases)
489    }
490
491    fn parse_param(&mut self) -> Result<Param, ParseError> {
492        let name = if matches!(self.peek_skip_newlines(), Some(TokenKind::Underscore)) {
493            self.skip_newlines();
494            self.bump();
495            self.discard_counter += 1;
496            format!("__lex_discard_{}", self.discard_counter)
497        } else {
498            self.expect_ident("for parameter name")?
499        };
500        self.expect(&TokenKind::ColonColon, "after parameter name")?;
501        let ty = self.parse_type_expr()?;
502        Ok(Param { name, ty })
503    }
504
505    fn parse_effects(&mut self) -> Result<Vec<Effect>, ParseError> {
506        if !self.eat(&TokenKind::LBracket) {
507            return Ok(Vec::new());
508        }
509        let mut out = Vec::new();
510        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) {
511            out.push(self.parse_effect()?);
512            while self.eat(&TokenKind::Comma) {
513                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
514                out.push(self.parse_effect()?);
515            }
516        }
517        self.expect(&TokenKind::RBracket, "after effects")?;
518        Ok(out)
519    }
520
521    fn parse_effect(&mut self) -> Result<Effect, ParseError> {
522        let name = self.expect_ident("for effect name")?;
523        let arg = if self.eat(&TokenKind::LParen) {
524            let arg = match self.bump().map(|t| t.kind) {
525                Some(TokenKind::Str(s)) => EffectArg::Str(s),
526                Some(TokenKind::Int(n)) => EffectArg::Int(n),
527                Some(TokenKind::Ident(s)) => EffectArg::Ident(s),
528                other => return Err(self.error(format!("invalid effect arg: {other:?}"))),
529            };
530            self.expect(&TokenKind::RParen, "after effect arg")?;
531            Some(arg)
532        } else {
533            None
534        };
535        Ok(Effect { name, arg })
536    }
537
538    // --- blocks and statements ---
539
540    fn parse_block(&mut self) -> Result<Block, ParseError> {
541        self.expect(&TokenKind::LBrace, "before block")?;
542        let mut statements = Vec::new();
543        let result;
544        loop {
545            self.skip_newlines();
546            if matches!(self.peek(), Some(TokenKind::RBrace)) {
547                // Empty block: synthesize Unit literal.
548                result = Box::new(Expr::Lit(Literal::Unit));
549                break;
550            }
551            // Try parsing a let; otherwise an expression.
552            if matches!(self.peek(), Some(TokenKind::Let)) {
553                let stmt = self.parse_let_statement()?;
554                statements.push(stmt);
555                self.skip_newlines();
556                continue;
557            }
558            let expr = self.parse_expr()?;
559            self.skip_newlines();
560            // If the next token is `}`, this expression is the block's result.
561            if matches!(self.peek(), Some(TokenKind::RBrace)) {
562                result = Box::new(expr);
563                break;
564            }
565            statements.push(Statement::Expr(expr));
566        }
567        self.expect(&TokenKind::RBrace, "to close block")?;
568        Ok(Block { statements, result })
569    }
570
571    fn parse_let_statement(&mut self) -> Result<Statement, ParseError> {
572        self.expect(&TokenKind::Let, "in let")?;
573        // `let _ := expr` is the discard idiom (#200). The RHS is
574        // still evaluated for its effect, but the result is bound
575        // to a synthetic name nothing else references — so the
576        // type-checker / VM treat it like a normal let, but user
577        // code can't accidentally reach it.
578        let name = if matches!(self.peek_skip_newlines(), Some(TokenKind::Underscore)) {
579            self.skip_newlines();
580            self.bump();
581            self.discard_counter += 1;
582            format!("__lex_discard_{}", self.discard_counter)
583        } else {
584            self.expect_ident("after `let`")?
585        };
586        let ty = if self.eat(&TokenKind::ColonColon) {
587            Some(self.parse_type_expr()?)
588        } else {
589            None
590        };
591        self.expect(&TokenKind::ColonEq, "in let")?;
592        let value = self.parse_expr()?;
593        Ok(Statement::Let { name, ty, value })
594    }
595
596    // --- expressions ---
597
598    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
599        // Recursion gate: every nested expression — match arms,
600        // tuple/list/record/block elements, function args, etc. —
601        // enters here, so this is the right place to bound depth.
602        // Decrement happens whether the inner call succeeds or fails.
603        if self.depth >= MAX_DEPTH {
604            return Err(ParseError {
605                pos: self.current_pos(),
606                msg: format!(
607                    "expression nests too deeply (max {MAX_DEPTH}); \
608                     malformed or hand-crafted input?"),
609            });
610        }
611        self.depth += 1;
612        let r = self.parse_expr_inner();
613        self.depth -= 1;
614        r
615    }
616
617    fn parse_expr_inner(&mut self) -> Result<Expr, ParseError> {
618        // Pipes are left-associative and bind less tightly than binary ops.
619        let mut left = self.parse_binary_expr(0)?;
620        while matches!(self.peek_skip_newlines(), Some(TokenKind::Pipe)) {
621            self.skip_newlines();
622            self.bump();
623            let right = self.parse_binary_expr(0)?;
624            left = Expr::Pipe { left: Box::new(left), right: Box::new(right) };
625        }
626        Ok(left)
627    }
628
629    fn parse_binary_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
630        let mut lhs = self.parse_unary()?;
631        loop {
632            let op = match self.peek_binop() {
633                Some(op) if op.precedence() >= min_prec => op,
634                _ => break,
635            };
636            self.skip_newlines();
637            self.bump();
638            let rhs = self.parse_binary_expr(op.precedence() + 1)?;
639            lhs = Expr::BinOp { op, lhs: Box::new(lhs), rhs: Box::new(rhs) };
640        }
641        Ok(lhs)
642    }
643
644    fn peek_binop(&mut self) -> Option<BinOp> {
645        match self.peek_skip_newlines()? {
646            TokenKind::Plus => Some(BinOp::Add),
647            TokenKind::Minus => Some(BinOp::Sub),
648            TokenKind::Star => Some(BinOp::Mul),
649            TokenKind::Slash => Some(BinOp::Div),
650            TokenKind::Percent => Some(BinOp::Mod),
651            TokenKind::EqEq => Some(BinOp::Eq),
652            TokenKind::BangEq => Some(BinOp::Neq),
653            TokenKind::Lt => Some(BinOp::Lt),
654            TokenKind::LtEq => Some(BinOp::Lte),
655            TokenKind::Gt => Some(BinOp::Gt),
656            TokenKind::GtEq => Some(BinOp::Gte),
657            TokenKind::And => Some(BinOp::And),
658            TokenKind::Or => Some(BinOp::Or),
659            _ => None,
660        }
661    }
662
663    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
664        self.skip_newlines();
665        match self.peek() {
666            Some(TokenKind::Not) => {
667                self.bump();
668                let inner = self.parse_unary()?;
669                Ok(Expr::UnaryOp { op: UnaryOp::Not, expr: Box::new(inner) })
670            }
671            Some(TokenKind::Minus) => {
672                self.bump();
673                let inner = self.parse_unary()?;
674                Ok(Expr::UnaryOp { op: UnaryOp::Neg, expr: Box::new(inner) })
675            }
676            _ => self.parse_postfix(),
677        }
678    }
679
680    fn parse_postfix(&mut self) -> Result<Expr, ParseError> {
681        let mut expr = self.parse_primary()?;
682        loop {
683            // Postfix operations don't cross newlines (they bind tightly).
684            match self.peek() {
685                Some(TokenKind::Dot) => {
686                    self.bump();
687                    let field = self.expect_ident("after `.`")?;
688                    expr = Expr::Field { value: Box::new(expr), field };
689                }
690                Some(TokenKind::LParen) => {
691                    self.bump();
692                    let mut args = Vec::new();
693                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
694                        args.push(self.parse_expr()?);
695                        while self.eat(&TokenKind::Comma) {
696                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
697                            args.push(self.parse_expr()?);
698                        }
699                    }
700                    self.expect(&TokenKind::RParen, "in call")?;
701                    expr = Expr::Call { callee: Box::new(expr), args };
702                }
703                Some(TokenKind::Question) => {
704                    self.bump();
705                    expr = Expr::Try(Box::new(expr));
706                }
707                _ => break,
708            }
709        }
710        Ok(expr)
711    }
712
713    fn parse_primary(&mut self) -> Result<Expr, ParseError> {
714        self.skip_newlines();
715        match self.peek() {
716            Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
717                TokenKind::Int(n) => Ok(Expr::Lit(Literal::Int(n))),
718                _ => unreachable!(),
719            },
720            Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
721                TokenKind::Float(n) => Ok(Expr::Lit(Literal::Float(n))),
722                _ => unreachable!(),
723            },
724            Some(TokenKind::Str(_)) => match self.bump().unwrap().kind {
725                TokenKind::Str(s) => Ok(Expr::Lit(Literal::Str(s))),
726                _ => unreachable!(),
727            },
728            Some(TokenKind::Bytes(_)) => match self.bump().unwrap().kind {
729                TokenKind::Bytes(b) => Ok(Expr::Lit(Literal::Bytes(b))),
730                _ => unreachable!(),
731            },
732            Some(TokenKind::True) => { self.bump(); Ok(Expr::Lit(Literal::Bool(true))) }
733            Some(TokenKind::False) => { self.bump(); Ok(Expr::Lit(Literal::Bool(false))) }
734            Some(TokenKind::If) => self.parse_if(),
735            Some(TokenKind::Match) => self.parse_match(),
736            Some(TokenKind::Fn) => self.parse_lambda(),
737            Some(TokenKind::LBrace) => self.parse_brace_expr(),
738            Some(TokenKind::LBracket) => self.parse_list_literal(),
739            Some(TokenKind::LParen) => self.parse_paren_or_tuple(),
740            Some(TokenKind::Ident(_)) => self.parse_ident_or_record(),
741            other => Err(self.error(format!("expected expression, got {other:?}"))),
742        }
743    }
744
745    /// Disambiguate `{` between record literal and block.
746    /// Lookahead: `{ Ident :` is a record literal; `{ }` is also a record
747    /// (empty block has no use). Anything else is a block.
748    fn parse_brace_expr(&mut self) -> Result<Expr, ParseError> {
749        // Save position; peek 2-3 tokens past `{` (skipping newlines).
750        let saved = self.idx;
751        self.bump(); // `{`
752        // Skip newlines.
753        while matches!(self.peek(), Some(TokenKind::Newline) | Some(TokenKind::Semi)) {
754            self.idx += 1;
755        }
756        let is_record = matches!(self.peek(), Some(TokenKind::RBrace))
757            || (matches!(self.peek(), Some(TokenKind::Ident(_)))
758                && matches!(self.tokens.get(self.idx + 1).map(|t| &t.kind), Some(TokenKind::Colon) | Some(TokenKind::Comma) | Some(TokenKind::RBrace)));
759        self.idx = saved;
760        if is_record {
761            self.parse_record_literal()
762        } else {
763            Ok(Expr::Block(self.parse_block()?))
764        }
765    }
766
767    fn parse_record_literal(&mut self) -> Result<Expr, ParseError> {
768        self.expect(&TokenKind::LBrace, "in record literal")?;
769        let mut fields = Vec::new();
770        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
771            loop {
772                self.skip_newlines();
773                let name = self.expect_ident("in record literal")?;
774                let value = if self.eat(&TokenKind::Colon) {
775                    self.parse_expr()?
776                } else {
777                    // shorthand: `{ name }` => `{ name: name }`
778                    Expr::Var(name.clone())
779                };
780                fields.push(RecordLitField { name, value });
781                self.skip_newlines();
782                if !self.eat(&TokenKind::Comma) { break; }
783                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
784            }
785        }
786        self.expect(&TokenKind::RBrace, "after record literal")?;
787        Ok(Expr::RecordLit(fields))
788    }
789
790    fn parse_if(&mut self) -> Result<Expr, ParseError> {
791        self.expect(&TokenKind::If, "in if")?;
792        let cond = self.parse_expr()?;
793        let then_block = self.parse_block()?;
794        self.expect(&TokenKind::Else, "expected `else`")?;
795        let else_block = self.parse_block()?;
796        Ok(Expr::If { cond: Box::new(cond), then_block, else_block })
797    }
798
799    fn parse_match(&mut self) -> Result<Expr, ParseError> {
800        self.expect(&TokenKind::Match, "in match")?;
801        let scrutinee = self.parse_expr()?;
802        self.expect(&TokenKind::LBrace, "before match arms")?;
803        let mut arms = Vec::new();
804        loop {
805            self.skip_newlines();
806            if matches!(self.peek(), Some(TokenKind::RBrace)) { break; }
807            let pattern = self.parse_pattern()?;
808            self.expect(&TokenKind::FatArrow, "in match arm")?;
809            let body = self.parse_expr()?;
810            arms.push(Arm { pattern, body });
811            self.skip_newlines();
812            if !self.eat(&TokenKind::Comma) { break; }
813        }
814        self.expect(&TokenKind::RBrace, "after match arms")?;
815        Ok(Expr::Match { scrutinee: Box::new(scrutinee), arms })
816    }
817
818    fn parse_lambda(&mut self) -> Result<Expr, ParseError> {
819        self.expect(&TokenKind::Fn, "in lambda")?;
820        self.expect(&TokenKind::LParen, "before lambda params")?;
821        let mut params = Vec::new();
822        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
823            params.push(self.parse_param()?);
824            while self.eat(&TokenKind::Comma) {
825                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
826                params.push(self.parse_param()?);
827            }
828        }
829        self.expect(&TokenKind::RParen, "after lambda params")?;
830        self.expect(&TokenKind::Arrow, "before lambda return type")?;
831        let effects = self.parse_effects()?;
832        let return_type = self.parse_type_expr()?;
833        let body = self.parse_block()?;
834        Ok(Expr::Lambda(Box::new(Lambda { params, effects, return_type, body })))
835    }
836
837    fn parse_list_literal(&mut self) -> Result<Expr, ParseError> {
838        self.expect(&TokenKind::LBracket, "before list literal")?;
839        let mut items = Vec::new();
840        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) {
841            items.push(self.parse_expr()?);
842            while self.eat(&TokenKind::Comma) {
843                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
844                items.push(self.parse_expr()?);
845            }
846        }
847        self.expect(&TokenKind::RBracket, "after list literal")?;
848        Ok(Expr::ListLit(items))
849    }
850
851    fn parse_paren_or_tuple(&mut self) -> Result<Expr, ParseError> {
852        self.expect(&TokenKind::LParen, "")?;
853        if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
854            self.bump();
855            return Ok(Expr::Lit(Literal::Unit));
856        }
857        let first = self.parse_expr()?;
858        // Inline type ascription: `(expr :: Type)` — peek for `::` before
859        // deciding whether this is a tuple, a grouping, or an ascription.
860        if self.eat(&TokenKind::ColonColon) {
861            let ty = self.parse_type_expr()?;
862            self.expect(&TokenKind::RParen, "after type ascription")?;
863            return Ok(Expr::Ascription { value: Box::new(first), ty });
864        }
865        if self.eat(&TokenKind::Comma) {
866            let mut items = vec![first];
867            if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
868                items.push(self.parse_expr()?);
869                while self.eat(&TokenKind::Comma) {
870                    if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
871                    items.push(self.parse_expr()?);
872                }
873            }
874            self.expect(&TokenKind::RParen, "after tuple")?;
875            Ok(Expr::TupleLit(items))
876        } else {
877            self.expect(&TokenKind::RParen, "after parenthesized expression")?;
878            Ok(first)
879        }
880    }
881
882    fn parse_ident_or_record(&mut self) -> Result<Expr, ParseError> {
883        // Ident is parsed as a Var; later postfix (`(`, `.`, `?`) attach.
884        let name = self.expect_ident("")?;
885        Ok(Expr::Var(name))
886    }
887
888    // --- patterns ---
889
890    fn parse_pattern(&mut self) -> Result<Pattern, ParseError> {
891        self.skip_newlines();
892        match self.peek() {
893            Some(TokenKind::Minus) => {
894                self.bump();
895                self.skip_newlines();
896                match self.peek() {
897                    Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
898                        TokenKind::Int(n) => Ok(Pattern::Lit(Literal::Int(-n))),
899                        _ => unreachable!(),
900                    },
901                    Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
902                        TokenKind::Float(n) => Ok(Pattern::Lit(Literal::Float(-n))),
903                        _ => unreachable!(),
904                    },
905                    other => Err(self.error(format!("expected Int or Float after `-` in pattern, got {other:?}"))),
906                }
907            }
908            Some(TokenKind::Underscore) => { self.bump(); Ok(Pattern::Wild) }
909            Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
910                TokenKind::Int(n) => Ok(Pattern::Lit(Literal::Int(n))),
911                _ => unreachable!(),
912            },
913            Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
914                TokenKind::Float(n) => Ok(Pattern::Lit(Literal::Float(n))),
915                _ => unreachable!(),
916            },
917            Some(TokenKind::Str(_)) => match self.bump().unwrap().kind {
918                TokenKind::Str(s) => Ok(Pattern::Lit(Literal::Str(s))),
919                _ => unreachable!(),
920            },
921            Some(TokenKind::True) => { self.bump(); Ok(Pattern::Lit(Literal::Bool(true))) }
922            Some(TokenKind::False) => { self.bump(); Ok(Pattern::Lit(Literal::Bool(false))) }
923            Some(TokenKind::LBrace) => self.parse_record_pattern(),
924            Some(TokenKind::LParen) => self.parse_tuple_pattern(),
925            Some(TokenKind::Ident(_)) => {
926                let name = self.expect_ident("")?;
927                if matches!(self.peek(), Some(TokenKind::LParen)) {
928                    self.bump();
929                    let mut args = Vec::new();
930                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
931                        args.push(self.parse_pattern()?);
932                        while self.eat(&TokenKind::Comma) {
933                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
934                            args.push(self.parse_pattern()?);
935                        }
936                    }
937                    self.expect(&TokenKind::RParen, "after constructor pattern")?;
938                    Ok(Pattern::Constructor { name, args })
939                } else {
940                    Ok(Pattern::Var(name))
941                }
942            }
943            other => Err(self.error(format!("expected pattern, got {other:?}"))),
944        }
945    }
946
947    fn parse_record_pattern(&mut self) -> Result<Pattern, ParseError> {
948        self.expect(&TokenKind::LBrace, "")?;
949        let mut fields = Vec::new();
950        let rest = false;
951        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
952            loop {
953                self.skip_newlines();
954                let name = self.expect_ident("in record pattern")?;
955                let pattern = if self.eat(&TokenKind::Colon) {
956                    Some(self.parse_pattern()?)
957                } else {
958                    None
959                };
960                fields.push(RecordPatField { name, pattern });
961                self.skip_newlines();
962                if !self.eat(&TokenKind::Comma) { break; }
963                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
964            }
965        }
966        self.expect(&TokenKind::RBrace, "after record pattern")?;
967        Ok(Pattern::Record { fields, rest })
968    }
969
970    fn parse_tuple_pattern(&mut self) -> Result<Pattern, ParseError> {
971        self.expect(&TokenKind::LParen, "")?;
972        let mut items = Vec::new();
973        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
974            items.push(self.parse_pattern()?);
975            while self.eat(&TokenKind::Comma) {
976                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
977                items.push(self.parse_pattern()?);
978            }
979        }
980        self.expect(&TokenKind::RParen, "after tuple pattern")?;
981        if items.len() == 1 {
982            Ok(items.into_iter().next().unwrap())
983        } else {
984            Ok(Pattern::Tuple(items))
985        }
986    }
987}
988
989/// In a union RHS, every leaf must be a `Named` type expression — that is, a
990/// PascalCase ident with optional payload via `Variant(payload_type)`.
991fn type_to_variant(t: TypeExpr) -> Result<UnionVariant, ParseError> {
992    match t {
993        TypeExpr::Named { name, args } => {
994            let payload = match args.len() {
995                0 => None,
996                1 => Some(args.into_iter().next().unwrap()),
997                _ => Some(TypeExpr::Tuple(args)),
998            };
999            Ok(UnionVariant { name, payload })
1000        }
1001        // `Foo({ field :: T })` parses as Named with one arg = Record. handled above.
1002        _ => Err(ParseError {
1003            pos: 0,
1004            msg: "union variant must be a constructor name".into(),
1005        }),
1006    }
1007}
1008
1009/// Attach a collected list of `#` comments to whichever top-level
1010/// item variant carries them. Empty input is a no-op; the per-variant
1011/// `leading_comments: Vec<String>` field is always present.
1012fn attach_leading_comments(item: &mut Item, comments: Vec<String>) {
1013    if comments.is_empty() {
1014        return;
1015    }
1016    match item {
1017        Item::Import(i) => i.leading_comments = comments,
1018        Item::TypeDecl(t) => t.leading_comments = comments,
1019        Item::FnDecl(f) => f.leading_comments = comments,
1020    }
1021}