Skip to main content

lex_syntax/
parser.rs

1//! Recursive-descent parser for Lex. Pratt-style precedence climbing for
2//! binary operators; everything else is straightforward LL(1)-with-lookahead.
3
4use crate::syntax::*;
5use crate::token::{Token, TokenKind, lex as lex_tokens};
6
7pub fn parse(tokens: Vec<Token>) -> Result<Program, ParseError> {
8    // Back-compat entry: no source available, so leading comments are
9    // not recovered. `parse_source` in `lib.rs` calls
10    // `parse_with_src` directly and is the path that preserves them.
11    parse_with_src("", tokens)
12}
13
14/// Parse + attach `#` line-comments to the AST. The source string is
15/// used only to scan the gaps between tokens (where the lexer skipped
16/// whitespace and comments); the parser itself still operates purely
17/// on tokens. See `Program::leading_comments` for the data model.
18pub fn parse_with_src(src: &str, tokens: Vec<Token>) -> Result<Program, ParseError> {
19    let mut p = Parser::new(src, tokens);
20    let program = p.parse_program()?;
21    p.skip_newlines();
22    if !p.at_eof() {
23        return Err(p.error("unexpected token after program"));
24    }
25    Ok(program)
26}
27
/// Error produced by the parser; rendered as
/// `parse error at byte {pos}: {msg}`.
#[derive(Debug, thiserror::Error)]
#[error("parse error at byte {pos}: {msg}")]
pub struct ParseError {
    /// Byte position reported in the rendered message.
    pub pos: usize,
    /// Human-readable description of what went wrong.
    pub msg: String,
}
34
/// Parser state: a cursor over a token vector plus a few counters.
struct Parser<'a> {
    /// Source text — needed only for trivia recovery (line comments
    /// in the gaps between tokens). Empty when called through the
    /// legacy `parse(tokens)` entry; comments are silently dropped
    /// in that path.
    src: &'a str,
    /// The token stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to examine; equal to
    /// `tokens.len()` once everything has been consumed.
    idx: usize,
    /// Recursion depth across `parse_expr`. Capped at `MAX_DEPTH`
    /// to defend against adversarial input like a long sequence of
    /// `[[[{{{...` that would otherwise blow the stack. Found by
    /// the libFuzzer parser target — see `fuzz/fuzz_targets/parser.rs`.
    depth: u32,
    /// Counter for `let _ := ...` discard bindings (#200). Each
    /// discard gets a unique synthetic name so multiple `let _`
    /// in the same scope shadow rather than collide. The names
    /// aren't expressible in user syntax (`__lex_discard_N`),
    /// so user code can't reference them by accident.
    discard_counter: u32,
}
55
/// Maximum nesting depth the parser will accept before refusing
/// with a parse error. Real Lex code rarely exceeds 30; 96 leaves
/// generous headroom for legitimate generated code.
///
/// The limit is enforced in `parse_expr`, which rejects the input once
/// the parser's `depth` counter has reached this value.
///
/// Each `parse_expr` level produces ~4-5 stack frames through the
/// `parse_binary_expr → parse_unary → parse_postfix →
/// parse_primary → ...` chain, so this caps the actual frame
/// count around 400-500 — well below even a 2 MiB test stack.
const MAX_DEPTH: u32 = 96;
65
/// A segment of a string interpolation literal (#562).
///
/// Both variants borrow from the string being split.
enum InterpPart<'a> {
    /// Literal text outside any `{...}`.
    Text(&'a str),
    /// The text between a `{` and its matching `}` (braces excluded),
    /// still unparsed.
    Expr(&'a str),
}
71
72/// Split `s` into text and `{expr}` segments. Tracks brace depth so
73/// record literals inside interpolations don't confuse the scanner.
74fn split_interp_parts(s: &str) -> Result<Vec<InterpPart<'_>>, String> {
75    let mut parts = Vec::new();
76    let mut rest = s;
77    while let Some(open) = rest.find('{') {
78        if open > 0 {
79            parts.push(InterpPart::Text(&rest[..open]));
80        }
81        let after_open = &rest[open + 1..];
82        let close = find_closing_brace(after_open)
83            .ok_or_else(|| "unclosed `{` in string interpolation".to_string())?;
84        let expr_content = &after_open[..close];
85        if expr_content.trim().is_empty() {
86            return Err("empty `{}` in string interpolation".to_string());
87        }
88        parts.push(InterpPart::Expr(expr_content));
89        rest = &after_open[close + 1..];
90    }
91    if !rest.is_empty() {
92        parts.push(InterpPart::Text(rest));
93    }
94    Ok(parts)
95}
96
/// Find the closing `}` matching the opening `{` that was already consumed.
///
/// Returns the byte offset of that `}` within `s`, or `None` when the
/// brace is never closed. Nested brace depth is tracked so `{a: {x: 1}}`
/// returns the outer `}`.
///
/// Braces that appear inside a double-quoted string literal within the
/// interpolation (e.g. `{f("}")}`) are not counted; a backslash inside
/// such a literal escapes the following character, so `\"` does not end
/// the literal.
fn find_closing_brace(s: &str) -> Option<usize> {
    let mut depth: usize = 1;
    let mut in_string = false;
    let mut escaped = false;
    for (i, c) in s.char_indices() {
        if in_string {
            // Inside a nested string literal: only look for its end.
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == '"' {
                in_string = false;
            }
            continue;
        }
        match c {
            '"' => in_string = true,
            '{' => depth += 1,
            '}' => {
                // `depth` starts at 1 and we return as soon as it hits 0,
                // so this subtraction cannot underflow.
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
115
116/// Build a `str.concat(left, right)` call expression.
117fn str_concat_expr(left: Expr, right: Expr) -> Expr {
118    Expr::Call {
119        callee: Box::new(Expr::Field {
120            value: Box::new(Expr::Var("str".into())),
121            field: "concat".into(),
122        }),
123        args: vec![left, right],
124    }
125}
126
127impl<'a> Parser<'a> {
128    fn new(src: &'a str, tokens: Vec<Token>) -> Self {
129        Self { src, tokens, idx: 0, depth: 0, discard_counter: 0 }
130    }
131
132    /// Desugar `"hello {name}"` to `str.concat("hello ", name)` (#562).
133    /// Calls the lexer on each `{...}` content and parses it as an expression.
134    fn desugar_string_interpolation(&self, s: &str) -> Result<Expr, ParseError> {
135        let parts = match split_interp_parts(s) {
136            Ok(p) => p,
137            Err(msg) => return Err(ParseError { pos: 0, msg }),
138        };
139        let mut exprs: Vec<Expr> = Vec::new();
140        for part in parts {
141            match part {
142                InterpPart::Text(t) => {
143                    if !t.is_empty() {
144                        exprs.push(Expr::Lit(Literal::Str(t.to_string())));
145                    }
146                }
147                InterpPart::Expr(content) => {
148                    let tokens = lex_tokens(content.trim()).map_err(|e| ParseError {
149                        pos: 0,
150                        msg: format!("in string interpolation `{{{content}}}`': {e}"),
151                    })?;
152                    let mut sub = Parser::new("", tokens);
153                    exprs.push(sub.parse_expr()?);
154                    if !sub.at_eof() {
155                        return Err(ParseError {
156                            pos: 0,
157                            msg: format!("unexpected tokens after expression in `{{{content}}}`"),
158                        });
159                    }
160                }
161            }
162        }
163        if exprs.is_empty() {
164            return Ok(Expr::Lit(Literal::Str(String::new())));
165        }
166        let mut result = exprs.remove(0);
167        for next in exprs {
168            result = str_concat_expr(result, next);
169        }
170        Ok(result)
171    }
172
173    /// Extract `#` line-comments from the source byte range
174    /// `start..end`. The range must be a "gap" between two tokens
175    /// (or between source-start/end and a token); by construction
176    /// such gaps contain only whitespace and `#` comments — string
177    /// contents never appear because the lexer's logos rules would
178    /// have produced a `Str(...)` token covering them.
179    ///
180    /// Each returned entry is a single source line, trimmed of leading
181    /// whitespace (the `#` and everything after it preserved) and of
182    /// trailing whitespace. Blank lines between consecutive comments
183    /// are dropped — preserving inter-comment blank lines is left to
184    /// a follow-up; the bug this addresses (#417) is about comments
185    /// disappearing entirely.
186    fn extract_comments(&self, start: usize, end: usize) -> Vec<String> {
187        if start >= end || end > self.src.len() {
188            return Vec::new();
189        }
190        self.src[start..end]
191            .lines()
192            .filter_map(|line| {
193                let trimmed = line.trim_start();
194                if trimmed.starts_with('#') {
195                    Some(trimmed.trim_end().to_string())
196                } else {
197                    None
198                }
199            })
200            .collect()
201    }
202
203    fn at_eof(&self) -> bool {
204        self.idx >= self.tokens.len()
205    }
206
207    fn peek(&self) -> Option<&TokenKind> {
208        self.tokens.get(self.idx).map(|t| &t.kind)
209    }
210
211    fn bump(&mut self) -> Option<Token> {
212        let t = self.tokens.get(self.idx).cloned();
213        if t.is_some() {
214            self.idx += 1;
215        }
216        t
217    }
218
219    fn current_pos(&self) -> usize {
220        self.tokens
221            .get(self.idx)
222            .map(|t| t.span.start)
223            .unwrap_or_else(|| self.tokens.last().map(|t| t.span.end).unwrap_or(0))
224    }
225
226    fn error(&self, msg: impl Into<String>) -> ParseError {
227        ParseError { pos: self.current_pos(), msg: msg.into() }
228    }
229
230    fn skip_newlines(&mut self) {
231        while matches!(self.peek(), Some(TokenKind::Newline) | Some(TokenKind::Semi)) {
232            self.idx += 1;
233        }
234    }
235
236    fn expect(&mut self, expected: &TokenKind, ctx: &str) -> Result<Token, ParseError> {
237        self.skip_newlines();
238        match self.peek() {
239            Some(k) if std::mem::discriminant(k) == std::mem::discriminant(expected) => {
240                Ok(self.bump().unwrap())
241            }
242            Some(other) => Err(self.error(format!(
243                "expected {expected:?} {ctx}, got {other:?}"
244            ))),
245            None => Err(self.error(format!("expected {expected:?} {ctx}, got EOF"))),
246        }
247    }
248
249    fn eat(&mut self, k: &TokenKind) -> bool {
250        self.skip_newlines();
251        if let Some(cur) = self.peek() {
252            if std::mem::discriminant(cur) == std::mem::discriminant(k) {
253                self.bump();
254                return true;
255            }
256        }
257        false
258    }
259
260    fn expect_ident(&mut self, ctx: &str) -> Result<String, ParseError> {
261        self.skip_newlines();
262        match self.peek() {
263            Some(TokenKind::Ident(_)) => match self.bump().unwrap().kind {
264                TokenKind::Ident(name) => Ok(name),
265                _ => unreachable!(),
266            },
267            other => Err(self.error(format!("expected identifier {ctx}, got {other:?}"))),
268        }
269    }
270
271    // --- top level ---
272
273    fn parse_program(&mut self) -> Result<Program, ParseError> {
274        let mut items = Vec::new();
275        let mut leading_comments: Vec<String> = Vec::new();
276        // Byte offset of the end of the last consumed token (or 0 at
277        // start of file). The next gap to scan for comments is from
278        // here up to the start of the upcoming item's first token.
279        let mut gap_start: usize = 0;
280        loop {
281            self.skip_newlines();
282            if self.at_eof() {
283                break;
284            }
285            let item_start = self.tokens[self.idx].span.start;
286            let gap_comments = self.extract_comments(gap_start, item_start);
287            let pending_comments = if items.is_empty() {
288                leading_comments = gap_comments;
289                Vec::new()
290            } else {
291                gap_comments
292            };
293            let mut item = self.parse_item()?;
294            if !pending_comments.is_empty() {
295                attach_leading_comments(&mut item, pending_comments);
296            }
297            gap_start = self
298                .tokens
299                .get(self.idx.saturating_sub(1))
300                .map(|t| t.span.end)
301                .unwrap_or(gap_start);
302            items.push(item);
303        }
304        // Trailing comments live after the last consumed token (or
305        // span the whole file when there are no items).
306        let trailing_comments = self.extract_comments(gap_start, self.src.len());
307        Ok(Program { items, leading_comments, trailing_comments })
308    }
309
310    fn parse_item(&mut self) -> Result<Item, ParseError> {
311        match self.peek() {
312            Some(TokenKind::Import) => self.parse_import().map(Item::Import),
313            Some(TokenKind::Type) => self.parse_type_decl().map(Item::TypeDecl),
314            Some(TokenKind::Fn) => self.parse_fn_decl().map(Item::FnDecl),
315            other => Err(self.error(format!(
316                "expected `import`, `type`, or `fn` at top level, got {other:?}"
317            ))),
318        }
319    }
320
321    fn parse_import(&mut self) -> Result<Import, ParseError> {
322        self.expect(&TokenKind::Import, "in import")?;
323        let reference = match self.bump().map(|t| t.kind) {
324            Some(TokenKind::Str(s)) => s,
325            other => return Err(self.error(format!("expected string after `import`, got {other:?}"))),
326        };
327        self.expect(&TokenKind::As, "in import")?;
328        let alias = self.expect_ident("for import alias")?;
329        Ok(Import { reference, alias, leading_comments: Vec::new() })
330    }
331
332    fn parse_type_decl(&mut self) -> Result<TypeDecl, ParseError> {
333        self.expect(&TokenKind::Type, "in type decl")?;
334        let name = self.expect_ident("for type name")?;
335        let params = if self.eat(&TokenKind::LBracket) {
336            let ps = self.parse_ident_list()?;
337            self.expect(&TokenKind::RBracket, "after type params")?;
338            ps
339        } else {
340            Vec::new()
341        };
342        self.expect(&TokenKind::Eq, "in type decl")?;
343        let definition = self.parse_type_decl_rhs()?;
344        Ok(TypeDecl { name, params, definition, leading_comments: Vec::new() })
345    }
346
347    fn parse_ident_list(&mut self) -> Result<Vec<String>, ParseError> {
348        let mut out = Vec::new();
349        out.push(self.expect_ident("in identifier list")?);
350        while self.eat(&TokenKind::Comma) {
351            if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
352            out.push(self.expect_ident("in identifier list")?);
353        }
354        Ok(out)
355    }
356
357    /// `type Foo = Variant1 | Variant2(Payload)` is a union; otherwise a plain type expression.
358    fn parse_type_decl_rhs(&mut self) -> Result<TypeExpr, ParseError> {
359        let first = self.parse_type_expr()?;
360        // Detect union: PascalCase ident (or named type w/ optional payload) followed by `|`.
361        if matches!(self.peek_skip_newlines(), Some(TokenKind::Bar)) {
362            let mut variants = Vec::new();
363            variants.push(type_to_variant(first)?);
364            while self.eat(&TokenKind::Bar) {
365                let next = self.parse_type_expr()?;
366                variants.push(type_to_variant(next)?);
367            }
368            Ok(TypeExpr::Union(variants))
369        } else {
370            // Single-variant union without `|`: `type Msg = Execute(Str)`.
371            // The `(...)` constructor-payload syntax is distinguishable from
372            // `[...]` type-application by checking the last consumed token.
373            // `Execute(Str)` ends with `)`, `List[Int]` ends with `]`,
374            // `AnotherType` ends with the ident token.
375            let last_was_rparen = self.idx > 0 && matches!(
376                self.tokens.get(self.idx - 1).map(|t| &t.kind),
377                Some(TokenKind::RParen)
378            );
379            if last_was_rparen {
380                if let TypeExpr::Named { ref name, .. } = first {
381                    let unqual = name.split('.').next_back().unwrap_or(name.as_str());
382                    if unqual.chars().next().map(|c| c.is_ascii_uppercase()).unwrap_or(false) {
383                        return Ok(TypeExpr::Union(vec![type_to_variant(first)?]));
384                    }
385                }
386            }
387            Ok(first)
388        }
389    }
390
391    fn peek_skip_newlines(&mut self) -> Option<TokenKind> {
392        let saved = self.idx;
393        self.skip_newlines();
394        let out = self.peek().cloned();
395        self.idx = saved;
396        out
397    }
398
399    fn parse_type_expr(&mut self) -> Result<TypeExpr, ParseError> {
400        let base = self.parse_type_expr_base()?;
401        self.maybe_wrap_refinement(base)
402    }
403
404    fn parse_type_expr_base(&mut self) -> Result<TypeExpr, ParseError> {
405        self.skip_newlines();
406        match self.peek() {
407            Some(TokenKind::LBrace) => self.parse_record_type(),
408            Some(TokenKind::LParen) => self.parse_paren_type_or_function(),
409            Some(TokenKind::Ident(_)) => {
410                let mut name = self.expect_ident("in type expr")?;
411                // Module-qualified type: `m.Type` or `m.n.Type`. We accept
412                // dotted names here and let the loader rewrite them to the
413                // file-local mangled form. After the loader pass, all type
414                // names referenced by the type checker are single segments.
415                while matches!(self.peek(), Some(TokenKind::Dot)) {
416                    self.bump();
417                    let next = self.expect_ident("after `.` in qualified type")?;
418                    name.push('.');
419                    name.push_str(&next);
420                }
421                let args = if matches!(self.peek(), Some(TokenKind::LBracket)) {
422                    self.bump();
423                    let mut args = Vec::new();
424                    args.push(self.parse_type_expr()?);
425                    while self.eat(&TokenKind::Comma) {
426                        if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
427                        args.push(self.parse_type_expr()?);
428                    }
429                    self.expect(&TokenKind::RBracket, "after type args")?;
430                    args
431                } else if matches!(self.peek(), Some(TokenKind::LParen)) {
432                    // Constructor type with payload: `Name(T)` or `Name(T1, T2)`.
433                    self.bump();
434                    let mut args = Vec::new();
435                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
436                        args.push(self.parse_type_expr()?);
437                        while self.eat(&TokenKind::Comma) {
438                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
439                            args.push(self.parse_type_expr()?);
440                        }
441                    }
442                    self.expect(&TokenKind::RParen, "after constructor payload")?;
443                    args
444                } else {
445                    Vec::new()
446                };
447                Ok(TypeExpr::Named { name, args })
448            }
449            other => Err(self.error(format!("expected type expression, got {other:?}"))),
450        }
451    }
452
453    /// Refinement type postfix (#209): `BaseType{binding | predicate}`.
454    ///
455    /// Disambiguates from a function body's opening brace by peeking
456    /// three tokens ahead — refinement requires `{ Ident |`, a body
457    /// begins with `{ <expr-starting-token>`. This means a refinement
458    /// binding name can't start with `|`, but that's fine since
459    /// identifiers don't.
460    fn maybe_wrap_refinement(&mut self, base: TypeExpr) -> Result<TypeExpr, ParseError> {
461        let next0 = self.tokens.get(self.idx).map(|t| &t.kind);
462        let next1 = self.tokens.get(self.idx + 1).map(|t| &t.kind);
463        let next2 = self.tokens.get(self.idx + 2).map(|t| &t.kind);
464        let is_refinement_lookahead = matches!(next0, Some(TokenKind::LBrace))
465            && matches!(next1, Some(TokenKind::Ident(_)))
466            && matches!(next2, Some(TokenKind::Bar));
467        if !is_refinement_lookahead {
468            return Ok(base);
469        }
470        self.bump(); // `{`
471        let binding = self.expect_ident("for refinement binding")?;
472        self.expect(&TokenKind::Bar, "after refinement binding")?;
473        let predicate = self.parse_expr()?;
474        self.expect(&TokenKind::RBrace, "to close refinement")?;
475        Ok(TypeExpr::Refined {
476            base: Box::new(base),
477            binding,
478            predicate: Box::new(predicate),
479        })
480    }
481
482    fn parse_record_type(&mut self) -> Result<TypeExpr, ParseError> {
483        self.expect(&TokenKind::LBrace, "in record type")?;
484        let mut fields = Vec::new();
485        let mut spreads = Vec::new();
486        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
487            loop {
488                self.skip_newlines();
489                if matches!(self.peek(), Some(TokenKind::DotDotDot)) {
490                    self.bump(); // consume `...`
491                    let name = self.expect_ident("after `...` in record type spread")?;
492                    spreads.push(name);
493                } else {
494                    let name = self.expect_ident("in record field")?;
495                    self.expect(&TokenKind::ColonColon, "after record field name")?;
496                    let ty = self.parse_type_expr()?;
497                    fields.push(TypeField { name, ty });
498                }
499                self.skip_newlines();
500                if !self.eat(&TokenKind::Comma) { break; }
501                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
502            }
503        }
504        self.expect(&TokenKind::RBrace, "in record type")?;
505        if spreads.is_empty() {
506            Ok(TypeExpr::Record(fields))
507        } else {
508            Ok(TypeExpr::RecordWithSpreads { spreads, fields })
509        }
510    }
511
512    fn parse_paren_type_or_function(&mut self) -> Result<TypeExpr, ParseError> {
513        self.expect(&TokenKind::LParen, "in type")?;
514        let mut args = Vec::new();
515        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
516            args.push(self.parse_type_expr()?);
517            while self.eat(&TokenKind::Comma) {
518                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
519                args.push(self.parse_type_expr()?);
520            }
521        }
522        self.expect(&TokenKind::RParen, "in type")?;
523        // Function type if followed by `->`.
524        if matches!(self.peek_skip_newlines(), Some(TokenKind::Arrow)) {
525            self.skip_newlines();
526            self.bump();
527            let (effects, effect_row_var) = self.parse_effects()?;
528            let ret = self.parse_type_expr()?;
529            Ok(TypeExpr::Function {
530                params: args,
531                effects,
532                effect_row_var,
533                ret: Box::new(ret),
534            })
535        } else if args.len() == 1 {
536            // Parenthesized type expression.
537            Ok(args.into_iter().next().unwrap())
538        } else {
539            Ok(TypeExpr::Tuple(args))
540        }
541    }
542
543    fn parse_fn_decl(&mut self) -> Result<FnDecl, ParseError> {
544        self.expect(&TokenKind::Fn, "in fn decl")?;
545        let name = self.expect_ident("for function name")?;
546        let type_params = if self.eat(&TokenKind::LBracket) {
547            let ps = self.parse_ident_list()?;
548            self.expect(&TokenKind::RBracket, "after type params")?;
549            ps
550        } else {
551            Vec::new()
552        };
553        self.expect(&TokenKind::LParen, "before params")?;
554        let mut params = Vec::new();
555        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
556            params.push(self.parse_param()?);
557            while self.eat(&TokenKind::Comma) {
558                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
559                params.push(self.parse_param()?);
560            }
561        }
562        self.expect(&TokenKind::RParen, "after params")?;
563        self.expect(&TokenKind::Arrow, "before return type")?;
564        let (effects, effect_row_var) = self.parse_effects()?;
565        let return_type = self.parse_type_expr()?;
566        let examples = self.parse_examples_block()?;
567        let body = self.parse_block()?;
568        Ok(FnDecl { name, type_params, params, effects, effect_row_var, return_type, body, examples, leading_comments: Vec::new() })
569    }
570
571    /// Parse an optional `examples { call(a, b) => expected, ... }` block
572    /// sitting between the return type and the body (#369). Returns an
573    /// empty vec when no block is present.
574    fn parse_examples_block(&mut self) -> Result<Vec<Example>, ParseError> {
575        // Contextual: not a reserved keyword. Peek for the literal
576        // identifier `examples` followed by `{`; otherwise no block.
577        let is_examples_kw = matches!(
578            self.peek_skip_newlines(),
579            Some(TokenKind::Ident(s)) if s == "examples"
580        );
581        if !is_examples_kw {
582            return Ok(Vec::new());
583        }
584        self.skip_newlines();
585        self.bump(); // consume `examples`
586        self.expect(&TokenKind::LBrace, "after `examples`")?;
587        let mut cases = Vec::new();
588        loop {
589            self.skip_newlines();
590            if matches!(self.peek(), Some(TokenKind::RBrace)) { break; }
591            let call = self.parse_expr()?;
592            self.expect(&TokenKind::FatArrow, "in example case (between call and expected)")?;
593            let expected = self.parse_expr()?;
594            let (args, _) = match call {
595                Expr::Call { callee: _, args } => (args, ()),
596                other => return Err(self.error(
597                    format!("example case must be a call to the function under definition; got {other:?}")
598                )),
599            };
600            cases.push(Example { args, expected });
601            self.skip_newlines();
602            if !self.eat(&TokenKind::Comma) {
603                self.skip_newlines();
604                break;
605            }
606        }
607        self.expect(&TokenKind::RBrace, "to close examples block")?;
608        Ok(cases)
609    }
610
611    fn parse_param(&mut self) -> Result<Param, ParseError> {
612        let name = if matches!(self.peek_skip_newlines(), Some(TokenKind::Underscore)) {
613            self.skip_newlines();
614            self.bump();
615            self.discard_counter += 1;
616            format!("__lex_discard_{}", self.discard_counter)
617        } else {
618            self.expect_ident("for parameter name")?
619        };
620        self.expect(&TokenKind::ColonColon, "after parameter name")?;
621        let ty = self.parse_type_expr()?;
622        Ok(Param { name, ty })
623    }
624
625    /// Parse an effect annotation `[a, b]`, optionally with an open-row tail
626    /// `[a, b | E]` (or `[| E]` for an empty concrete base). Returns the
627    /// concrete effects and the optional row-variable name.
628    fn parse_effects(&mut self) -> Result<(Vec<Effect>, Option<String>), ParseError> {
629        if !self.eat(&TokenKind::LBracket) {
630            return Ok((Vec::new(), None));
631        }
632        let mut out = Vec::new();
633        let mut tail = None;
634        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) {
635            // `[| E]` — open row with no concrete lower bound.
636            if !matches!(self.peek_skip_newlines(), Some(TokenKind::Bar)) {
637                out.push(self.parse_effect()?);
638                while self.eat(&TokenKind::Comma) {
639                    if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
640                    if matches!(self.peek_skip_newlines(), Some(TokenKind::Bar)) { break; }
641                    out.push(self.parse_effect()?);
642                }
643            }
644            // Optional open-row tail: `| E`.
645            self.skip_newlines();
646            if self.eat(&TokenKind::Bar) {
647                tail = Some(self.expect_ident("for effect row variable")?);
648            }
649        }
650        self.expect(&TokenKind::RBracket, "after effects")?;
651        Ok((out, tail))
652    }
653
654    fn parse_effect(&mut self) -> Result<Effect, ParseError> {
655        let name = self.expect_ident("for effect name")?;
656        let arg = if self.eat(&TokenKind::LParen) {
657            let arg = match self.bump().map(|t| t.kind) {
658                Some(TokenKind::Str(s)) => EffectArg::Str(s),
659                Some(TokenKind::Int(n)) => EffectArg::Int(n),
660                Some(TokenKind::Ident(s)) => EffectArg::Ident(s),
661                other => return Err(self.error(format!("invalid effect arg: {other:?}"))),
662            };
663            self.expect(&TokenKind::RParen, "after effect arg")?;
664            Some(arg)
665        } else {
666            None
667        };
668        Ok(Effect { name, arg })
669    }
670
671    // --- blocks and statements ---
672
673    fn parse_block(&mut self) -> Result<Block, ParseError> {
674        self.expect(&TokenKind::LBrace, "before block")?;
675        let mut statements = Vec::new();
676        let result;
677        loop {
678            self.skip_newlines();
679            if matches!(self.peek(), Some(TokenKind::RBrace)) {
680                // Empty block: synthesize Unit literal.
681                result = Box::new(Expr::Lit(Literal::Unit));
682                break;
683            }
684            // Try parsing a let; otherwise an expression.
685            if matches!(self.peek(), Some(TokenKind::Let)) {
686                let stmt = self.parse_let_statement()?;
687                statements.push(stmt);
688                self.skip_newlines();
689                continue;
690            }
691            let expr = self.parse_expr()?;
692            self.skip_newlines();
693            // If the next token is `}`, this expression is the block's result.
694            if matches!(self.peek(), Some(TokenKind::RBrace)) {
695                result = Box::new(expr);
696                break;
697            }
698            statements.push(Statement::Expr(expr));
699        }
700        self.expect(&TokenKind::RBrace, "to close block")?;
701        Ok(Block { statements, result })
702    }
703
704    fn parse_let_statement(&mut self) -> Result<Statement, ParseError> {
705        self.expect(&TokenKind::Let, "in let")?;
706        // `let _ := expr` is the discard idiom (#200). The RHS is
707        // still evaluated for its effect, but the result is bound
708        // to a synthetic name nothing else references — so the
709        // type-checker / VM treat it like a normal let, but user
710        // code can't accidentally reach it.
711        let name = if matches!(self.peek_skip_newlines(), Some(TokenKind::Underscore)) {
712            self.skip_newlines();
713            self.bump();
714            self.discard_counter += 1;
715            format!("__lex_discard_{}", self.discard_counter)
716        } else {
717            self.expect_ident("after `let`")?
718        };
719        let ty = if self.eat(&TokenKind::ColonColon) {
720            Some(self.parse_type_expr()?)
721        } else {
722            None
723        };
724        self.expect(&TokenKind::ColonEq, "in let")?;
725        let value = self.parse_expr()?;
726        Ok(Statement::Let { name, ty, value })
727    }
728
729    // --- expressions ---
730
731    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
732        // Recursion gate: every nested expression — match arms,
733        // tuple/list/record/block elements, function args, etc. —
734        // enters here, so this is the right place to bound depth.
735        // Decrement happens whether the inner call succeeds or fails.
736        if self.depth >= MAX_DEPTH {
737            return Err(ParseError {
738                pos: self.current_pos(),
739                msg: format!(
740                    "expression nests too deeply (max {MAX_DEPTH}); \
741                     malformed or hand-crafted input?"),
742            });
743        }
744        self.depth += 1;
745        let r = self.parse_expr_inner();
746        self.depth -= 1;
747        r
748    }
749
750    fn parse_expr_inner(&mut self) -> Result<Expr, ParseError> {
751        // Pipes are left-associative and bind less tightly than binary ops.
752        let mut left = self.parse_binary_expr(0)?;
753        while matches!(self.peek_skip_newlines(), Some(TokenKind::Pipe)) {
754            self.skip_newlines();
755            self.bump();
756            let right = self.parse_binary_expr(0)?;
757            left = Expr::Pipe { left: Box::new(left), right: Box::new(right) };
758        }
759        Ok(left)
760    }
761
762    fn parse_binary_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
763        let mut lhs = self.parse_unary()?;
764        loop {
765            let op = match self.peek_binop() {
766                Some(op) if op.precedence() >= min_prec => op,
767                _ => break,
768            };
769            self.skip_newlines();
770            self.bump();
771            let rhs = self.parse_binary_expr(op.precedence() + 1)?;
772            lhs = Expr::BinOp { op, lhs: Box::new(lhs), rhs: Box::new(rhs) };
773        }
774        Ok(lhs)
775    }
776
777    fn peek_binop(&mut self) -> Option<BinOp> {
778        match self.peek_skip_newlines()? {
779            TokenKind::Plus => Some(BinOp::Add),
780            TokenKind::Minus => Some(BinOp::Sub),
781            TokenKind::Star => Some(BinOp::Mul),
782            TokenKind::Slash => Some(BinOp::Div),
783            TokenKind::Percent => Some(BinOp::Mod),
784            TokenKind::EqEq => Some(BinOp::Eq),
785            TokenKind::BangEq => Some(BinOp::Neq),
786            TokenKind::Lt => Some(BinOp::Lt),
787            TokenKind::LtEq => Some(BinOp::Lte),
788            TokenKind::Gt => Some(BinOp::Gt),
789            TokenKind::GtEq => Some(BinOp::Gte),
790            TokenKind::And => Some(BinOp::And),
791            TokenKind::Or => Some(BinOp::Or),
792            _ => None,
793        }
794    }
795
796    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
797        self.skip_newlines();
798        match self.peek() {
799            Some(TokenKind::Not) => {
800                self.bump();
801                let inner = self.parse_unary()?;
802                Ok(Expr::UnaryOp { op: UnaryOp::Not, expr: Box::new(inner) })
803            }
804            Some(TokenKind::Minus) => {
805                self.bump();
806                let inner = self.parse_unary()?;
807                Ok(Expr::UnaryOp { op: UnaryOp::Neg, expr: Box::new(inner) })
808            }
809            _ => self.parse_postfix(),
810        }
811    }
812
813    fn parse_postfix(&mut self) -> Result<Expr, ParseError> {
814        let mut expr = self.parse_primary()?;
815        loop {
816            // Postfix operations don't cross newlines (they bind tightly).
817            match self.peek() {
818                Some(TokenKind::Dot) => {
819                    self.bump();
820                    let field = self.expect_ident("after `.`")?;
821                    expr = Expr::Field { value: Box::new(expr), field };
822                }
823                Some(TokenKind::LParen) => {
824                    self.bump();
825                    let mut args = Vec::new();
826                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
827                        args.push(self.parse_expr()?);
828                        while self.eat(&TokenKind::Comma) {
829                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
830                            args.push(self.parse_expr()?);
831                        }
832                    }
833                    self.expect(&TokenKind::RParen, "in call")?;
834                    expr = Expr::Call { callee: Box::new(expr), args };
835                }
836                Some(TokenKind::Question) => {
837                    self.bump();
838                    expr = Expr::Try(Box::new(expr));
839                }
840                _ => break,
841            }
842        }
843        Ok(expr)
844    }
845
846    fn parse_primary(&mut self) -> Result<Expr, ParseError> {
847        self.skip_newlines();
848        match self.peek() {
849            Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
850                TokenKind::Int(n) => Ok(Expr::Lit(Literal::Int(n))),
851                _ => unreachable!(),
852            },
853            Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
854                TokenKind::Float(n) => Ok(Expr::Lit(Literal::Float(n))),
855                _ => unreachable!(),
856            },
857            Some(TokenKind::Str(_)) => match self.bump().unwrap().kind {
858                TokenKind::Str(s) => Ok(Expr::Lit(Literal::Str(s))),
859                _ => unreachable!(),
860            },
861            Some(TokenKind::FStr(_)) => match self.bump().unwrap().kind {
862                TokenKind::FStr(s) => self.desugar_string_interpolation(&s),
863                _ => unreachable!(),
864            },
865            Some(TokenKind::Bytes(_)) => match self.bump().unwrap().kind {
866                TokenKind::Bytes(b) => Ok(Expr::Lit(Literal::Bytes(b))),
867                _ => unreachable!(),
868            },
869            Some(TokenKind::True) => { self.bump(); Ok(Expr::Lit(Literal::Bool(true))) }
870            Some(TokenKind::False) => { self.bump(); Ok(Expr::Lit(Literal::Bool(false))) }
871            Some(TokenKind::If) => self.parse_if(),
872            Some(TokenKind::Match) => self.parse_match(),
873            Some(TokenKind::Fn) => self.parse_lambda(),
874            Some(TokenKind::LBrace) => self.parse_brace_expr(),
875            Some(TokenKind::LBracket) => self.parse_list_literal(),
876            Some(TokenKind::LParen) => self.parse_paren_or_tuple(),
877            Some(TokenKind::Ident(_)) => self.parse_ident_or_record(),
878            other => Err(self.error(format!("expected expression, got {other:?}"))),
879        }
880    }
881
882    /// Disambiguate `{` between record literal and block.
883    /// Lookahead: `{ Ident :` is a record literal; `{ }` is also a record
884    /// (empty block has no use). Anything else is a block.
885    fn parse_brace_expr(&mut self) -> Result<Expr, ParseError> {
886        // Save position; peek 2-3 tokens past `{` (skipping newlines).
887        let saved = self.idx;
888        self.bump(); // `{`
889        // Skip newlines.
890        while matches!(self.peek(), Some(TokenKind::Newline) | Some(TokenKind::Semi)) {
891            self.idx += 1;
892        }
893        let is_record = matches!(self.peek(), Some(TokenKind::RBrace))
894            || (matches!(self.peek(), Some(TokenKind::Ident(_)))
895                && matches!(self.tokens.get(self.idx + 1).map(|t| &t.kind), Some(TokenKind::Colon) | Some(TokenKind::Comma) | Some(TokenKind::RBrace)));
896        self.idx = saved;
897        if is_record {
898            self.parse_record_literal()
899        } else {
900            Ok(Expr::Block(self.parse_block()?))
901        }
902    }
903
904    fn parse_record_literal(&mut self) -> Result<Expr, ParseError> {
905        self.expect(&TokenKind::LBrace, "in record literal")?;
906        let mut fields = Vec::new();
907        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
908            loop {
909                self.skip_newlines();
910                let name = self.expect_ident("in record literal")?;
911                let value = if self.eat(&TokenKind::Colon) {
912                    self.parse_expr()?
913                } else {
914                    // shorthand: `{ name }` => `{ name: name }`
915                    Expr::Var(name.clone())
916                };
917                fields.push(RecordLitField { name, value });
918                self.skip_newlines();
919                if !self.eat(&TokenKind::Comma) { break; }
920                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
921            }
922        }
923        self.expect(&TokenKind::RBrace, "after record literal")?;
924        Ok(Expr::RecordLit(fields))
925    }
926
927    fn parse_if(&mut self) -> Result<Expr, ParseError> {
928        self.expect(&TokenKind::If, "in if")?;
929        let cond = self.parse_expr()?;
930        let then_block = self.parse_block()?;
931        self.expect(&TokenKind::Else, "expected `else`")?;
932        let else_block = self.parse_block()?;
933        Ok(Expr::If { cond: Box::new(cond), then_block, else_block })
934    }
935
936    fn parse_match(&mut self) -> Result<Expr, ParseError> {
937        self.expect(&TokenKind::Match, "in match")?;
938        let scrutinee = self.parse_expr()?;
939        self.expect(&TokenKind::LBrace, "before match arms")?;
940        let mut arms = Vec::new();
941        loop {
942            self.skip_newlines();
943            if matches!(self.peek(), Some(TokenKind::RBrace)) { break; }
944            let pattern = self.parse_pattern()?;
945            self.expect(&TokenKind::FatArrow, "in match arm")?;
946            let body = self.parse_expr()?;
947            arms.push(Arm { pattern, body });
948            self.skip_newlines();
949            if !self.eat(&TokenKind::Comma) { break; }
950        }
951        self.expect(&TokenKind::RBrace, "after match arms")?;
952        Ok(Expr::Match { scrutinee: Box::new(scrutinee), arms })
953    }
954
955    fn parse_lambda(&mut self) -> Result<Expr, ParseError> {
956        self.expect(&TokenKind::Fn, "in lambda")?;
957        self.expect(&TokenKind::LParen, "before lambda params")?;
958        let mut params = Vec::new();
959        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
960            params.push(self.parse_param()?);
961            while self.eat(&TokenKind::Comma) {
962                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
963                params.push(self.parse_param()?);
964            }
965        }
966        self.expect(&TokenKind::RParen, "after lambda params")?;
967        self.expect(&TokenKind::Arrow, "before lambda return type")?;
968        let (effects, effect_row_var) = self.parse_effects()?;
969        let return_type = self.parse_type_expr()?;
970        let body = self.parse_block()?;
971        Ok(Expr::Lambda(Box::new(Lambda { params, effects, effect_row_var, return_type, body })))
972    }
973
974    fn parse_list_literal(&mut self) -> Result<Expr, ParseError> {
975        self.expect(&TokenKind::LBracket, "before list literal")?;
976        let mut items = Vec::new();
977        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) {
978            items.push(self.parse_expr()?);
979            while self.eat(&TokenKind::Comma) {
980                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
981                items.push(self.parse_expr()?);
982            }
983        }
984        self.expect(&TokenKind::RBracket, "after list literal")?;
985        Ok(Expr::ListLit(items))
986    }
987
988    fn parse_paren_or_tuple(&mut self) -> Result<Expr, ParseError> {
989        self.expect(&TokenKind::LParen, "")?;
990        if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
991            self.bump();
992            return Ok(Expr::Lit(Literal::Unit));
993        }
994        let first = self.parse_expr()?;
995        // Inline type ascription: `(expr :: Type)` — peek for `::` before
996        // deciding whether this is a tuple, a grouping, or an ascription.
997        if self.eat(&TokenKind::ColonColon) {
998            let ty = self.parse_type_expr()?;
999            self.expect(&TokenKind::RParen, "after type ascription")?;
1000            return Ok(Expr::Ascription { value: Box::new(first), ty });
1001        }
1002        if self.eat(&TokenKind::Comma) {
1003            let mut items = vec![first];
1004            if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
1005                items.push(self.parse_expr()?);
1006                while self.eat(&TokenKind::Comma) {
1007                    if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
1008                    items.push(self.parse_expr()?);
1009                }
1010            }
1011            self.expect(&TokenKind::RParen, "after tuple")?;
1012            Ok(Expr::TupleLit(items))
1013        } else {
1014            self.expect(&TokenKind::RParen, "after parenthesized expression")?;
1015            Ok(first)
1016        }
1017    }
1018
1019    fn parse_ident_or_record(&mut self) -> Result<Expr, ParseError> {
1020        // Ident is parsed as a Var; later postfix (`(`, `.`, `?`) attach.
1021        let name = self.expect_ident("")?;
1022        Ok(Expr::Var(name))
1023    }
1024
1025    // --- patterns ---
1026
1027    fn parse_pattern(&mut self) -> Result<Pattern, ParseError> {
1028        self.skip_newlines();
1029        match self.peek() {
1030            Some(TokenKind::Minus) => {
1031                self.bump();
1032                self.skip_newlines();
1033                match self.peek() {
1034                    Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
1035                        TokenKind::Int(n) => Ok(Pattern::Lit(Literal::Int(-n))),
1036                        _ => unreachable!(),
1037                    },
1038                    Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
1039                        TokenKind::Float(n) => Ok(Pattern::Lit(Literal::Float(-n))),
1040                        _ => unreachable!(),
1041                    },
1042                    other => Err(self.error(format!("expected Int or Float after `-` in pattern, got {other:?}"))),
1043                }
1044            }
1045            Some(TokenKind::Underscore) => { self.bump(); Ok(Pattern::Wild) }
1046            Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
1047                TokenKind::Int(n) => Ok(Pattern::Lit(Literal::Int(n))),
1048                _ => unreachable!(),
1049            },
1050            Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
1051                TokenKind::Float(n) => Ok(Pattern::Lit(Literal::Float(n))),
1052                _ => unreachable!(),
1053            },
1054            Some(TokenKind::Str(_)) => match self.bump().unwrap().kind {
1055                TokenKind::Str(s) => Ok(Pattern::Lit(Literal::Str(s))),
1056                _ => unreachable!(),
1057            },
1058            Some(TokenKind::True) => { self.bump(); Ok(Pattern::Lit(Literal::Bool(true))) }
1059            Some(TokenKind::False) => { self.bump(); Ok(Pattern::Lit(Literal::Bool(false))) }
1060            Some(TokenKind::LBrace) => self.parse_record_pattern(),
1061            Some(TokenKind::LParen) => self.parse_tuple_pattern(),
1062            Some(TokenKind::Ident(_)) => {
1063                let mut name = self.expect_ident("")?;
1064                // Handle module-qualified constructor patterns: `module.Constructor(args)`.
1065                // Strip the qualifier and keep only the final name, matching how the
1066                // compiler emits MakeVariant with the unqualified constructor name.
1067                while matches!(self.peek(), Some(TokenKind::Dot)) {
1068                    self.bump();
1069                    name = self.expect_ident("after `.` in qualified pattern")?;
1070                }
1071                if matches!(self.peek(), Some(TokenKind::LParen)) {
1072                    self.bump();
1073                    let mut args = Vec::new();
1074                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
1075                        args.push(self.parse_pattern()?);
1076                        while self.eat(&TokenKind::Comma) {
1077                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
1078                            args.push(self.parse_pattern()?);
1079                        }
1080                    }
1081                    self.expect(&TokenKind::RParen, "after constructor pattern")?;
1082                    Ok(Pattern::Constructor { name, args })
1083                } else {
1084                    Ok(Pattern::Var(name))
1085                }
1086            }
1087            other => Err(self.error(format!("expected pattern, got {other:?}"))),
1088        }
1089    }
1090
1091    fn parse_record_pattern(&mut self) -> Result<Pattern, ParseError> {
1092        self.expect(&TokenKind::LBrace, "")?;
1093        let mut fields = Vec::new();
1094        let rest = false;
1095        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
1096            loop {
1097                self.skip_newlines();
1098                let name = self.expect_ident("in record pattern")?;
1099                let pattern = if self.eat(&TokenKind::Colon) {
1100                    Some(self.parse_pattern()?)
1101                } else {
1102                    None
1103                };
1104                fields.push(RecordPatField { name, pattern });
1105                self.skip_newlines();
1106                if !self.eat(&TokenKind::Comma) { break; }
1107                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
1108            }
1109        }
1110        self.expect(&TokenKind::RBrace, "after record pattern")?;
1111        Ok(Pattern::Record { fields, rest })
1112    }
1113
1114    fn parse_tuple_pattern(&mut self) -> Result<Pattern, ParseError> {
1115        self.expect(&TokenKind::LParen, "")?;
1116        let mut items = Vec::new();
1117        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
1118            items.push(self.parse_pattern()?);
1119            while self.eat(&TokenKind::Comma) {
1120                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
1121                items.push(self.parse_pattern()?);
1122            }
1123        }
1124        self.expect(&TokenKind::RParen, "after tuple pattern")?;
1125        if items.len() == 1 {
1126            Ok(items.into_iter().next().unwrap())
1127        } else {
1128            Ok(Pattern::Tuple(items))
1129        }
1130    }
1131}
1132
1133/// In a union RHS, every leaf must be a `Named` type expression — that is, a
1134/// PascalCase ident with optional payload via `Variant(payload_type)`.
1135fn type_to_variant(t: TypeExpr) -> Result<UnionVariant, ParseError> {
1136    match t {
1137        TypeExpr::Named { name, args } => {
1138            let payload = match args.len() {
1139                0 => None,
1140                1 => Some(args.into_iter().next().unwrap()),
1141                _ => Some(TypeExpr::Tuple(args)),
1142            };
1143            Ok(UnionVariant { name, payload })
1144        }
1145        // `Foo({ field :: T })` parses as Named with one arg = Record. handled above.
1146        _ => Err(ParseError {
1147            pos: 0,
1148            msg: "union variant must be a constructor name".into(),
1149        }),
1150    }
1151}
1152
1153/// Attach a collected list of `#` comments to whichever top-level
1154/// item variant carries them. Empty input is a no-op; the per-variant
1155/// `leading_comments: Vec<String>` field is always present.
1156fn attach_leading_comments(item: &mut Item, comments: Vec<String>) {
1157    if comments.is_empty() {
1158        return;
1159    }
1160    match item {
1161        Item::Import(i) => i.leading_comments = comments,
1162        Item::TypeDecl(t) => t.leading_comments = comments,
1163        Item::FnDecl(f) => f.leading_comments = comments,
1164    }
1165}