Skip to main content

lex_syntax/
parser.rs

1//! Recursive-descent parser for Lex. Pratt-style precedence climbing for
2//! binary operators; everything else is straightforward LL(1)-with-lookahead.
3
4use crate::syntax::*;
5use crate::token::{Token, TokenKind, lex as lex_tokens};
6
7pub fn parse(tokens: Vec<Token>) -> Result<Program, ParseError> {
8    // Back-compat entry: no source available, so leading comments are
9    // not recovered. `parse_source` in `lib.rs` calls
10    // `parse_with_src` directly and is the path that preserves them.
11    parse_with_src("", tokens)
12}
13
14/// Parse + attach `#` line-comments to the AST. The source string is
15/// used only to scan the gaps between tokens (where the lexer skipped
16/// whitespace and comments); the parser itself still operates purely
17/// on tokens. See `Program::leading_comments` for the data model.
18pub fn parse_with_src(src: &str, tokens: Vec<Token>) -> Result<Program, ParseError> {
19    let mut p = Parser::new(src, tokens);
20    let program = p.parse_program()?;
21    p.skip_newlines();
22    if !p.at_eof() {
23        return Err(p.error("unexpected token after program"));
24    }
25    Ok(program)
26}
27
28#[derive(Debug, thiserror::Error)]
29#[error("parse error at byte {pos}: {msg}")]
30pub struct ParseError {
31    pub pos: usize,
32    pub msg: String,
33}
34
35struct Parser<'a> {
36    /// Source text — needed only for trivia recovery (line comments
37    /// in the gaps between tokens). Empty when called through the
38    /// legacy `parse(tokens)` entry; comments are silently dropped
39    /// in that path.
40    src: &'a str,
41    tokens: Vec<Token>,
42    idx: usize,
43    /// Recursion depth across `parse_expr`. Capped at `MAX_DEPTH`
44    /// to defend against adversarial input like a long sequence of
45    /// `[[[{{{...` that would otherwise blow the stack. Found by
46    /// the libFuzzer parser target — see `fuzz/fuzz_targets/parser.rs`.
47    depth: u32,
48    /// Counter for `let _ := ...` discard bindings (#200). Each
49    /// discard gets a unique synthetic name so multiple `let _`
50    /// in the same scope shadow rather than collide. The names
51    /// aren't expressible in user syntax (`__lex_discard_N`),
52    /// so user code can't reference them by accident.
53    discard_counter: u32,
54}
55
/// Upper bound on expression nesting; `parse_expr` refuses with a parse
/// error once this depth is reached. Real Lex code rarely exceeds 30, so
/// 96 leaves generous headroom for legitimate generated code.
///
/// Each `parse_expr` level produces ~4-5 stack frames through the
/// `parse_binary_expr → parse_unary → parse_postfix → parse_primary → ...`
/// chain, so this caps the actual frame count around 400-500 — well below
/// even a 2 MiB test stack.
const MAX_DEPTH: u32 = 96;
65
/// One piece of a string interpolation literal (#562), borrowed from the
/// literal's contents.
enum InterpPart<'a> {
    /// Literal text outside any `{...}`.
    Text(&'a str),
    /// The raw text found between a `{` and its matching `}`.
    Expr(&'a str),
}
71
72/// Split `s` into text and `{expr}` segments. Tracks brace depth so
73/// record literals inside interpolations don't confuse the scanner.
74fn split_interp_parts(s: &str) -> Result<Vec<InterpPart<'_>>, String> {
75    let mut parts = Vec::new();
76    let mut rest = s;
77    while let Some(open) = rest.find('{') {
78        if open > 0 {
79            parts.push(InterpPart::Text(&rest[..open]));
80        }
81        let after_open = &rest[open + 1..];
82        let close = find_closing_brace(after_open)
83            .ok_or_else(|| "unclosed `{` in string interpolation".to_string())?;
84        let expr_content = &after_open[..close];
85        if expr_content.trim().is_empty() {
86            return Err("empty `{}` in string interpolation".to_string());
87        }
88        parts.push(InterpPart::Expr(expr_content));
89        rest = &after_open[close + 1..];
90    }
91    if !rest.is_empty() {
92        parts.push(InterpPart::Text(rest));
93    }
94    Ok(parts)
95}
96
/// Locate the `}` that closes a `{` the caller has already consumed.
///
/// Returns the byte offset of that `}` within `s`, or `None` when the
/// brace is never closed. Nested braces are counted, so for `a: {x: 1}}`
/// the offset of the final, outer `}` is returned.
fn find_closing_brace(s: &str) -> Option<usize> {
    // One level is already open: the `{` consumed by the caller.
    let mut open_levels: usize = 1;
    s.char_indices().find_map(|(offset, ch)| {
        if ch == '{' {
            open_levels += 1;
        } else if ch == '}' {
            open_levels -= 1;
            if open_levels == 0 {
                return Some(offset);
            }
        }
        None
    })
}
115
116/// Build a `str.concat(left, right)` call expression.
117fn str_concat_expr(left: Expr, right: Expr) -> Expr {
118    Expr::Call {
119        callee: Box::new(Expr::Field {
120            value: Box::new(Expr::Var("str".into())),
121            field: "concat".into(),
122        }),
123        args: vec![left, right],
124    }
125}
126
127impl<'a> Parser<'a> {
128    fn new(src: &'a str, tokens: Vec<Token>) -> Self {
129        Self { src, tokens, idx: 0, depth: 0, discard_counter: 0 }
130    }
131
132    /// Desugar `"hello {name}"` to `str.concat("hello ", name)` (#562).
133    /// Calls the lexer on each `{...}` content and parses it as an expression.
134    fn desugar_string_interpolation(&self, s: &str) -> Result<Expr, ParseError> {
135        let parts = match split_interp_parts(s) {
136            Ok(p) => p,
137            Err(msg) => return Err(ParseError { pos: 0, msg }),
138        };
139        let mut exprs: Vec<Expr> = Vec::new();
140        for part in parts {
141            match part {
142                InterpPart::Text(t) => {
143                    if !t.is_empty() {
144                        exprs.push(Expr::Lit(Literal::Str(t.to_string())));
145                    }
146                }
147                InterpPart::Expr(content) => {
148                    let tokens = lex_tokens(content.trim()).map_err(|e| ParseError {
149                        pos: 0,
150                        msg: format!("in string interpolation `{{{content}}}`': {e}"),
151                    })?;
152                    let mut sub = Parser::new("", tokens);
153                    exprs.push(sub.parse_expr()?);
154                    if !sub.at_eof() {
155                        return Err(ParseError {
156                            pos: 0,
157                            msg: format!("unexpected tokens after expression in `{{{content}}}`"),
158                        });
159                    }
160                }
161            }
162        }
163        if exprs.is_empty() {
164            return Ok(Expr::Lit(Literal::Str(String::new())));
165        }
166        let mut result = exprs.remove(0);
167        for next in exprs {
168            result = str_concat_expr(result, next);
169        }
170        Ok(result)
171    }
172
173    /// Extract `#` line-comments from the source byte range
174    /// `start..end`. The range must be a "gap" between two tokens
175    /// (or between source-start/end and a token); by construction
176    /// such gaps contain only whitespace and `#` comments — string
177    /// contents never appear because the lexer's logos rules would
178    /// have produced a `Str(...)` token covering them.
179    ///
180    /// Each returned entry is a single source line, trimmed of leading
181    /// whitespace (the `#` and everything after it preserved) and of
182    /// trailing whitespace. Blank lines between consecutive comments
183    /// are dropped — preserving inter-comment blank lines is left to
184    /// a follow-up; the bug this addresses (#417) is about comments
185    /// disappearing entirely.
186    fn extract_comments(&self, start: usize, end: usize) -> Vec<String> {
187        if start >= end || end > self.src.len() {
188            return Vec::new();
189        }
190        self.src[start..end]
191            .lines()
192            .filter_map(|line| {
193                let trimmed = line.trim_start();
194                if trimmed.starts_with('#') {
195                    Some(trimmed.trim_end().to_string())
196                } else {
197                    None
198                }
199            })
200            .collect()
201    }
202
203    fn at_eof(&self) -> bool {
204        self.idx >= self.tokens.len()
205    }
206
207    fn peek(&self) -> Option<&TokenKind> {
208        self.tokens.get(self.idx).map(|t| &t.kind)
209    }
210
211    fn bump(&mut self) -> Option<Token> {
212        let t = self.tokens.get(self.idx).cloned();
213        if t.is_some() {
214            self.idx += 1;
215        }
216        t
217    }
218
219    fn current_pos(&self) -> usize {
220        self.tokens
221            .get(self.idx)
222            .map(|t| t.span.start)
223            .unwrap_or_else(|| self.tokens.last().map(|t| t.span.end).unwrap_or(0))
224    }
225
226    fn error(&self, msg: impl Into<String>) -> ParseError {
227        ParseError { pos: self.current_pos(), msg: msg.into() }
228    }
229
230    fn skip_newlines(&mut self) {
231        while matches!(self.peek(), Some(TokenKind::Newline) | Some(TokenKind::Semi)) {
232            self.idx += 1;
233        }
234    }
235
236    fn expect(&mut self, expected: &TokenKind, ctx: &str) -> Result<Token, ParseError> {
237        self.skip_newlines();
238        match self.peek() {
239            Some(k) if std::mem::discriminant(k) == std::mem::discriminant(expected) => {
240                Ok(self.bump().unwrap())
241            }
242            Some(other) => Err(self.error(format!(
243                "expected {expected:?} {ctx}, got {other:?}"
244            ))),
245            None => Err(self.error(format!("expected {expected:?} {ctx}, got EOF"))),
246        }
247    }
248
249    fn eat(&mut self, k: &TokenKind) -> bool {
250        self.skip_newlines();
251        if let Some(cur) = self.peek() {
252            if std::mem::discriminant(cur) == std::mem::discriminant(k) {
253                self.bump();
254                return true;
255            }
256        }
257        false
258    }
259
260    fn expect_ident(&mut self, ctx: &str) -> Result<String, ParseError> {
261        self.skip_newlines();
262        match self.peek() {
263            Some(TokenKind::Ident(_)) => match self.bump().unwrap().kind {
264                TokenKind::Ident(name) => Ok(name),
265                _ => unreachable!(),
266            },
267            other => Err(self.error(format!("expected identifier {ctx}, got {other:?}"))),
268        }
269    }
270
271    // --- top level ---
272
273    fn parse_program(&mut self) -> Result<Program, ParseError> {
274        let mut items = Vec::new();
275        let mut leading_comments: Vec<String> = Vec::new();
276        // Byte offset of the end of the last consumed token (or 0 at
277        // start of file). The next gap to scan for comments is from
278        // here up to the start of the upcoming item's first token.
279        let mut gap_start: usize = 0;
280        loop {
281            self.skip_newlines();
282            if self.at_eof() {
283                break;
284            }
285            let item_start = self.tokens[self.idx].span.start;
286            let gap_comments = self.extract_comments(gap_start, item_start);
287            let pending_comments = if items.is_empty() {
288                leading_comments = gap_comments;
289                Vec::new()
290            } else {
291                gap_comments
292            };
293            let mut item = self.parse_item()?;
294            if !pending_comments.is_empty() {
295                attach_leading_comments(&mut item, pending_comments);
296            }
297            gap_start = self
298                .tokens
299                .get(self.idx.saturating_sub(1))
300                .map(|t| t.span.end)
301                .unwrap_or(gap_start);
302            items.push(item);
303        }
304        // Trailing comments live after the last consumed token (or
305        // span the whole file when there are no items).
306        let trailing_comments = self.extract_comments(gap_start, self.src.len());
307        Ok(Program { items, leading_comments, trailing_comments })
308    }
309
310    fn parse_item(&mut self) -> Result<Item, ParseError> {
311        match self.peek() {
312            Some(TokenKind::Import) => self.parse_import().map(Item::Import),
313            Some(TokenKind::Type) => self.parse_type_decl().map(Item::TypeDecl),
314            Some(TokenKind::Fn) => self.parse_fn_decl().map(Item::FnDecl),
315            other => Err(self.error(format!(
316                "expected `import`, `type`, or `fn` at top level, got {other:?}"
317            ))),
318        }
319    }
320
321    fn parse_import(&mut self) -> Result<Import, ParseError> {
322        self.expect(&TokenKind::Import, "in import")?;
323        let reference = match self.bump().map(|t| t.kind) {
324            Some(TokenKind::Str(s)) => s,
325            other => return Err(self.error(format!("expected string after `import`, got {other:?}"))),
326        };
327        self.expect(&TokenKind::As, "in import")?;
328        let alias = self.expect_ident("for import alias")?;
329        Ok(Import { reference, alias, leading_comments: Vec::new() })
330    }
331
332    fn parse_type_decl(&mut self) -> Result<TypeDecl, ParseError> {
333        self.expect(&TokenKind::Type, "in type decl")?;
334        let name = self.expect_ident("for type name")?;
335        let params = if self.eat(&TokenKind::LBracket) {
336            let ps = self.parse_ident_list()?;
337            self.expect(&TokenKind::RBracket, "after type params")?;
338            ps
339        } else {
340            Vec::new()
341        };
342        self.expect(&TokenKind::Eq, "in type decl")?;
343        let definition = self.parse_type_decl_rhs()?;
344        Ok(TypeDecl { name, params, definition, leading_comments: Vec::new() })
345    }
346
347    fn parse_ident_list(&mut self) -> Result<Vec<String>, ParseError> {
348        let mut out = Vec::new();
349        out.push(self.expect_ident("in identifier list")?);
350        while self.eat(&TokenKind::Comma) {
351            if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
352            out.push(self.expect_ident("in identifier list")?);
353        }
354        Ok(out)
355    }
356
357    /// `type Foo = Variant1 | Variant2(Payload)` is a union; otherwise a plain type expression.
358    fn parse_type_decl_rhs(&mut self) -> Result<TypeExpr, ParseError> {
359        let first = self.parse_type_expr()?;
360        // Detect union: PascalCase ident (or named type w/ optional payload) followed by `|`.
361        if matches!(self.peek_skip_newlines(), Some(TokenKind::Bar)) {
362            let mut variants = Vec::new();
363            variants.push(type_to_variant(first)?);
364            while self.eat(&TokenKind::Bar) {
365                let next = self.parse_type_expr()?;
366                variants.push(type_to_variant(next)?);
367            }
368            Ok(TypeExpr::Union(variants))
369        } else {
370            // Single-variant union without `|`: `type Msg = Execute(Str)`.
371            // The `(...)` constructor-payload syntax is distinguishable from
372            // `[...]` type-application by checking the last consumed token.
373            // `Execute(Str)` ends with `)`, `List[Int]` ends with `]`,
374            // `AnotherType` ends with the ident token.
375            let last_was_rparen = self.idx > 0 && matches!(
376                self.tokens.get(self.idx - 1).map(|t| &t.kind),
377                Some(TokenKind::RParen)
378            );
379            if last_was_rparen {
380                if let TypeExpr::Named { ref name, .. } = first {
381                    let unqual = name.split('.').next_back().unwrap_or(name.as_str());
382                    if unqual.chars().next().map(|c| c.is_ascii_uppercase()).unwrap_or(false) {
383                        return Ok(TypeExpr::Union(vec![type_to_variant(first)?]));
384                    }
385                }
386            }
387            Ok(first)
388        }
389    }
390
391    fn peek_skip_newlines(&mut self) -> Option<TokenKind> {
392        let saved = self.idx;
393        self.skip_newlines();
394        let out = self.peek().cloned();
395        self.idx = saved;
396        out
397    }
398
399    fn parse_type_expr(&mut self) -> Result<TypeExpr, ParseError> {
400        let base = self.parse_type_expr_base()?;
401        self.maybe_wrap_refinement(base)
402    }
403
404    fn parse_type_expr_base(&mut self) -> Result<TypeExpr, ParseError> {
405        self.skip_newlines();
406        match self.peek() {
407            Some(TokenKind::LBrace) => self.parse_record_type(),
408            Some(TokenKind::LParen) => self.parse_paren_type_or_function(),
409            Some(TokenKind::Ident(_)) => {
410                let mut name = self.expect_ident("in type expr")?;
411                // Module-qualified type: `m.Type` or `m.n.Type`. We accept
412                // dotted names here and let the loader rewrite them to the
413                // file-local mangled form. After the loader pass, all type
414                // names referenced by the type checker are single segments.
415                while matches!(self.peek(), Some(TokenKind::Dot)) {
416                    self.bump();
417                    let next = self.expect_ident("after `.` in qualified type")?;
418                    name.push('.');
419                    name.push_str(&next);
420                }
421                let args = if matches!(self.peek(), Some(TokenKind::LBracket)) {
422                    self.bump();
423                    let mut args = Vec::new();
424                    args.push(self.parse_type_expr()?);
425                    while self.eat(&TokenKind::Comma) {
426                        if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
427                        args.push(self.parse_type_expr()?);
428                    }
429                    self.expect(&TokenKind::RBracket, "after type args")?;
430                    args
431                } else if matches!(self.peek(), Some(TokenKind::LParen)) {
432                    // Constructor type with payload: `Name(T)` or `Name(T1, T2)`.
433                    self.bump();
434                    let mut args = Vec::new();
435                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
436                        args.push(self.parse_type_expr()?);
437                        while self.eat(&TokenKind::Comma) {
438                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
439                            args.push(self.parse_type_expr()?);
440                        }
441                    }
442                    self.expect(&TokenKind::RParen, "after constructor payload")?;
443                    args
444                } else {
445                    Vec::new()
446                };
447                Ok(TypeExpr::Named { name, args })
448            }
449            other => Err(self.error(format!("expected type expression, got {other:?}"))),
450        }
451    }
452
453    /// Refinement type postfix (#209): `BaseType{binding | predicate}`.
454    ///
455    /// Disambiguates from a function body's opening brace by peeking
456    /// three tokens ahead — refinement requires `{ Ident |`, a body
457    /// begins with `{ <expr-starting-token>`. This means a refinement
458    /// binding name can't start with `|`, but that's fine since
459    /// identifiers don't.
460    fn maybe_wrap_refinement(&mut self, base: TypeExpr) -> Result<TypeExpr, ParseError> {
461        let next0 = self.tokens.get(self.idx).map(|t| &t.kind);
462        let next1 = self.tokens.get(self.idx + 1).map(|t| &t.kind);
463        let next2 = self.tokens.get(self.idx + 2).map(|t| &t.kind);
464        let is_refinement_lookahead = matches!(next0, Some(TokenKind::LBrace))
465            && matches!(next1, Some(TokenKind::Ident(_)))
466            && matches!(next2, Some(TokenKind::Bar));
467        if !is_refinement_lookahead {
468            return Ok(base);
469        }
470        self.bump(); // `{`
471        let binding = self.expect_ident("for refinement binding")?;
472        self.expect(&TokenKind::Bar, "after refinement binding")?;
473        let predicate = self.parse_expr()?;
474        self.expect(&TokenKind::RBrace, "to close refinement")?;
475        Ok(TypeExpr::Refined {
476            base: Box::new(base),
477            binding,
478            predicate: Box::new(predicate),
479        })
480    }
481
482    fn parse_record_type(&mut self) -> Result<TypeExpr, ParseError> {
483        self.expect(&TokenKind::LBrace, "in record type")?;
484        let mut fields = Vec::new();
485        let mut spreads = Vec::new();
486        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
487            loop {
488                self.skip_newlines();
489                if matches!(self.peek(), Some(TokenKind::DotDotDot)) {
490                    self.bump(); // consume `...`
491                    let name = self.expect_ident("after `...` in record type spread")?;
492                    spreads.push(name);
493                } else {
494                    let name = self.expect_ident("in record field")?;
495                    self.expect(&TokenKind::ColonColon, "after record field name")?;
496                    let ty = self.parse_type_expr()?;
497                    fields.push(TypeField { name, ty });
498                }
499                self.skip_newlines();
500                if !self.eat(&TokenKind::Comma) { break; }
501                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
502            }
503        }
504        self.expect(&TokenKind::RBrace, "in record type")?;
505        if spreads.is_empty() {
506            Ok(TypeExpr::Record(fields))
507        } else {
508            Ok(TypeExpr::RecordWithSpreads { spreads, fields })
509        }
510    }
511
512    fn parse_paren_type_or_function(&mut self) -> Result<TypeExpr, ParseError> {
513        self.expect(&TokenKind::LParen, "in type")?;
514        let mut args = Vec::new();
515        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
516            args.push(self.parse_type_expr()?);
517            while self.eat(&TokenKind::Comma) {
518                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
519                args.push(self.parse_type_expr()?);
520            }
521        }
522        self.expect(&TokenKind::RParen, "in type")?;
523        // Function type if followed by `->`.
524        if matches!(self.peek_skip_newlines(), Some(TokenKind::Arrow)) {
525            self.skip_newlines();
526            self.bump();
527            let effects = self.parse_effects()?;
528            let ret = self.parse_type_expr()?;
529            Ok(TypeExpr::Function {
530                params: args,
531                effects,
532                ret: Box::new(ret),
533            })
534        } else if args.len() == 1 {
535            // Parenthesized type expression.
536            Ok(args.into_iter().next().unwrap())
537        } else {
538            Ok(TypeExpr::Tuple(args))
539        }
540    }
541
542    fn parse_fn_decl(&mut self) -> Result<FnDecl, ParseError> {
543        self.expect(&TokenKind::Fn, "in fn decl")?;
544        let name = self.expect_ident("for function name")?;
545        let type_params = if self.eat(&TokenKind::LBracket) {
546            let ps = self.parse_ident_list()?;
547            self.expect(&TokenKind::RBracket, "after type params")?;
548            ps
549        } else {
550            Vec::new()
551        };
552        self.expect(&TokenKind::LParen, "before params")?;
553        let mut params = Vec::new();
554        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
555            params.push(self.parse_param()?);
556            while self.eat(&TokenKind::Comma) {
557                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
558                params.push(self.parse_param()?);
559            }
560        }
561        self.expect(&TokenKind::RParen, "after params")?;
562        self.expect(&TokenKind::Arrow, "before return type")?;
563        let effects = self.parse_effects()?;
564        let return_type = self.parse_type_expr()?;
565        let examples = self.parse_examples_block()?;
566        let body = self.parse_block()?;
567        Ok(FnDecl { name, type_params, params, effects, return_type, body, examples, leading_comments: Vec::new() })
568    }
569
570    /// Parse an optional `examples { call(a, b) => expected, ... }` block
571    /// sitting between the return type and the body (#369). Returns an
572    /// empty vec when no block is present.
573    fn parse_examples_block(&mut self) -> Result<Vec<Example>, ParseError> {
574        // Contextual: not a reserved keyword. Peek for the literal
575        // identifier `examples` followed by `{`; otherwise no block.
576        let is_examples_kw = matches!(
577            self.peek_skip_newlines(),
578            Some(TokenKind::Ident(s)) if s == "examples"
579        );
580        if !is_examples_kw {
581            return Ok(Vec::new());
582        }
583        self.skip_newlines();
584        self.bump(); // consume `examples`
585        self.expect(&TokenKind::LBrace, "after `examples`")?;
586        let mut cases = Vec::new();
587        loop {
588            self.skip_newlines();
589            if matches!(self.peek(), Some(TokenKind::RBrace)) { break; }
590            let call = self.parse_expr()?;
591            self.expect(&TokenKind::FatArrow, "in example case (between call and expected)")?;
592            let expected = self.parse_expr()?;
593            let (args, _) = match call {
594                Expr::Call { callee: _, args } => (args, ()),
595                other => return Err(self.error(
596                    format!("example case must be a call to the function under definition; got {other:?}")
597                )),
598            };
599            cases.push(Example { args, expected });
600            self.skip_newlines();
601            if !self.eat(&TokenKind::Comma) {
602                self.skip_newlines();
603                break;
604            }
605        }
606        self.expect(&TokenKind::RBrace, "to close examples block")?;
607        Ok(cases)
608    }
609
610    fn parse_param(&mut self) -> Result<Param, ParseError> {
611        let name = if matches!(self.peek_skip_newlines(), Some(TokenKind::Underscore)) {
612            self.skip_newlines();
613            self.bump();
614            self.discard_counter += 1;
615            format!("__lex_discard_{}", self.discard_counter)
616        } else {
617            self.expect_ident("for parameter name")?
618        };
619        self.expect(&TokenKind::ColonColon, "after parameter name")?;
620        let ty = self.parse_type_expr()?;
621        Ok(Param { name, ty })
622    }
623
624    fn parse_effects(&mut self) -> Result<Vec<Effect>, ParseError> {
625        if !self.eat(&TokenKind::LBracket) {
626            return Ok(Vec::new());
627        }
628        let mut out = Vec::new();
629        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) {
630            out.push(self.parse_effect()?);
631            while self.eat(&TokenKind::Comma) {
632                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
633                out.push(self.parse_effect()?);
634            }
635        }
636        self.expect(&TokenKind::RBracket, "after effects")?;
637        Ok(out)
638    }
639
640    fn parse_effect(&mut self) -> Result<Effect, ParseError> {
641        let name = self.expect_ident("for effect name")?;
642        let arg = if self.eat(&TokenKind::LParen) {
643            let arg = match self.bump().map(|t| t.kind) {
644                Some(TokenKind::Str(s)) => EffectArg::Str(s),
645                Some(TokenKind::Int(n)) => EffectArg::Int(n),
646                Some(TokenKind::Ident(s)) => EffectArg::Ident(s),
647                other => return Err(self.error(format!("invalid effect arg: {other:?}"))),
648            };
649            self.expect(&TokenKind::RParen, "after effect arg")?;
650            Some(arg)
651        } else {
652            None
653        };
654        Ok(Effect { name, arg })
655    }
656
657    // --- blocks and statements ---
658
659    fn parse_block(&mut self) -> Result<Block, ParseError> {
660        self.expect(&TokenKind::LBrace, "before block")?;
661        let mut statements = Vec::new();
662        let result;
663        loop {
664            self.skip_newlines();
665            if matches!(self.peek(), Some(TokenKind::RBrace)) {
666                // Empty block: synthesize Unit literal.
667                result = Box::new(Expr::Lit(Literal::Unit));
668                break;
669            }
670            // Try parsing a let; otherwise an expression.
671            if matches!(self.peek(), Some(TokenKind::Let)) {
672                let stmt = self.parse_let_statement()?;
673                statements.push(stmt);
674                self.skip_newlines();
675                continue;
676            }
677            let expr = self.parse_expr()?;
678            self.skip_newlines();
679            // If the next token is `}`, this expression is the block's result.
680            if matches!(self.peek(), Some(TokenKind::RBrace)) {
681                result = Box::new(expr);
682                break;
683            }
684            statements.push(Statement::Expr(expr));
685        }
686        self.expect(&TokenKind::RBrace, "to close block")?;
687        Ok(Block { statements, result })
688    }
689
690    fn parse_let_statement(&mut self) -> Result<Statement, ParseError> {
691        self.expect(&TokenKind::Let, "in let")?;
692        // `let _ := expr` is the discard idiom (#200). The RHS is
693        // still evaluated for its effect, but the result is bound
694        // to a synthetic name nothing else references — so the
695        // type-checker / VM treat it like a normal let, but user
696        // code can't accidentally reach it.
697        let name = if matches!(self.peek_skip_newlines(), Some(TokenKind::Underscore)) {
698            self.skip_newlines();
699            self.bump();
700            self.discard_counter += 1;
701            format!("__lex_discard_{}", self.discard_counter)
702        } else {
703            self.expect_ident("after `let`")?
704        };
705        let ty = if self.eat(&TokenKind::ColonColon) {
706            Some(self.parse_type_expr()?)
707        } else {
708            None
709        };
710        self.expect(&TokenKind::ColonEq, "in let")?;
711        let value = self.parse_expr()?;
712        Ok(Statement::Let { name, ty, value })
713    }
714
715    // --- expressions ---
716
717    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
718        // Recursion gate: every nested expression — match arms,
719        // tuple/list/record/block elements, function args, etc. —
720        // enters here, so this is the right place to bound depth.
721        // Decrement happens whether the inner call succeeds or fails.
722        if self.depth >= MAX_DEPTH {
723            return Err(ParseError {
724                pos: self.current_pos(),
725                msg: format!(
726                    "expression nests too deeply (max {MAX_DEPTH}); \
727                     malformed or hand-crafted input?"),
728            });
729        }
730        self.depth += 1;
731        let r = self.parse_expr_inner();
732        self.depth -= 1;
733        r
734    }
735
736    fn parse_expr_inner(&mut self) -> Result<Expr, ParseError> {
737        // Pipes are left-associative and bind less tightly than binary ops.
738        let mut left = self.parse_binary_expr(0)?;
739        while matches!(self.peek_skip_newlines(), Some(TokenKind::Pipe)) {
740            self.skip_newlines();
741            self.bump();
742            let right = self.parse_binary_expr(0)?;
743            left = Expr::Pipe { left: Box::new(left), right: Box::new(right) };
744        }
745        Ok(left)
746    }
747
748    fn parse_binary_expr(&mut self, min_prec: u8) -> Result<Expr, ParseError> {
749        let mut lhs = self.parse_unary()?;
750        loop {
751            let op = match self.peek_binop() {
752                Some(op) if op.precedence() >= min_prec => op,
753                _ => break,
754            };
755            self.skip_newlines();
756            self.bump();
757            let rhs = self.parse_binary_expr(op.precedence() + 1)?;
758            lhs = Expr::BinOp { op, lhs: Box::new(lhs), rhs: Box::new(rhs) };
759        }
760        Ok(lhs)
761    }
762
763    fn peek_binop(&mut self) -> Option<BinOp> {
764        match self.peek_skip_newlines()? {
765            TokenKind::Plus => Some(BinOp::Add),
766            TokenKind::Minus => Some(BinOp::Sub),
767            TokenKind::Star => Some(BinOp::Mul),
768            TokenKind::Slash => Some(BinOp::Div),
769            TokenKind::Percent => Some(BinOp::Mod),
770            TokenKind::EqEq => Some(BinOp::Eq),
771            TokenKind::BangEq => Some(BinOp::Neq),
772            TokenKind::Lt => Some(BinOp::Lt),
773            TokenKind::LtEq => Some(BinOp::Lte),
774            TokenKind::Gt => Some(BinOp::Gt),
775            TokenKind::GtEq => Some(BinOp::Gte),
776            TokenKind::And => Some(BinOp::And),
777            TokenKind::Or => Some(BinOp::Or),
778            _ => None,
779        }
780    }
781
782    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
783        self.skip_newlines();
784        match self.peek() {
785            Some(TokenKind::Not) => {
786                self.bump();
787                let inner = self.parse_unary()?;
788                Ok(Expr::UnaryOp { op: UnaryOp::Not, expr: Box::new(inner) })
789            }
790            Some(TokenKind::Minus) => {
791                self.bump();
792                let inner = self.parse_unary()?;
793                Ok(Expr::UnaryOp { op: UnaryOp::Neg, expr: Box::new(inner) })
794            }
795            _ => self.parse_postfix(),
796        }
797    }
798
799    fn parse_postfix(&mut self) -> Result<Expr, ParseError> {
800        let mut expr = self.parse_primary()?;
801        loop {
802            // Postfix operations don't cross newlines (they bind tightly).
803            match self.peek() {
804                Some(TokenKind::Dot) => {
805                    self.bump();
806                    let field = self.expect_ident("after `.`")?;
807                    expr = Expr::Field { value: Box::new(expr), field };
808                }
809                Some(TokenKind::LParen) => {
810                    self.bump();
811                    let mut args = Vec::new();
812                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
813                        args.push(self.parse_expr()?);
814                        while self.eat(&TokenKind::Comma) {
815                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
816                            args.push(self.parse_expr()?);
817                        }
818                    }
819                    self.expect(&TokenKind::RParen, "in call")?;
820                    expr = Expr::Call { callee: Box::new(expr), args };
821                }
822                Some(TokenKind::Question) => {
823                    self.bump();
824                    expr = Expr::Try(Box::new(expr));
825                }
826                _ => break,
827            }
828        }
829        Ok(expr)
830    }
831
832    fn parse_primary(&mut self) -> Result<Expr, ParseError> {
833        self.skip_newlines();
834        match self.peek() {
835            Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
836                TokenKind::Int(n) => Ok(Expr::Lit(Literal::Int(n))),
837                _ => unreachable!(),
838            },
839            Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
840                TokenKind::Float(n) => Ok(Expr::Lit(Literal::Float(n))),
841                _ => unreachable!(),
842            },
843            Some(TokenKind::Str(_)) => match self.bump().unwrap().kind {
844                TokenKind::Str(s) => Ok(Expr::Lit(Literal::Str(s))),
845                _ => unreachable!(),
846            },
847            Some(TokenKind::FStr(_)) => match self.bump().unwrap().kind {
848                TokenKind::FStr(s) => self.desugar_string_interpolation(&s),
849                _ => unreachable!(),
850            },
851            Some(TokenKind::Bytes(_)) => match self.bump().unwrap().kind {
852                TokenKind::Bytes(b) => Ok(Expr::Lit(Literal::Bytes(b))),
853                _ => unreachable!(),
854            },
855            Some(TokenKind::True) => { self.bump(); Ok(Expr::Lit(Literal::Bool(true))) }
856            Some(TokenKind::False) => { self.bump(); Ok(Expr::Lit(Literal::Bool(false))) }
857            Some(TokenKind::If) => self.parse_if(),
858            Some(TokenKind::Match) => self.parse_match(),
859            Some(TokenKind::Fn) => self.parse_lambda(),
860            Some(TokenKind::LBrace) => self.parse_brace_expr(),
861            Some(TokenKind::LBracket) => self.parse_list_literal(),
862            Some(TokenKind::LParen) => self.parse_paren_or_tuple(),
863            Some(TokenKind::Ident(_)) => self.parse_ident_or_record(),
864            other => Err(self.error(format!("expected expression, got {other:?}"))),
865        }
866    }
867
868    /// Disambiguate `{` between record literal and block.
869    /// Lookahead: `{ Ident :` is a record literal; `{ }` is also a record
870    /// (empty block has no use). Anything else is a block.
871    fn parse_brace_expr(&mut self) -> Result<Expr, ParseError> {
872        // Save position; peek 2-3 tokens past `{` (skipping newlines).
873        let saved = self.idx;
874        self.bump(); // `{`
875        // Skip newlines.
876        while matches!(self.peek(), Some(TokenKind::Newline) | Some(TokenKind::Semi)) {
877            self.idx += 1;
878        }
879        let is_record = matches!(self.peek(), Some(TokenKind::RBrace))
880            || (matches!(self.peek(), Some(TokenKind::Ident(_)))
881                && matches!(self.tokens.get(self.idx + 1).map(|t| &t.kind), Some(TokenKind::Colon) | Some(TokenKind::Comma) | Some(TokenKind::RBrace)));
882        self.idx = saved;
883        if is_record {
884            self.parse_record_literal()
885        } else {
886            Ok(Expr::Block(self.parse_block()?))
887        }
888    }
889
890    fn parse_record_literal(&mut self) -> Result<Expr, ParseError> {
891        self.expect(&TokenKind::LBrace, "in record literal")?;
892        let mut fields = Vec::new();
893        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
894            loop {
895                self.skip_newlines();
896                let name = self.expect_ident("in record literal")?;
897                let value = if self.eat(&TokenKind::Colon) {
898                    self.parse_expr()?
899                } else {
900                    // shorthand: `{ name }` => `{ name: name }`
901                    Expr::Var(name.clone())
902                };
903                fields.push(RecordLitField { name, value });
904                self.skip_newlines();
905                if !self.eat(&TokenKind::Comma) { break; }
906                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
907            }
908        }
909        self.expect(&TokenKind::RBrace, "after record literal")?;
910        Ok(Expr::RecordLit(fields))
911    }
912
913    fn parse_if(&mut self) -> Result<Expr, ParseError> {
914        self.expect(&TokenKind::If, "in if")?;
915        let cond = self.parse_expr()?;
916        let then_block = self.parse_block()?;
917        self.expect(&TokenKind::Else, "expected `else`")?;
918        let else_block = self.parse_block()?;
919        Ok(Expr::If { cond: Box::new(cond), then_block, else_block })
920    }
921
922    fn parse_match(&mut self) -> Result<Expr, ParseError> {
923        self.expect(&TokenKind::Match, "in match")?;
924        let scrutinee = self.parse_expr()?;
925        self.expect(&TokenKind::LBrace, "before match arms")?;
926        let mut arms = Vec::new();
927        loop {
928            self.skip_newlines();
929            if matches!(self.peek(), Some(TokenKind::RBrace)) { break; }
930            let pattern = self.parse_pattern()?;
931            self.expect(&TokenKind::FatArrow, "in match arm")?;
932            let body = self.parse_expr()?;
933            arms.push(Arm { pattern, body });
934            self.skip_newlines();
935            if !self.eat(&TokenKind::Comma) { break; }
936        }
937        self.expect(&TokenKind::RBrace, "after match arms")?;
938        Ok(Expr::Match { scrutinee: Box::new(scrutinee), arms })
939    }
940
941    fn parse_lambda(&mut self) -> Result<Expr, ParseError> {
942        self.expect(&TokenKind::Fn, "in lambda")?;
943        self.expect(&TokenKind::LParen, "before lambda params")?;
944        let mut params = Vec::new();
945        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
946            params.push(self.parse_param()?);
947            while self.eat(&TokenKind::Comma) {
948                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
949                params.push(self.parse_param()?);
950            }
951        }
952        self.expect(&TokenKind::RParen, "after lambda params")?;
953        self.expect(&TokenKind::Arrow, "before lambda return type")?;
954        let effects = self.parse_effects()?;
955        let return_type = self.parse_type_expr()?;
956        let body = self.parse_block()?;
957        Ok(Expr::Lambda(Box::new(Lambda { params, effects, return_type, body })))
958    }
959
960    fn parse_list_literal(&mut self) -> Result<Expr, ParseError> {
961        self.expect(&TokenKind::LBracket, "before list literal")?;
962        let mut items = Vec::new();
963        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) {
964            items.push(self.parse_expr()?);
965            while self.eat(&TokenKind::Comma) {
966                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBracket)) { break; }
967                items.push(self.parse_expr()?);
968            }
969        }
970        self.expect(&TokenKind::RBracket, "after list literal")?;
971        Ok(Expr::ListLit(items))
972    }
973
974    fn parse_paren_or_tuple(&mut self) -> Result<Expr, ParseError> {
975        self.expect(&TokenKind::LParen, "")?;
976        if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
977            self.bump();
978            return Ok(Expr::Lit(Literal::Unit));
979        }
980        let first = self.parse_expr()?;
981        // Inline type ascription: `(expr :: Type)` — peek for `::` before
982        // deciding whether this is a tuple, a grouping, or an ascription.
983        if self.eat(&TokenKind::ColonColon) {
984            let ty = self.parse_type_expr()?;
985            self.expect(&TokenKind::RParen, "after type ascription")?;
986            return Ok(Expr::Ascription { value: Box::new(first), ty });
987        }
988        if self.eat(&TokenKind::Comma) {
989            let mut items = vec![first];
990            if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
991                items.push(self.parse_expr()?);
992                while self.eat(&TokenKind::Comma) {
993                    if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
994                    items.push(self.parse_expr()?);
995                }
996            }
997            self.expect(&TokenKind::RParen, "after tuple")?;
998            Ok(Expr::TupleLit(items))
999        } else {
1000            self.expect(&TokenKind::RParen, "after parenthesized expression")?;
1001            Ok(first)
1002        }
1003    }
1004
1005    fn parse_ident_or_record(&mut self) -> Result<Expr, ParseError> {
1006        // Ident is parsed as a Var; later postfix (`(`, `.`, `?`) attach.
1007        let name = self.expect_ident("")?;
1008        Ok(Expr::Var(name))
1009    }
1010
1011    // --- patterns ---
1012
1013    fn parse_pattern(&mut self) -> Result<Pattern, ParseError> {
1014        self.skip_newlines();
1015        match self.peek() {
1016            Some(TokenKind::Minus) => {
1017                self.bump();
1018                self.skip_newlines();
1019                match self.peek() {
1020                    Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
1021                        TokenKind::Int(n) => Ok(Pattern::Lit(Literal::Int(-n))),
1022                        _ => unreachable!(),
1023                    },
1024                    Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
1025                        TokenKind::Float(n) => Ok(Pattern::Lit(Literal::Float(-n))),
1026                        _ => unreachable!(),
1027                    },
1028                    other => Err(self.error(format!("expected Int or Float after `-` in pattern, got {other:?}"))),
1029                }
1030            }
1031            Some(TokenKind::Underscore) => { self.bump(); Ok(Pattern::Wild) }
1032            Some(TokenKind::Int(_)) => match self.bump().unwrap().kind {
1033                TokenKind::Int(n) => Ok(Pattern::Lit(Literal::Int(n))),
1034                _ => unreachable!(),
1035            },
1036            Some(TokenKind::Float(_)) => match self.bump().unwrap().kind {
1037                TokenKind::Float(n) => Ok(Pattern::Lit(Literal::Float(n))),
1038                _ => unreachable!(),
1039            },
1040            Some(TokenKind::Str(_)) => match self.bump().unwrap().kind {
1041                TokenKind::Str(s) => Ok(Pattern::Lit(Literal::Str(s))),
1042                _ => unreachable!(),
1043            },
1044            Some(TokenKind::True) => { self.bump(); Ok(Pattern::Lit(Literal::Bool(true))) }
1045            Some(TokenKind::False) => { self.bump(); Ok(Pattern::Lit(Literal::Bool(false))) }
1046            Some(TokenKind::LBrace) => self.parse_record_pattern(),
1047            Some(TokenKind::LParen) => self.parse_tuple_pattern(),
1048            Some(TokenKind::Ident(_)) => {
1049                let mut name = self.expect_ident("")?;
1050                // Handle module-qualified constructor patterns: `module.Constructor(args)`.
1051                // Strip the qualifier and keep only the final name, matching how the
1052                // compiler emits MakeVariant with the unqualified constructor name.
1053                while matches!(self.peek(), Some(TokenKind::Dot)) {
1054                    self.bump();
1055                    name = self.expect_ident("after `.` in qualified pattern")?;
1056                }
1057                if matches!(self.peek(), Some(TokenKind::LParen)) {
1058                    self.bump();
1059                    let mut args = Vec::new();
1060                    if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
1061                        args.push(self.parse_pattern()?);
1062                        while self.eat(&TokenKind::Comma) {
1063                            if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
1064                            args.push(self.parse_pattern()?);
1065                        }
1066                    }
1067                    self.expect(&TokenKind::RParen, "after constructor pattern")?;
1068                    Ok(Pattern::Constructor { name, args })
1069                } else {
1070                    Ok(Pattern::Var(name))
1071                }
1072            }
1073            other => Err(self.error(format!("expected pattern, got {other:?}"))),
1074        }
1075    }
1076
1077    fn parse_record_pattern(&mut self) -> Result<Pattern, ParseError> {
1078        self.expect(&TokenKind::LBrace, "")?;
1079        let mut fields = Vec::new();
1080        let rest = false;
1081        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) {
1082            loop {
1083                self.skip_newlines();
1084                let name = self.expect_ident("in record pattern")?;
1085                let pattern = if self.eat(&TokenKind::Colon) {
1086                    Some(self.parse_pattern()?)
1087                } else {
1088                    None
1089                };
1090                fields.push(RecordPatField { name, pattern });
1091                self.skip_newlines();
1092                if !self.eat(&TokenKind::Comma) { break; }
1093                if matches!(self.peek_skip_newlines(), Some(TokenKind::RBrace)) { break; }
1094            }
1095        }
1096        self.expect(&TokenKind::RBrace, "after record pattern")?;
1097        Ok(Pattern::Record { fields, rest })
1098    }
1099
1100    fn parse_tuple_pattern(&mut self) -> Result<Pattern, ParseError> {
1101        self.expect(&TokenKind::LParen, "")?;
1102        let mut items = Vec::new();
1103        if !matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) {
1104            items.push(self.parse_pattern()?);
1105            while self.eat(&TokenKind::Comma) {
1106                if matches!(self.peek_skip_newlines(), Some(TokenKind::RParen)) { break; }
1107                items.push(self.parse_pattern()?);
1108            }
1109        }
1110        self.expect(&TokenKind::RParen, "after tuple pattern")?;
1111        if items.len() == 1 {
1112            Ok(items.into_iter().next().unwrap())
1113        } else {
1114            Ok(Pattern::Tuple(items))
1115        }
1116    }
1117}
1118
1119/// In a union RHS, every leaf must be a `Named` type expression — that is, a
1120/// PascalCase ident with optional payload via `Variant(payload_type)`.
1121fn type_to_variant(t: TypeExpr) -> Result<UnionVariant, ParseError> {
1122    match t {
1123        TypeExpr::Named { name, args } => {
1124            let payload = match args.len() {
1125                0 => None,
1126                1 => Some(args.into_iter().next().unwrap()),
1127                _ => Some(TypeExpr::Tuple(args)),
1128            };
1129            Ok(UnionVariant { name, payload })
1130        }
1131        // `Foo({ field :: T })` parses as Named with one arg = Record. handled above.
1132        _ => Err(ParseError {
1133            pos: 0,
1134            msg: "union variant must be a constructor name".into(),
1135        }),
1136    }
1137}
1138
1139/// Attach a collected list of `#` comments to whichever top-level
1140/// item variant carries them. Empty input is a no-op; the per-variant
1141/// `leading_comments: Vec<String>` field is always present.
1142fn attach_leading_comments(item: &mut Item, comments: Vec<String>) {
1143    if comments.is_empty() {
1144        return;
1145    }
1146    match item {
1147        Item::Import(i) => i.leading_comments = comments,
1148        Item::TypeDecl(t) => t.leading_comments = comments,
1149        Item::FnDecl(f) => f.leading_comments = comments,
1150    }
1151}