Skip to main content

relon_parser/
cst.rs

1//! Concrete syntax tree (CST) builder over the lossless [`lex`]
2//! output. P2 of the rowan rewrite — translates the existing winnow
3//! grammar into rowan `GreenNode`s while preserving every source byte
4//! (including whitespace and comments) as first-class tokens.
5//!
6//! Architecture
7//! ============
8//!
9//! - `Parser` wraps the flat `(SyntaxKind, &str)` token stream from
10//!   [`lex::lex`] plus a `rowan::GreenNodeBuilder` writing the tree.
11//! - "Skip-trivia" helpers (`current`, `at`, `nth`) ignore whitespace
12//!   and comments, so productions can pattern-match on meaningful
13//!   structure without ever forgetting to write a trivia token to the
14//!   tree.
15//! - Trivia is flushed to the builder lazily — emitted as siblings
16//!   *just before* the next meaningful token. The "right" home for a
17//!   trailing comment (does it belong to the closing brace, or to the
18//!   next pair?) is decided by `bump`'s flush order.
19//! - Each grammar production is a function on `&mut Parser`. They
20//!   call `open(kind)` / `close()` to mark composite nodes. Failures
21//!   recover via `error_recover(sync_set)` which emits an ERROR node
22//!   and synchronises to the nearest token in `sync_set`.
23//!
24//! Scope
25//! =====
26//!
27//! P2 (now complete) covers the full surface grammar:
28//!
29//! * Literals, identifiers, dotted paths, references.
30//! * Lists, dicts (with pair attributes + method-shorthand closures
31//!   + typed keys), list comprehensions.
32//! * Unary, binary (Pratt-precedence), call, postfix `.field` /
33//!   `[index]`, parenthesised closure (`(p) [-> R] => body`).
34//! * `expr match { ... }` and `expr where { ... }` postfix forms.
35//! * F-string decomposition into `F_STRING` + `F_STRING_LITERAL`
36//!   chunks + nested `F_STRING_INTERPOLATION` sub-nodes (whose
37//!   children are ordinary Relon expressions).
38//! * `TYPE_NODE` — dotted paths, generics, optional `?`.
39//! * Directive bodies dispatched by name: `#schema`/`#extend`
40//!   (name + generics + body + optional `with`), `#import`
41//!   (`<spec> from "path"`), `#main(typed-params) [-> Ret]`.
42//!
43//! P3 lives in `crate::ast` — typed-AST wrappers on top of this
44//! CST. P4 will migrate downstream crates (analyzer, evaluator,
45//! fmt, wasm, lsp) onto the new wrappers.
46
47use crate::lex;
48use crate::lex::utf8_codepoint_len_for_cst as utf8_codepoint_len;
49use crate::syntax::{RelonLanguage, SyntaxKind, SyntaxNode};
50use rowan::{Checkpoint, GreenNodeBuilder};
51
/// A single parse failure together with the byte position it was
/// recorded at. The same failure is always reachable from the CST
/// through its spanning `ERROR` node; keeping a separate flat list
/// lets callers (LSP diagnostics, the CLI pretty-printer) report
/// errors without walking the tree again.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// Description of what the parser expected or rejected.
    pub message: String,
    /// Byte offset into the original source where recovery began.
    pub offset: usize,
}
62
/// Successful parse result. `green` is the lossless tree; `errors`
/// is the (possibly empty) list of parse errors emitted along the
/// way. The parser NEVER returns `Err` — every input shape produces
/// a tree, with `ERROR` nodes covering unparseable spans.
#[derive(Debug, Clone)]
pub struct Parse {
    /// Lossless green tree; callers reach it through [`Parse::syntax`].
    green: rowan::GreenNode,
    /// Parse errors recorded while the tree was built; empty when the
    /// input parsed cleanly.
    pub errors: Vec<ParseError>,
}
72
73impl Parse {
74    /// Wrap the green tree as a typed [`SyntaxNode`] for traversal.
75    pub fn syntax(&self) -> SyntaxNode {
76        SyntaxNode::new_root(self.green.clone())
77    }
78
79    /// Returns `true` when at least one parse error was emitted.
80    pub fn has_errors(&self) -> bool {
81        !self.errors.is_empty()
82    }
83}
84
85/// Top-level entry. Always produces a `Parse` — never panics, never
86/// returns `Err`. Bytes that don't fit any production are absorbed
87/// into `ERROR` nodes; the round-trip invariant holds regardless.
88pub fn parse_cst(source: &str) -> Parse {
89    let tokens = lex::lex(source);
90    let mut parser = Parser::new(tokens);
91    parser.parse_document();
92    parser.finish()
93}
94
95// =====================================================================
96// Parser state.
97// =====================================================================
98
/// Mutable state shared by every grammar production: the token
/// stream, the read position within it, the green-tree builder the
/// productions write to, and the errors recorded so far.
struct Parser<'a> {
    /// The flat token stream the parser is currently consuming. We
    /// own the vec so f-string interpolation sub-parses can swap in
    /// a transient inner-token list without lifetime gymnastics —
    /// the inner `&str` slices still point into the original source.
    tokens: Vec<(SyntaxKind, &'a str)>,
    /// Index into `tokens` of the next token not yet emitted to
    /// `builder`.
    pos: usize,
    /// Accumulates the green tree as tokens and nodes are emitted.
    builder: GreenNodeBuilder<'static>,
    /// Parse errors recorded so far; handed to [`Parse`] by `finish`.
    errors: Vec<ParseError>,
    /// Running byte offset — kept in sync with `pos` so we can record
    /// error positions without re-walking.
    cursor_byte: usize,
}
112
113impl<'a> Parser<'a> {
114    fn new(tokens: Vec<(SyntaxKind, &'a str)>) -> Self {
115        Self {
116            tokens,
117            pos: 0,
118            builder: GreenNodeBuilder::new(),
119            errors: Vec::new(),
120            cursor_byte: 0,
121        }
122    }
123
124    fn finish(self) -> Parse {
125        // `parse_document` is responsible for emitting every token
126        // INSIDE the root DOCUMENT node — rowan requires it. The
127        // `finish()` call here just hands ownership of the green
128        // tree back.
129        debug_assert!(
130            self.pos >= self.tokens.len(),
131            "{} tokens unflushed at parse end",
132            self.tokens.len() - self.pos
133        );
134        Parse {
135            green: self.builder.finish(),
136            errors: self.errors,
137        }
138    }
139
140    // ----- token-stream introspection ----------------------------------
141
142    /// Kind of the next *non-trivia* token, or `None` if EOI.
143    fn current(&self) -> Option<SyntaxKind> {
144        self.nth(0)
145    }
146
147    /// Kind of the `n`-th non-trivia token ahead (0 = current), or
148    /// `None` if there aren't that many. Useful for productions that
149    /// need 1-token lookahead.
150    fn nth(&self, n: usize) -> Option<SyntaxKind> {
151        let mut idx = self.pos;
152        let mut left = n;
153        while idx < self.tokens.len() {
154            let kind = self.tokens[idx].0;
155            if kind.is_trivia() {
156                idx += 1;
157                continue;
158            }
159            if left == 0 {
160                return Some(kind);
161            }
162            left -= 1;
163            idx += 1;
164        }
165        None
166    }
167
168    fn at(&self, kind: SyntaxKind) -> bool {
169        self.current() == Some(kind)
170    }
171
172    fn at_set(&self, set: &[SyntaxKind]) -> bool {
173        self.current().is_some_and(|k| set.contains(&k))
174    }
175
176    fn at_end(&self) -> bool {
177        self.current().is_none()
178    }
179
180    // ----- consumption --------------------------------------------------
181
182    /// Emit any pending trivia tokens to the builder. Trivia tokens
183    /// (whitespace, comments) are skipped by `current` / `at` but
184    /// still need to land in the tree — this writes them flush
185    /// against whatever production opened most recently.
186    fn flush_trivia(&mut self) {
187        while self.pos < self.tokens.len() {
188            let (kind, text) = self.tokens[self.pos];
189            if !kind.is_trivia() {
190                return;
191            }
192            self.builder
193                .token(RelonLanguage::kind_to_raw_static(kind), text);
194            self.cursor_byte += text.len();
195            self.pos += 1;
196        }
197    }
198
199    /// Consume the next non-trivia token and emit it to the builder,
200    /// preceded by any pending trivia. Panics in tests if called at
201    /// EOI — productions should guard with `current()` first.
202    fn bump(&mut self) {
203        self.flush_trivia();
204        if self.pos >= self.tokens.len() {
205            debug_assert!(false, "bump() past end of input");
206            return;
207        }
208        let (kind, text) = self.tokens[self.pos];
209        self.builder
210            .token(RelonLanguage::kind_to_raw_static(kind), text);
211        self.cursor_byte += text.len();
212        self.pos += 1;
213    }
214
215    /// Consume the next non-trivia token if it matches `kind`.
216    /// Returns `true` on consume.
217    fn eat(&mut self, kind: SyntaxKind) -> bool {
218        if self.at(kind) {
219            self.bump();
220            true
221        } else {
222            false
223        }
224    }
225
226    /// Consume `kind` or emit a parse error. Returns `true` on
227    /// success; on failure leaves the cursor where it was and pushes
228    /// to `errors`. Productions that need to keep going should follow
229    /// `expect` with `error_recover` for proper sync behaviour.
230    fn expect(&mut self, kind: SyntaxKind) -> bool {
231        if self.eat(kind) {
232            true
233        } else {
234            self.error(format!("expected {kind:?}, found {:?}", self.current()));
235            false
236        }
237    }
238
239    fn error(&mut self, message: impl Into<String>) {
240        self.errors.push(ParseError {
241            message: message.into(),
242            offset: self.cursor_byte,
243        });
244    }
245
246    /// Wrap the next token (or a synthetic empty span) in an `ERROR`
247    /// node and push an error. Used as a one-shot way to mark an
248    /// unexpected leaf without entering recovery.
249    fn error_at_current(&mut self, message: impl Into<String>) {
250        self.error(message);
251        self.open(SyntaxKind::ERROR);
252        if !self.at_end() {
253            self.bump();
254        }
255        self.close();
256    }
257
258    /// Emit an `ERROR` node spanning every token until one of
259    /// `sync_set` is reached (or EOI). The error message is recorded
260    /// at the offset where recovery started.
261    fn error_recover(&mut self, message: impl Into<String>, sync_set: &[SyntaxKind]) {
262        self.error(message);
263        self.open(SyntaxKind::ERROR);
264        while !self.at_end() && !self.at_set(sync_set) {
265            self.bump();
266        }
267        self.close();
268    }
269
    /// Canonical "back to a sane structural boundary" sync set: the
    /// separator and closing punctuators a dict / list / call would
    /// resume at (`,`, `}`, `]`, `)`), plus the directive `#` head.
    /// Productions that recover with this set re-enter their parent's
    /// punctuation-aware loop on the next iteration. Used by the few
    /// productions that don't know which container they're inside;
    /// container-specific recovery sites keep narrower sets (`COMMA`
    /// + their own closing bracket).
    const STRUCTURAL_SYNC: &'static [SyntaxKind] = &[
        SyntaxKind::COMMA,
        SyntaxKind::R_BRACE,
        SyntaxKind::R_BRACK,
        SyntaxKind::R_PAREN,
        SyntaxKind::HASH,
    ];
284
285    // ----- node bracketing ---------------------------------------------
286
287    fn open(&mut self, kind: SyntaxKind) {
288        // Order matters: `start_node` MUST come before `flush_trivia`
289        // so any pending whitespace / comments land INSIDE the new
290        // node (as leading trivia of its first child) rather than as
291        // siblings of the node at the parent level. Flushing first
292        // would also break the very-first `open(DOCUMENT)` call —
293        // leading file trivia would end up at rowan's root level,
294        // violating the "exactly one root" invariant.
295        self.builder
296            .start_node(RelonLanguage::kind_to_raw_static(kind));
297        self.flush_trivia();
298    }
299
300    fn checkpoint(&mut self) -> Checkpoint {
301        // Checkpoint snaps to "right after any pending trivia" —
302        // `open_at(ck, ..)` wraps the construct that follows, NOT
303        // the trivia in front of it. Otherwise a comment before a
304        // binary expression would get pulled inside the
305        // `BINARY_EXPR` node, which is the wrong attachment.
306        self.flush_trivia();
307        self.builder.checkpoint()
308    }
309
310    fn open_at(&mut self, ck: Checkpoint, kind: SyntaxKind) {
311        self.builder
312            .start_node_at(ck, RelonLanguage::kind_to_raw_static(kind));
313    }
314
315    fn close(&mut self) {
316        self.builder.finish_node();
317    }
318
319    // =================================================================
320    // Productions.
321    // =================================================================
322
323    /// Top-level: zero-or-more attributes, then one document value.
324    /// The whole thing is wrapped in a `DOCUMENT` node so the round
325    /// trip walks from a single root.
326    fn parse_document(&mut self) {
327        self.open(SyntaxKind::DOCUMENT);
328        // Leading directives / decorators stacked above the root
329        // value. The grammar permits them at file scope (e.g.
330        // `#schema X { ... }` files with no separate value body).
331        while self.at(SyntaxKind::HASH) || self.at(SyntaxKind::AT) {
332            self.parse_attribute();
333        }
334        // The root value. EOI is fine — files like
335        // `#schema X { ... }` end after the directive's body.
336        if !self.at_end() {
337            self.parse_expr();
338        }
339        // Anything left over is unexpected trailing input — wrap as
340        // ERROR so the round-trip stays whole.
341        if !self.at_end() {
342            self.error_recover("trailing input after root value", &[]);
343        }
344        // Trailing trivia (final newline, footer comments) MUST land
345        // inside DOCUMENT — rowan only accepts one root node, and
346        // tokens emitted after `close()` would have nowhere to live.
347        self.flush_trivia();
348        self.close();
349    }
350
351    /// `@name(...)` or `#name <body>`. Decorator bodies are always
352    /// `(args)` (or absent) and decorator names may be dotted
353    /// (`@ensure.int`, `@module.fn`); directive bodies branch on the
354    /// name: `schema` / `extend` capture `name <T, U>? body? (with {})?`,
355    /// `import` captures `<spec> from "path"`, `main` captures
356    /// `( typed-params ) [-> Ret]`, the remaining names dispatch via
357    /// [`directive_shape`] — bare directives consume no body so they
358    /// can sit cleanly above the field they decorate, value directives
359    /// take exactly one trailing expression.
360    fn parse_attribute(&mut self) {
361        let is_directive = self.at(SyntaxKind::HASH);
362        let kind = if is_directive {
363            SyntaxKind::DIRECTIVE
364        } else {
365            SyntaxKind::DECORATOR
366        };
367        self.open(kind);
368        self.bump(); // # or @
369        let name_text = if self.at(SyntaxKind::IDENT) {
370            let text = self.current_text();
371            self.bump();
372            text
373        } else {
374            self.error_at_current("expected attribute name");
375            None
376        };
377        if !is_directive {
378            // Decorator — name may be dotted (`@ensure.at_least`).
379            // Body is always `(args)` or empty.
380            while self.at(SyntaxKind::DOT) {
381                self.bump();
382                if self.at(SyntaxKind::IDENT) {
383                    self.bump();
384                } else {
385                    self.error_at_current("expected identifier after `.` in decorator name");
386                    break;
387                }
388            }
389            if self.at(SyntaxKind::L_PAREN) {
390                self.parse_call_args();
391            }
392            self.close();
393            return;
394        }
395        // Directive — dispatch on name. Unknown directive names take a
396        // single optional expression body to match the legacy parser's
397        // permissive fallback.
398        let shape = name_text
399            .and_then(crate::directive::directive_shape)
400            .unwrap_or(crate::DirectiveShape::Value);
401        match shape {
402            crate::DirectiveShape::Bare => {
403                // No body. `#internal`, `#relaxed`, `#unstrict`, `#native`.
404            }
405            crate::DirectiveShape::Value => {
406                if self.is_attribute_body_start() {
407                    self.parse_expr();
408                }
409            }
410            crate::DirectiveShape::NameBody => self.parse_directive_name_body(),
411            crate::DirectiveShape::Enum => self.parse_directive_enum(),
412            crate::DirectiveShape::Import => self.parse_directive_import(),
413            crate::DirectiveShape::Main => self.parse_directive_main(),
414        }
415        self.close();
416    }
417
418    /// `#enum Name<T, U>? { Variant, Variant { field: Type }, Variant(Type) }`.
419    /// The lowerer turns this into the internal tagged-enum schema form.
420    fn parse_directive_enum(&mut self) {
421        if self.at(SyntaxKind::IDENT) {
422            self.bump();
423        } else {
424            return;
425        }
426        if self.at(SyntaxKind::LT) {
427            self.bump();
428            while !self.at(SyntaxKind::GT) && !self.at_end() {
429                if self.at(SyntaxKind::IDENT) {
430                    self.bump();
431                } else {
432                    self.error_at_current("expected generic param");
433                    break;
434                }
435                if !self.eat(SyntaxKind::COMMA) {
436                    break;
437                }
438            }
439            self.expect(SyntaxKind::GT);
440        }
441        if !self.eat(SyntaxKind::L_BRACE) {
442            return;
443        }
444        while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
445            if self.at(SyntaxKind::COMMA) {
446                self.bump();
447                continue;
448            }
449            self.open(SyntaxKind::ENUM_VARIANT);
450            if self.at(SyntaxKind::IDENT) {
451                self.bump();
452            } else {
453                self.error_at_current("expected enum variant name");
454                self.close();
455                break;
456            }
457            if self.at(SyntaxKind::L_BRACE) {
458                self.bump();
459                while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
460                    if self.at(SyntaxKind::COMMA) {
461                        self.bump();
462                        continue;
463                    }
464                    self.open(SyntaxKind::ENUM_VARIANT_FIELD);
465                    if self.at(SyntaxKind::IDENT) {
466                        self.bump();
467                    } else {
468                        self.error_at_current("expected enum variant field name");
469                    }
470                    self.expect(SyntaxKind::COLON);
471                    self.parse_type();
472                    self.close();
473                    if !self.eat(SyntaxKind::COMMA) {
474                        break;
475                    }
476                }
477                self.expect(SyntaxKind::R_BRACE);
478            } else if self.at(SyntaxKind::L_PAREN) {
479                self.parse_tuple_type();
480            }
481            self.close();
482            if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACE) {
483                self.error_at_current("expected `,` or `}` after enum variant");
484                break;
485            }
486        }
487        self.expect(SyntaxKind::R_BRACE);
488    }
489
490    /// `#schema Name <T, U>? body? (with { methods... })?`. The body
491    /// is whatever expression follows the name + generics (typically
492    /// a dict but the parser accepts any expression — the analyzer
493    /// emits a diagnostic when it isn't a dict). The trailing `with`
494    /// block is optional and may also follow a body-less `#schema X`
495    /// declaration.
496    fn parse_directive_name_body(&mut self) {
497        // Optional declared name.
498        if self.at(SyntaxKind::IDENT) {
499            self.bump();
500        } else {
501            return;
502        }
503        // Optional generic param list `<T, U>` — bare identifiers.
504        if self.at(SyntaxKind::LT) {
505            self.bump();
506            while !self.at(SyntaxKind::GT) && !self.at_end() {
507                if self.at(SyntaxKind::IDENT) {
508                    self.bump();
509                } else {
510                    self.error_at_current("expected generic param");
511                    break;
512                }
513                if !self.eat(SyntaxKind::COMMA) {
514                    break;
515                }
516            }
517            self.expect(SyntaxKind::GT);
518        }
519        // The body is everything up to (a) the next attribute, (b)
520        // the `with` keyword, or (c) the dict-field separator (`:`
521        // / `,` / `}` / EOI). Special-case the `with`-only shape
522        // (`#schema X with { ... }`) by skipping the body when we
523        // see `with` immediately.
524        let saw_with = self.at(SyntaxKind::IDENT) && self.current_text() == Some("with");
525        // v1 accepts an optional `:` separator between schema name and
526        // body: `#schema Image: { name: String }` is equivalent to
527        // `#schema Image { name: String }`. The legacy combinator chain
528        // consumed the `:` as part of the directive; the CST does the
529        // same so the `is_attribute_body_start` check below sees the
530        // body proper. Without this, the dict-field grammar would
531        // (correctly!) parse `Image:` as a malformed dict field after
532        // mistaking the directive for body-less.
533        if !saw_with && self.at(SyntaxKind::COLON) {
534            self.bump();
535        }
536        if !saw_with && self.is_attribute_body_start() {
537            // Guard: when the next chars are `Ident:` / `Ident,` we
538            // must not consume them — they belong to a dict field
539            // following `#schema X` in a `: ...` context.
540            if !self.peek_attribute_terminator() {
541                // Schema bodies are typically dicts (`#schema U { ... }`)
542                // but the grammar also accepts a type-alias body. When the body
543                // looks like a bare type expression — IDENT immediately
544                // followed by `<...>` — parse it as a type so the
545                // string-literal generic args don't surprise the Pratt
546                // expression grammar (which would treat `<` as a
547                // binary comparison).
548                if self.peek_is_bare_type_body() {
549                    self.parse_type();
550                } else {
551                    self.parse_expr();
552                }
553            }
554        }
555        // Optional `with { ... }` block — a structured method list.
556        // The legacy `opt_parse_with_block` (`directive.rs`) drives the
557        // shape: leading pragma stack (`#derive` / `#native` /
558        // `#internal` / `#no_auto_derive`), then a `name<T>?(p: T,
559        // ...) -> Ret (: body)?` declaration. We emit each method as
560        // a SCHEMA_METHOD node so the typed-AST layer can read the
561        // structure cheaply.
562        if self.at(SyntaxKind::IDENT) && self.current_text() == Some("with") {
563            self.bump();
564            if self.at(SyntaxKind::L_BRACE) {
565                self.parse_schema_with();
566            }
567        }
568    }
569
570    /// True when the upcoming token stream is an IDENT followed
571    /// immediately (no intervening whitespace) by `<` — a type-alias
572    /// body shape such as `Int` / `List<T>`. Used by
573    /// `parse_directive_name_body` to disambiguate the type-body shape
574    /// from a regular expression body. The IDENT-and-no-`<` case
575    /// (bare-type body like `#schema MyAlias String`) is also
576    /// classified as "type body" — the body is a single primitive
577    /// type identifier without generics.
578    fn peek_is_bare_type_body(&self) -> bool {
579        if !self.at(SyntaxKind::IDENT) {
580            return false;
581        }
582        // Only commit to the type body if the IDENT is one of the
583        // known type heads (`Int`, `String`, `Bool`, `List`, `Dict`,
584        // `Any`, `Float`) — otherwise a regular
585        // expression with a leading IDENT is the safer fallback.
586        let head = self.current_text().unwrap_or("");
587        if !matches!(
588            head,
589            "Int" | "String" | "Bool" | "Float" | "Any" | "List" | "Dict"
590        ) {
591            return false;
592        }
593        // Allow both primitive aliases (`Int`) and generic containers
594        // (`List<T>`) as type-body starts.
595        let head_idx = self.pos_skip_trivia();
596        let mut idx = head_idx + 1;
597        let mut had_ws = false;
598        while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
599            had_ws = true;
600            idx += 1;
601        }
602        if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::LT) && !had_ws {
603            return true;
604        }
605        // Bare type identifier (`#schema MyAlias String`) — only
606        // accept when nothing else follows on the line. We approximate
607        // "nothing else" by checking the next non-trivia token isn't
608        // a typical expression-continuation symbol.
609        matches!(
610            self.tokens.get(idx).map(|(k, _)| *k),
611            Some(SyntaxKind::HASH) | Some(SyntaxKind::L_BRACE) | None
612        )
613    }
614
615    /// `with { (pragma | method)* }` — body of a `#schema` / `#extend`
616    /// directive. Lossless: every byte (whitespace, comments, leading
617    /// pragmas) sits inside the [`SCHEMA_WITH`] node, with each method
618    /// declaration wrapped in its own [`SCHEMA_METHOD`] child.
619    fn parse_schema_with(&mut self) {
620        self.open(SyntaxKind::SCHEMA_WITH);
621        self.bump(); // {
622        while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
623            // Method declarations are introduced by either a pragma
624            // (`#derive` / `#native` / `#internal` / `#no_auto_derive`)
625            // or directly by a method name. We greedily group leading
626            // pragmas with the next method into one SCHEMA_METHOD node
627            // — if no method follows (e.g. trailing schema-level
628            // `#no_auto_derive`), the directives sit at the
629            // SCHEMA_WITH level as siblings.
630            if self.at(SyntaxKind::HASH) {
631                let ck = self.checkpoint();
632                let mut had_method_pragma = false;
633                while self.at(SyntaxKind::HASH) {
634                    let name = self.directive_name_after_hash();
635                    if matches!(
636                        name.as_deref(),
637                        Some("derive") | Some("native") | Some("internal")
638                    ) {
639                        had_method_pragma = true;
640                    }
641                    self.parse_attribute();
642                }
643                if self.at(SyntaxKind::IDENT) && !self.at_method_terminator() {
644                    self.open_at(ck, SyntaxKind::SCHEMA_METHOD);
645                    self.parse_schema_method_after_pragmas();
646                    self.close();
647                } else if had_method_pragma {
648                    // Pragma stack without a method — surface a recovery
649                    // error to mirror the legacy "stray method pragma"
650                    // diagnostic but keep parsing.
651                    self.error(
652                        "expected method declaration after `#derive` / `#native` / `#internal`",
653                    );
654                }
655                continue;
656            }
657            if self.at(SyntaxKind::IDENT) {
658                self.open(SyntaxKind::SCHEMA_METHOD);
659                self.parse_schema_method_after_pragmas();
660                self.close();
661                continue;
662            }
663            // Unexpected token inside the with-block — recover to the
664            // next likely start of a method (HASH / IDENT / R_BRACE).
665            self.error_recover(
666                "expected method or pragma inside `with { ... }`",
667                &[SyntaxKind::HASH, SyntaxKind::IDENT, SyntaxKind::R_BRACE],
668            );
669        }
670        self.expect(SyntaxKind::R_BRACE);
671        self.close();
672    }
673
674    /// True when the upcoming non-trivia token is the with-block
675    /// terminator (`}`) — used to spot a pragma stack with no method
676    /// trailing it without confusing it for a normal method header.
677    fn at_method_terminator(&self) -> bool {
678        matches!(self.current(), Some(SyntaxKind::R_BRACE)) || self.at_end()
679    }
680
681    /// Peek the IDENT immediately after a HASH at the current position
682    /// (skipping trivia). Returns `None` if `#` isn't followed by an
683    /// identifier.
684    fn directive_name_after_hash(&self) -> Option<String> {
685        let mut idx = self.pos_skip_trivia();
686        if self.tokens.get(idx).map(|(k, _)| *k) != Some(SyntaxKind::HASH) {
687            return None;
688        }
689        idx += 1;
690        while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
691            idx += 1;
692        }
693        match self.tokens.get(idx) {
694            Some((SyntaxKind::IDENT, text)) => Some((*text).to_string()),
695            _ => None,
696        }
697    }
698
699    /// Parse a single method declaration inside a `with { ... }` block.
700    /// Caller has already opened a SCHEMA_METHOD node and emitted any
701    /// leading pragma directives. Shape:
702    ///
703    ///   IDENT GenericParams? '(' (Param (',' Param)*)? ')' '->' Type (':' Expr)?
704    ///
705    /// Each parameter takes the named form `name: Type` (opposite of
706    /// `#main`'s `Type name`), reusing the existing CLOSURE_PARAM
707    /// wrapper to keep the typed-AST layer simple. The body is omitted
708    /// for `#native` methods.
709    fn parse_schema_method_after_pragmas(&mut self) {
710        // Method name.
711        if self.at(SyntaxKind::IDENT) {
712            self.bump();
713        } else {
714            self.error_at_current("expected method name");
715            return;
716        }
717        // Optional method-level generics `<U, V>`.
718        if self.at(SyntaxKind::LT) {
719            self.bump();
720            while !self.at(SyntaxKind::GT) && !self.at_end() {
721                if self.at(SyntaxKind::IDENT) {
722                    self.bump();
723                } else {
724                    self.error_at_current("expected method generic parameter");
725                    break;
726                }
727                if !self.eat(SyntaxKind::COMMA) {
728                    break;
729                }
730            }
731            self.expect(SyntaxKind::GT);
732        }
733        // Parameter list `(name: Type, ...)`.
734        if !self.expect(SyntaxKind::L_PAREN) {
735            return;
736        }
737        while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
738            self.parse_schema_method_param();
739            if !self.eat(SyntaxKind::COMMA) {
740                break;
741            }
742        }
743        self.expect(SyntaxKind::R_PAREN);
744        // `-> ReturnType` — required by the analyzer-level grammar
745        // (every with-block method declares its return), but the CST
746        // accepts the missing-arrow shape so older test fixtures
747        // that elided the return type still round-trip cleanly.
748        if self.eat(SyntaxKind::THIN_ARROW) {
749            self.parse_type();
750        }
751        // Optional `: body`. Methods marked `#native` omit it; for
752        // others the analyzer enforces presence.
753        if self.eat(SyntaxKind::COLON) {
754            self.parse_expr();
755        }
756    }
757
758    /// One schema-method parameter: `name: Type`. Lossless — emitted
759    /// inside a CLOSURE_PARAM node so the typed-AST layer can reuse
760    /// the existing wrapper.
761    fn parse_schema_method_param(&mut self) {
762        self.open(SyntaxKind::CLOSURE_PARAM);
763        if self.at(SyntaxKind::IDENT) {
764            self.bump();
765        } else {
766            self.error_at_current("expected parameter name");
767            self.close();
768            return;
769        }
770        if self.eat(SyntaxKind::COLON) {
771            self.parse_type();
772        } else {
773            self.error("expected `:` in schema method parameter");
774        }
775        self.close();
776    }
777
778    /// `#import <spec> from "path"`. `<spec>` is one of
779    /// `*`, `{ a, b as c }`, or a single identifier.
780    fn parse_directive_import(&mut self) {
781        if self.at(SyntaxKind::STAR) {
782            self.bump();
783        } else if self.at(SyntaxKind::L_BRACE) {
784            // Destructure list `{ a, b as c }` — each entry is an
785            // IDENT optionally followed by `as IDENT`. This is NOT a
786            // dict, so we don't reuse `parse_dict`. The legacy
787            // `parse_import_spec` accepts this shape; the typed-AST
788            // layer carries the entries on `DirectiveImportSpec`.
789            self.parse_import_destructure();
790        } else if self.at(SyntaxKind::IDENT) {
791            self.bump();
792        } else {
793            self.error_at_current("expected import spec");
794            return;
795        }
796        if self.at(SyntaxKind::IDENT) && self.current_text() == Some("from") {
797            self.bump();
798        } else {
799            self.error("expected `from` in #import");
800            return;
801        }
802        if self.at(SyntaxKind::STRING) {
803            self.bump();
804        } else {
805            self.error_at_current("expected path string in #import");
806            return;
807        }
808        // Optional integrity pin `<algo>:"<hex>"`. Accept anything of
809        // the shape `IDENT COLON STRING`; the algorithm name and hex
810        // are validated by the analyzer so the diagnostic span lands
811        // on the real source position rather than on the parser's
812        // current cursor.
813        if self.at(SyntaxKind::IDENT) {
814            self.bump();
815            if self.at(SyntaxKind::COLON) {
816                self.bump();
817            } else {
818                self.error_at_current("expected `:` in #import integrity pin");
819                return;
820            }
821            if self.at(SyntaxKind::STRING) {
822                self.bump();
823            } else {
824                self.error_at_current("expected hex string in #import integrity pin");
825            }
826        }
827    }
828
829    fn parse_import_destructure(&mut self) {
830        debug_assert!(self.at(SyntaxKind::L_BRACE));
831        self.bump(); // {
832        loop {
833            if self.at(SyntaxKind::R_BRACE) || self.at_end() {
834                break;
835            }
836            if self.at(SyntaxKind::IDENT) {
837                self.bump();
838                // Optional `as IDENT` alias.
839                if self.at(SyntaxKind::IDENT) && self.current_text() == Some("as") {
840                    self.bump();
841                    if self.at(SyntaxKind::IDENT) {
842                        self.bump();
843                    } else {
844                        self.error_at_current("expected identifier after `as` in #import");
845                    }
846                }
847            } else {
848                self.error_recover(
849                    "expected identifier in #import destructure",
850                    &[SyntaxKind::COMMA, SyntaxKind::R_BRACE],
851                );
852            }
853            if !self.eat(SyntaxKind::COMMA) {
854                break;
855            }
856        }
857        self.expect(SyntaxKind::R_BRACE);
858    }
859
860    /// `#main ( type ident, ... ) [-> Type]`. Captures the typed
861    /// param list directly so the directive node carries the same
862    /// structure the analyzer needs.
863    fn parse_directive_main(&mut self) {
864        if !self.eat(SyntaxKind::L_PAREN) {
865            self.error("expected `(` after `#main`");
866            return;
867        }
868        while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
869            // Each param: `Type ident` (closure-param shape).
870            self.parse_closure_param();
871            if !self.eat(SyntaxKind::COMMA) {
872                break;
873            }
874        }
875        self.expect(SyntaxKind::R_PAREN);
876        // Optional `-> ReturnType`.
877        if self.eat(SyntaxKind::THIN_ARROW) {
878            self.parse_type();
879        }
880    }
881
882    /// True when the next non-trivia token signals "no directive body
883    /// here, leave the ident for the surrounding grammar" — used by
884    /// `#schema X: value` (inside a dict) where `X` is the dict key,
885    /// not the schema-name body.
886    fn peek_attribute_terminator(&self) -> bool {
887        let mut idx = self.pos_skip_trivia();
888        // Skip an IDENT (and an optional generic angle-list).
889        if self.tokens.get(idx).map(|(k, _)| *k) != Some(SyntaxKind::IDENT) {
890            return false;
891        }
892        idx += 1;
893        while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
894            idx += 1;
895        }
896        matches!(
897            self.tokens.get(idx).map(|(k, _)| *k),
898            Some(SyntaxKind::COLON) | Some(SyntaxKind::COMMA) | Some(SyntaxKind::R_BRACE)
899        )
900    }
901
902    fn is_attribute_body_start(&self) -> bool {
903        self.current().is_some_and(|k| {
904            matches!(
905                k,
906                SyntaxKind::IDENT
907                    | SyntaxKind::NUMBER
908                    | SyntaxKind::STRING
909                    | SyntaxKind::L_BRACE
910                    | SyntaxKind::L_BRACK
911                    // `L_PAREN` covers the parenthesised closure form
912                    // `(p) => body` and parenthesised expressions
913                    // `(a + b)`. Without this, value-shape directives
914                    // like `#default (self) => ...` and
915                    // `#expect (n) => n > 0` would be parsed as
916                    // body-less, leaving the closure for the
917                    // surrounding dict to choke on.
918                    | SyntaxKind::L_PAREN
919                    | SyntaxKind::AMP
920                    | SyntaxKind::MINUS
921                    | SyntaxKind::BANG
922                    | SyntaxKind::STAR
923                    // F-strings start a fresh atom too.
924                    | SyntaxKind::F_STRING_OPEN
925            )
926        })
927    }
928
929    // ----- expression entry -------------------------------------------
930
931    /// Parse a full expression. Operator precedence is climbed with a
932    /// Pratt-style loop. Lowest precedence first; primary handles
933    /// atoms and prefix unaries. `match { ... }` and `where { ... }`
934    /// trail the binary chain as the outermost postfix forms — they
935    /// take precedence above ternary etc., matching the winnow
936    /// grammar in `expr.rs`.
937    fn parse_expr(&mut self) {
938        let ck = self.checkpoint();
939        self.parse_expr_bp(0);
940        // Ternary: `cond ? then : else`. Bound at expression-tail
941        // precedence — lower than every binary operator (so the binary
942        // chain absorbs into `cond`) but higher than the trailing
943        // `match` / `where` postfix forms (which wrap whatever ternary
944        // produces). The legacy `parse_ternary` (`expr.rs`) sits at the
945        // same level — see the precedence chain notes there.
946        //
947        // Disambiguation: `?` may also be a path-access prefix
948        // (`a?.b`, `a?[0]`) or a type-optional marker (`Foo?` inside a
949        // typed context). Path access is consumed earlier — the CST's
950        // current postfix loop doesn't fold `?.` / `?[`, but the legacy
951        // pre-P4 path always took those bytes itself, so no fixture
952        // reaches this branch with them in postfix position. Type
953        // optionals only appear inside committed `parse_type` calls
954        // (match arms, closure params, directive bodies), never at the
955        // outermost expression level — so seeing `?` here is
956        // unambiguously a ternary head.
957        if self.at(SyntaxKind::QUESTION) {
958            // Guard: don't claim a ternary on `?.` / `?[`. Those forms
959            // belong to path access and are handled (or rejected) by the
960            // atom layer; consuming `?` here would steal the prefix.
961            let next = self.nth(1);
962            if !matches!(next, Some(SyntaxKind::DOT) | Some(SyntaxKind::L_BRACK)) {
963                self.open_at(ck, SyntaxKind::TERNARY_EXPR);
964                self.bump(); // ?
965                self.parse_expr();
966                if !self.expect(SyntaxKind::COLON) {
967                    self.close();
968                    return;
969                }
970                self.parse_expr();
971                self.close();
972            }
973        }
974        loop {
975            if self.at(SyntaxKind::IDENT) && self.current_text() == Some("match") {
976                // Only commit to MATCH_EXPR when `match` is followed
977                // by `{` — otherwise it's a bareword called `match`
978                // somewhere unrelated.
979                if self.nth(1) == Some(SyntaxKind::L_BRACE) {
980                    self.open_at(ck, SyntaxKind::MATCH_EXPR);
981                    self.bump(); // `match`
982                    self.bump(); // {
983                    while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
984                        self.parse_match_arm();
985                        if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACE) {
986                            self.error_recover(
987                                "expected `,` or `}` in match",
988                                &[SyntaxKind::COMMA, SyntaxKind::R_BRACE],
989                            );
990                            self.eat(SyntaxKind::COMMA);
991                        }
992                    }
993                    self.expect(SyntaxKind::R_BRACE);
994                    self.close();
995                    continue;
996                }
997            }
998            if self.at(SyntaxKind::IDENT)
999                && self.current_text() == Some("where")
1000                && self.nth(1) == Some(SyntaxKind::L_BRACE)
1001            {
1002                self.open_at(ck, SyntaxKind::WHERE_EXPR);
1003                self.bump(); // `where`
1004                self.parse_dict();
1005                self.close();
1006                continue;
1007            }
1008            break;
1009        }
1010    }
1011
1012    /// One match arm: `pattern: body`. Pattern is one of:
1013    ///
1014    /// * a TYPE_NODE (`Up`, `Int`) for existing unit/schema patterns;
1015    /// * `*` for wildcard;
1016    /// * a Rust-like enum payload pattern (`Pair(a, b)`,
1017    ///   `Email { address, subject: s }`).
1018    fn parse_match_arm(&mut self) {
1019        self.open(SyntaxKind::MATCH_ARM);
1020        if self.at(SyntaxKind::UNDERSCORE) {
1021            self.open(SyntaxKind::WILDCARD);
1022            self.bump();
1023            self.close();
1024        } else if self.at(SyntaxKind::STAR) {
1025            // `*` is no longer the pattern wildcard — the catch-all arm
1026            // is now written `_`. Keep the `*` lexeme inside a WILDCARD
1027            // node (so recovery stays structured) but flag the precise
1028            // migration so the diagnostic points at the new spelling.
1029            self.open(SyntaxKind::WILDCARD);
1030            self.error_at_current(
1031                "`*` is no longer a match wildcard — use `_` for the catch-all arm",
1032            );
1033            self.bump();
1034            self.close();
1035        } else if self.looks_like_match_payload_pattern() {
1036            self.parse_match_pattern();
1037        } else if self.at(SyntaxKind::IDENT) {
1038            self.parse_type();
1039        } else {
1040            self.error_at_current("expected match-arm pattern");
1041        }
1042        if self.eat(SyntaxKind::COLON) {
1043            self.parse_expr();
1044        } else {
1045            self.error("expected `:` in match arm");
1046        }
1047        self.close();
1048    }
1049
1050    fn looks_like_match_payload_pattern(&self) -> bool {
1051        if !self.at(SyntaxKind::IDENT) {
1052            return false;
1053        }
1054        let mut idx = self.pos_skip_trivia() + 1;
1055        while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1056            idx += 1;
1057        }
1058        while self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::DOT) {
1059            idx += 1;
1060            while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1061                idx += 1;
1062            }
1063            if self.tokens.get(idx).map(|(k, _)| *k) != Some(SyntaxKind::IDENT) {
1064                return false;
1065            }
1066            idx += 1;
1067            while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1068                idx += 1;
1069            }
1070        }
1071        matches!(
1072            self.tokens.get(idx).map(|(k, _)| *k),
1073            Some(SyntaxKind::L_PAREN | SyntaxKind::L_BRACE)
1074        )
1075    }
1076
1077    fn parse_match_pattern(&mut self) {
1078        self.open(SyntaxKind::MATCH_PATTERN);
1079        self.expect(SyntaxKind::IDENT);
1080        while self.at(SyntaxKind::DOT) {
1081            self.bump();
1082            self.expect(SyntaxKind::IDENT);
1083        }
1084        if self.at(SyntaxKind::L_PAREN) {
1085            self.bump();
1086            while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
1087                if self.at(SyntaxKind::UNDERSCORE) || self.at(SyntaxKind::IDENT) {
1088                    self.bump();
1089                } else if self.at(SyntaxKind::STAR) {
1090                    self.error_at_current(
1091                        "`*` is no longer a pattern wildcard — use `_` to ignore a payload slot",
1092                    );
1093                    self.bump();
1094                } else {
1095                    self.error_at_current("expected tuple pattern binding");
1096                    break;
1097                }
1098                if !self.eat(SyntaxKind::COMMA) {
1099                    break;
1100                }
1101            }
1102            self.expect(SyntaxKind::R_PAREN);
1103        } else if self.at(SyntaxKind::L_BRACE) {
1104            self.bump();
1105            while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
1106                if self.at(SyntaxKind::IDENT) {
1107                    self.bump();
1108                    if self.eat(SyntaxKind::COLON) {
1109                        if self.at(SyntaxKind::UNDERSCORE) || self.at(SyntaxKind::IDENT) {
1110                            self.bump();
1111                        } else if self.at(SyntaxKind::STAR) {
1112                            self.error_at_current(
1113                                "`*` is no longer a pattern wildcard — use `_` to ignore a payload slot",
1114                            );
1115                            self.bump();
1116                        } else {
1117                            self.error_at_current("expected struct pattern binding");
1118                            break;
1119                        }
1120                    }
1121                } else {
1122                    self.error_at_current("expected struct pattern field");
1123                    break;
1124                }
1125                if !self.eat(SyntaxKind::COMMA) {
1126                    break;
1127                }
1128            }
1129            self.expect(SyntaxKind::R_BRACE);
1130        }
1131        self.close();
1132    }
1133
1134    fn parse_expr_bp(&mut self, min_bp: u8) {
1135        let lhs_ck = self.checkpoint();
1136        self.parse_unary();
1137
1138        while let Some(op) = self.current() {
1139            let Some((lbp, rbp)) = infix_bp(op) else {
1140                break;
1141            };
1142            if lbp < min_bp {
1143                break;
1144            }
1145            self.open_at(lhs_ck, SyntaxKind::BINARY_EXPR);
1146            if op == SyntaxKind::PLUS_PLUS {
1147                // `++` was parseable but never executable (the
1148                // evaluator always trapped UnsupportedOperator).
1149                // String concatenation is spelled `+`; keep consuming
1150                // the token inside the BINARY_EXPR node (so recovery
1151                // stays structured and the round-trip stays lossless)
1152                // but flag the precise migration. Plain `error` — not
1153                // `error_at_current`, which would wrap the token in an
1154                // extra ERROR node and skip the normal bump below.
1155                self.error("`++` is not an operator — use `+` to concatenate strings");
1156            }
1157            self.bump();
1158            self.parse_expr_bp(rbp);
1159            self.close();
1160        }
1161    }
1162
1163    /// Prefix-unary or atom. Postfix call / index / dot are wrapped
1164    /// here via checkpoint.
1165    fn parse_unary(&mut self) {
1166        if self.at_set(&[SyntaxKind::MINUS, SyntaxKind::BANG, SyntaxKind::PLUS]) {
1167            self.open(SyntaxKind::UNARY_EXPR);
1168            self.bump();
1169            self.parse_unary();
1170            self.close();
1171            return;
1172        }
1173        self.parse_postfix();
1174    }
1175
1176    /// Atom with postfix suffixes (`.field`, `[i]`, `(args)`,
1177    /// plus optional-chain `?.field` / `?[i]`).
1178    fn parse_postfix(&mut self) {
1179        let ck = self.checkpoint();
1180        self.parse_atom();
1181        loop {
1182            if self.at(SyntaxKind::L_PAREN) {
1183                self.open_at(ck, SyntaxKind::CALL_EXPR);
1184                self.parse_call_args();
1185                self.close();
1186            } else if self.at(SyntaxKind::DOT)
1187                || self.at(SyntaxKind::L_BRACK)
1188                || (self.at(SyntaxKind::QUESTION)
1189                    && matches!(
1190                        self.nth(1),
1191                        Some(SyntaxKind::DOT) | Some(SyntaxKind::L_BRACK)
1192                    ))
1193            {
1194                // Path access — fold into VARIABLE_EXPR so dotted
1195                // paths like `a.b.c` end up as a single node. v1.8
1196                // positional access `xs.0` (number after `.`) is the
1197                // tuple/list index form — accepted alongside `.field`.
1198                // Optional chaining (`a?.b`, `a?[0]`) consumes the `?`
1199                // as a prefix on the next segment; the typed-AST
1200                // marks the segment as optional.
1201                self.open_at(ck, SyntaxKind::VARIABLE_EXPR);
1202                loop {
1203                    let is_optional_prefix = self.at(SyntaxKind::QUESTION)
1204                        && matches!(
1205                            self.nth(1),
1206                            Some(SyntaxKind::DOT) | Some(SyntaxKind::L_BRACK)
1207                        );
1208                    if is_optional_prefix {
1209                        self.bump(); // ?
1210                    } else if !self.at(SyntaxKind::DOT) && !self.at(SyntaxKind::L_BRACK) {
1211                        break;
1212                    }
1213                    if self.at(SyntaxKind::DOT) {
1214                        self.bump();
1215                        if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::NUMBER) {
1216                            self.bump();
1217                        } else {
1218                            self.error_at_current("expected identifier or index after `.`");
1219                        }
1220                    } else if self.at(SyntaxKind::L_BRACK) {
1221                        // `[ index ]`
1222                        self.bump();
1223                        self.parse_expr();
1224                        self.expect(SyntaxKind::R_BRACK);
1225                    } else {
1226                        break;
1227                    }
1228                }
1229                self.close();
1230            } else {
1231                break;
1232            }
1233        }
1234    }
1235
1236    fn parse_atom(&mut self) {
1237        // Leading attributes (`#brand T {...}` / `@decorator(x) expr`)
1238        // stack above the atom they decorate. The CST keeps them as
1239        // siblings of the atom inside whatever node the caller opened
1240        // (typically a DICT_FIELD value, a LIST element, or a function
1241        // argument). The legacy parser handled this case the same way
1242        // — the attribute decorates whatever expression follows.
1243        while self.at(SyntaxKind::HASH) || self.at(SyntaxKind::AT) {
1244            // Guard: when `#` heads a directive whose body is bare
1245            // (e.g. `#relaxed` standing alone at file scope), there's
1246            // no following expression — `parse_attribute` consumes
1247            // nothing extra, and the loop would spin. Break out the
1248            // moment we see no progress.
1249            let before = self.pos;
1250            self.parse_attribute();
1251            if self.pos == before {
1252                break;
1253            }
1254        }
1255        match self.current() {
1256            Some(SyntaxKind::NUMBER) => {
1257                self.open(SyntaxKind::LITERAL);
1258                self.bump();
1259                self.close();
1260            }
1261            Some(SyntaxKind::STRING) => {
1262                let text = self.tokens[self.pos_skip_trivia()].1;
1263                if text.starts_with('f') {
1264                    self.parse_f_string();
1265                } else {
1266                    self.open(SyntaxKind::LITERAL);
1267                    self.bump();
1268                    self.close();
1269                }
1270            }
1271            Some(SyntaxKind::IDENT) => {
1272                // `true` / `false` / `Infinity` / `NaN` and the removed `null` spelling are
1273                // keyword-shaped literals but lex as IDENT — promote
1274                // here so the lowering can decode them via the LITERAL
1275                // walker (which dispatches on the inner token text).
1276                let text = self.tokens[self.pos_skip_trivia()].1;
1277                if matches!(text, "null" | "true" | "false" | "Infinity" | "NaN") {
1278                    self.open(SyntaxKind::LITERAL);
1279                    self.bump();
1280                    self.close();
1281                } else if self.looks_like_variant_ctor() {
1282                    // `Enum.Variant { ... }` — at least two dotted
1283                    // segments followed by a brace body. Legacy
1284                    // `parse_variant_ctor` requires `path.len() >= 2`
1285                    // before committing; we match that here so plain
1286                    // `foo.bar` member access still falls through to
1287                    // the postfix loop as VARIABLE_EXPR.
1288                    self.parse_variant_ctor();
1289                } else if self.looks_like_type_atom() {
1290                    // Bareword type expressions (`Dict<String, Int>`,
1291                    // `List<Int>`, `Foo?`). Legacy `parse_type_expr`
1292                    // lowers these into `Expr::Type`; we follow suit so
1293                    // forms like `#brand Dict<String, Int> { ... }`
1294                    // and `#schema Id Type<Arg>` parse
1295                    // cleanly without the Pratt grammar misreading
1296                    // `<` as a comparison.
1297                    self.parse_type();
1298                } else {
1299                    self.open(SyntaxKind::VARIABLE_EXPR);
1300                    self.bump();
1301                    self.close();
1302                }
1303            }
1304            Some(SyntaxKind::AMP) => self.parse_reference(),
1305            Some(SyntaxKind::L_BRACE) => self.parse_dict(),
1306            Some(SyntaxKind::L_BRACK) => self.parse_list(),
1307            Some(SyntaxKind::L_PAREN) => {
1308                // Three shapes share the leading `(`:
1309                //   1. `(p1, p2) [-> RetType] => body` — a closure.
1310                //   2. `(expr)`                       — a parenthesised
1311                //      group (precedence override, NOT a tuple).
1312                //   3. `()` / `(e,)` / `(e1, e2, ...)` — a tuple value.
1313                //      The unit `()` and the trailing-comma 1-tuple
1314                //      `(e,)` are the disambiguators that keep `(e)`
1315                //      pure grouping.
1316                // Closure lookahead runs first (it can see the trailing
1317                // `=>`); the tuple-vs-group decision is made by scanning
1318                // the parenthesised body for a top-level comma.
1319                if self.try_parse_paren_closure() {
1320                    return;
1321                }
1322                self.parse_paren_or_tuple();
1323            }
1324            Some(SyntaxKind::STAR) => {
1325                self.open(SyntaxKind::WILDCARD);
1326                self.bump();
1327                self.close();
1328            }
1329            Some(SyntaxKind::ELLIPSIS) => {
1330                self.open(SyntaxKind::SPREAD_EXPR);
1331                self.bump();
1332                // v1.3 typed spread: `...<Type> expr`. The type hint
1333                // sits between the ellipsis and the source expression
1334                // and disambiguates strict-mode derivation. The inner
1335                // expression follows the type with no separator.
1336                if self.at(SyntaxKind::LT) {
1337                    self.bump();
1338                    self.parse_type();
1339                    self.expect(SyntaxKind::GT);
1340                }
1341                self.parse_unary();
1342                self.close();
1343            }
1344            _ => {
1345                // `parse_atom` is reached from inside dict / list /
1346                // call / argument productions. When no atom shape
1347                // matches the current token, recover to the nearest
1348                // structural boundary so the surrounding loop can
1349                // resume without spinning. We emit a single ERROR
1350                // covering the bad span; the diagnostic message is
1351                // the standard "expected expression."
1352                self.error_recover("expected expression", Self::STRUCTURAL_SYNC);
1353            }
1354        }
1355    }
1356
1357    /// Look ahead past the current IDENT for an `IDENT (DOT IDENT)+ {`
1358    /// sequence — the variant-constructor shape `Enum.Variant { ... }`
1359    /// the legacy `parse_variant_ctor` (`expr.rs`) detects. Returns
1360    /// true only when at least two dotted segments precede the `{`,
1361    /// matching the legacy `path.len() < 2` guard. Anything else
1362    /// (single-segment IDENT, dotted-path member access without a
1363    /// trailing brace) falls through to the regular VARIABLE_EXPR path.
1364    fn looks_like_variant_ctor(&self) -> bool {
1365        if !self.at(SyntaxKind::IDENT) {
1366            return false;
1367        }
1368        let mut idx = self.pos_skip_trivia() + 1;
1369        let advance_trivia = |i: &mut usize, toks: &[(SyntaxKind, &str)]| {
1370            while *i < toks.len() && toks[*i].0.is_trivia() {
1371                *i += 1;
1372            }
1373        };
1374        advance_trivia(&mut idx, &self.tokens);
1375        let mut segs: usize = 1;
1376        while self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::DOT) {
1377            idx += 1;
1378            advance_trivia(&mut idx, &self.tokens);
1379            if self.tokens.get(idx).map(|(k, _)| *k) != Some(SyntaxKind::IDENT) {
1380                return false;
1381            }
1382            idx += 1;
1383            segs += 1;
1384            advance_trivia(&mut idx, &self.tokens);
1385        }
1386        if segs < 2 {
1387            return false;
1388        }
1389        self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::L_BRACE)
1390    }
1391
1392    /// Decide whether the current IDENT atom heads a *type* expression
1393    /// (`Dict<String, Int>`, `List<Int>`, `Foo?`). Legacy
1394    /// `parse_type_expr` (`expr.rs`) lowers such atoms into
1395    /// `Expr::Type`; downstream forms like `#brand Dict<K, V> { ... }`
1396    /// rely on this so the value body isn't misread as `Dict < K`
1397    /// (binary comparison).
1398    ///
1399    /// Conservative: only fires when the type-ness signal is
1400    /// unambiguous — the IDENT is a known type head, OR is
1401    /// immediately followed by `<...>` generics (no whitespace
1402    /// before `<`), with the angle balance closing cleanly. A
1403    /// trailing `?` (optional marker) also qualifies.
1404    fn looks_like_type_atom(&self) -> bool {
1405        if !self.at(SyntaxKind::IDENT) {
1406            return false;
1407        }
1408        let head_text = self.current_text().unwrap_or("");
1409        let head_idx = self.pos_skip_trivia();
1410        let mut idx = head_idx + 1;
1411        let mut had_ws = false;
1412        while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1413            had_ws = true;
1414            idx += 1;
1415        }
1416        let known_head = matches!(
1417            head_text,
1418            "Int" | "String" | "Bool" | "Float" | "Any" | "List" | "Dict"
1419        );
1420        // `IDENT < ...>` — type with generics. Requires `<`
1421        // immediately adjacent (no whitespace).
1422        if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::LT) && !had_ws {
1423            // Scan for the matching `>` while tracking parens.
1424            let mut depth: i32 = 1;
1425            let mut paren_depth: i32 = 0;
1426            let mut j = idx + 1;
1427            while j < self.tokens.len() && depth > 0 {
1428                match self.tokens[j].0 {
1429                    SyntaxKind::LT => depth += 1,
1430                    SyntaxKind::GT => depth -= 1,
1431                    SyntaxKind::L_PAREN => paren_depth += 1,
1432                    SyntaxKind::R_PAREN if paren_depth > 0 => paren_depth -= 1,
1433                    SyntaxKind::L_BRACE
1434                    | SyntaxKind::R_BRACE
1435                    | SyntaxKind::R_PAREN
1436                    | SyntaxKind::FAT_ARROW
1437                        if depth == 1 && paren_depth == 0 =>
1438                    {
1439                        return false
1440                    }
1441                    _ => {}
1442                }
1443                j += 1;
1444            }
1445            return depth == 0;
1446        }
1447        // Bare type head with no generics — only fires when the IDENT
1448        // is recognised as a primitive type name. Guarded by what
1449        // follows so plain VARIABLE_EXPR usage doesn't accidentally
1450        // become a TYPE_NODE: must be followed by `{` (type-tagged
1451        // dict body, `#brand T { ... }`) or a stray type-suffix `?`.
1452        // The `?` no longer denotes optionality (that's `Option<T>`),
1453        // but routing it here lets `parse_type` emit a precise "use
1454        // Option<T>" error instead of a confusing ternary misparse.
1455        if known_head {
1456            let next = self.tokens.get(idx).map(|(k, _)| *k);
1457            if matches!(next, Some(SyntaxKind::QUESTION) | Some(SyntaxKind::L_BRACE)) {
1458                return true;
1459            }
1460        }
1461        // `IDENT ? {` — a legacy `Weather? { ... }` shape. The `?` is no
1462        // longer a valid optional marker, but the trailing brace makes
1463        // this unambiguously a (now-erroring) type-tagged value rather
1464        // than a ternary head, so route it into `parse_type` for the
1465        // helpful diagnostic.
1466        if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::QUESTION) {
1467            let mut j = idx + 1;
1468            while j < self.tokens.len() && self.tokens[j].0.is_trivia() {
1469                j += 1;
1470            }
1471            if self.tokens.get(j).map(|(k, _)| *k) == Some(SyntaxKind::L_BRACE) {
1472                return true;
1473            }
1474        }
1475        false
1476    }
1477
1478    /// `Enum (.Variant)+ { body }` — emit a VARIANT_CTOR node wrapping
1479    /// the dotted path (as plain IDENT + DOT tokens) and the brace
1480    /// body (a regular DICT). Caller has already determined via
1481    /// [`Self::looks_like_variant_ctor`] that we're at the head IDENT
1482    /// of such a construct.
1483    fn parse_variant_ctor(&mut self) {
1484        self.open(SyntaxKind::VARIANT_CTOR);
1485        // Head IDENT.
1486        self.bump();
1487        // Drain `.IDENT*` — guaranteed at least one by the peek.
1488        while self.at(SyntaxKind::DOT) {
1489            self.bump();
1490            if self.at(SyntaxKind::IDENT) {
1491                self.bump();
1492            } else {
1493                self.error_at_current("expected identifier after `.` in variant constructor");
1494                break;
1495            }
1496        }
1497        // Body is a regular dict literal.
1498        if self.at(SyntaxKind::L_BRACE) {
1499            self.parse_dict();
1500        } else {
1501            self.error("expected `{` in variant constructor");
1502        }
1503        self.close();
1504    }
1505
1506    /// Index into `tokens` of the next non-trivia token. Caller must
1507    /// guarantee `current().is_some()`.
1508    fn pos_skip_trivia(&self) -> usize {
1509        let mut idx = self.pos;
1510        while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1511            idx += 1;
1512        }
1513        idx
1514    }
1515
1516    /// Decompose a leading `f"..."` / `f#"..."#` STRING token into a
1517    /// proper [`F_STRING`] subtree. The original token is consumed
1518    /// as a SINGLE leaf at the lex level, but for the CST we walk
1519    /// its bytes and emit:
1520    ///
1521    /// * `F_STRING_OPEN` — `f"` / `f#"` / `f##"` …
1522    /// * `F_STRING_LITERAL` — verbatim text between zones.
1523    /// * `F_STRING_INTERPOLATION` (a sub-node) — wraps a
1524    ///   `F_STRING_INTERP_START`, a recursively-parsed expression
1525    ///   (using the same flat lex on the interpolation bytes), and a
1526    ///   `F_STRING_INTERP_END`.
1527    /// * `F_STRING_CLOSE` — matching `"` / `"#` / `"##` …
1528    ///
1529    /// Reuses [`lex::lex`] for the interpolation bytes so any future
1530    /// lexer change is picked up automatically. The whole emission is
1531    /// driven directly by the original byte span — so the round-trip
1532    /// invariant holds without help from the caller.
1533    fn parse_f_string(&mut self) {
1534        // Flush trivia FIRST so the F_STRING node nests under whatever
1535        // production opened most recently. We then refuse to advance
1536        // `self.pos` until we've emitted every sub-piece, so the
1537        // overall byte count matches the original STRING token.
1538        self.flush_trivia();
1539        let tok_idx = self.pos;
1540        let (_kind, full_text): (SyntaxKind, &'a str) = self.tokens[tok_idx];
1541        let start_byte = self.cursor_byte;
1542        // Parse the opening sequence: `f` + zero-or-more `#` + `"`.
1543        let bytes = full_text.as_bytes();
1544        // The lexer already guarantees this token starts with `f`,
1545        // and that `next_is_hash_quote(bytes, 1)` was true, but be
1546        // defensive — bail to plain LITERAL if anything else.
1547        if bytes.first() != Some(&b'f') {
1548            // Should be unreachable given the caller's guard.
1549            self.open(SyntaxKind::LITERAL);
1550            self.bump();
1551            self.close();
1552            return;
1553        }
1554        let mut idx: usize = 1;
1555        while bytes.get(idx) == Some(&b'#') {
1556            idx += 1;
1557        }
1558        if bytes.get(idx) != Some(&b'"') {
1559            // Malformed open — emit the whole thing as a single
1560            // LITERAL so byte-round-trip is preserved.
1561            self.open(SyntaxKind::LITERAL);
1562            self.bump();
1563            self.close();
1564            return;
1565        }
1566        let hash_count = idx - 1;
1567        let open_end = idx + 1;
1568        let mut closing = String::from("\"");
1569        for _ in 0..hash_count {
1570            closing.push('#');
1571        }
1572
1573        // Locate the close. The body starts at `open_end`; we have to
1574        // track interpolation depth so a literal `}` inside an
1575        // interpolation can't be mistaken for the close.
1576        let body_start = open_end;
1577        let close_pos = self.find_fstring_close(bytes, body_start, &closing, hash_count);
1578        let close_pos = match close_pos {
1579            Some(p) => p,
1580            None => {
1581                // Unterminated — fall back to LITERAL.
1582                self.open(SyntaxKind::LITERAL);
1583                self.bump();
1584                self.close();
1585                return;
1586            }
1587        };
1588
1589        // Open the composite node.
1590        self.open(SyntaxKind::F_STRING);
1591        // Emit OPEN.
1592        self.emit_raw_token(SyntaxKind::F_STRING_OPEN, &full_text[..open_end]);
1593        // Walk body, splitting LITERAL chunks vs interpolation zones.
1594        let mut i = body_start;
1595        let mut literal_start = i;
1596        let raw_string = hash_count > 0;
1597        while i < close_pos {
1598            if Self::starts_with_at(bytes, i, b"${") {
1599                if i > literal_start {
1600                    self.emit_raw_token(SyntaxKind::F_STRING_LITERAL, &full_text[literal_start..i]);
1601                }
1602                // Find matching `}`.
1603                let interp_start = i;
1604                let interp_body_start = i + 2;
1605                let mut depth: usize = 1;
1606                let mut j = interp_body_start;
1607                while j < close_pos && depth > 0 {
1608                    match bytes[j] {
1609                        b'{' => {
1610                            depth += 1;
1611                            j += 1;
1612                        }
1613                        b'}' => {
1614                            depth -= 1;
1615                            if depth == 0 {
1616                                break;
1617                            }
1618                            j += 1;
1619                        }
1620                        b'"' => {
1621                            // Skip nested "..." (the lexer always
1622                            // pairs them up safely on round-trip).
1623                            j = crate::lex::scan_normal_string_for_cst(bytes, j);
1624                        }
1625                        b => {
1626                            // Skip a full codepoint to make progress
1627                            // on invalid UTF-8 boundaries.
1628                            j += utf8_codepoint_len(b);
1629                        }
1630                    }
1631                }
1632                if depth != 0 {
1633                    // Unterminated interpolation — emit the rest as
1634                    // one literal so bytes survive, then stop.
1635                    self.emit_raw_token(SyntaxKind::F_STRING_LITERAL, &full_text[i..close_pos]);
1636                    literal_start = close_pos;
1637                    break;
1638                }
1639                let interp_body_end = j;
1640                let interp_close = j + 1;
1641                // Emit the interpolation sub-node.
1642                self.open(SyntaxKind::F_STRING_INTERPOLATION);
1643                self.emit_raw_token(
1644                    SyntaxKind::F_STRING_INTERP_START,
1645                    &full_text[interp_start..interp_body_start],
1646                );
1647                // Sub-parse the inner expression. The inner text is a
1648                // self-contained slice; we hand it to a fresh `lex` +
1649                // mini-parser. This is recursive (an interpolation can
1650                // contain another f-string), but the byte-accounting
1651                // works because we splice sub-tokens directly into the
1652                // builder.
1653                self.parse_fstring_interp_inner(&full_text[interp_body_start..interp_body_end]);
1654                self.emit_raw_token(
1655                    SyntaxKind::F_STRING_INTERP_END,
1656                    &full_text[interp_body_end..interp_close],
1657                );
1658                self.close();
1659                literal_start = interp_close;
1660                i = interp_close;
1661                continue;
1662            }
1663            // Escape handling — only relevant in non-raw f-strings.
1664            if !raw_string && bytes[i] == b'\\' && i + 1 < close_pos {
1665                i += 1 + utf8_codepoint_len(bytes[i + 1]);
1666                continue;
1667            }
1668            i += utf8_codepoint_len(bytes[i]);
1669        }
1670        if literal_start < close_pos {
1671            self.emit_raw_token(
1672                SyntaxKind::F_STRING_LITERAL,
1673                &full_text[literal_start..close_pos],
1674            );
1675        }
1676        // Emit CLOSE.
1677        self.emit_raw_token(SyntaxKind::F_STRING_CLOSE, &full_text[close_pos..]);
1678        self.close();
1679        // Advance the parser past the original STRING token now that
1680        // we've emitted every sub-piece directly.
1681        self.cursor_byte = start_byte + full_text.len();
1682        self.pos = tok_idx + 1;
1683    }
1684
1685    /// Emit a single leaf token directly to the builder (bypassing
1686    /// the lex-token cursor). Used by f-string decomposition; never
1687    /// advances `pos` / `cursor_byte`.
1688    fn emit_raw_token(&mut self, kind: SyntaxKind, text: &str) {
1689        self.builder
1690            .token(RelonLanguage::kind_to_raw_static(kind), text);
1691    }
1692
1693    /// Sub-parser for the inside of `${ ... }` in an f-string. We
1694    /// temporarily swap `self.tokens` with the inner-text lex (the
1695    /// `&str` slices inside still borrow from the original source,
1696    /// so the swapped `Vec` is fully compatible lifetime-wise),
1697    /// run the same Pratt expression grammar, then restore.
1698    fn parse_fstring_interp_inner(&mut self, text: &'a str) {
1699        let inner_tokens: Vec<(SyntaxKind, &'a str)> = crate::lex::lex(text);
1700        // Stash outer state and install the inner stream.
1701        let outer_tokens = std::mem::replace(&mut self.tokens, inner_tokens);
1702        let outer_pos = std::mem::replace(&mut self.pos, 0);
1703        let outer_cursor = self.cursor_byte;
1704        self.cursor_byte = 0;
1705        if !self.at_end() {
1706            self.parse_expr();
1707        }
1708        // Absorb any remaining bytes so the F_STRING_INTERPOLATION
1709        // body has full byte coverage. Trailing whitespace becomes
1710        // trivia naturally; anything else lands in an ERROR node.
1711        if !self.at_end() {
1712            self.error_recover("trailing input in interpolation", &[]);
1713        }
1714        self.flush_trivia();
1715        // Restore outer state.
1716        self.tokens = outer_tokens;
1717        self.pos = outer_pos;
1718        self.cursor_byte = outer_cursor + text.len();
1719    }
1720
1721    fn find_fstring_close(
1722        &self,
1723        bytes: &[u8],
1724        body_start: usize,
1725        closing: &str,
1726        hashes: usize,
1727    ) -> Option<usize> {
1728        let raw = hashes > 0;
1729        let mut idx = body_start;
1730        while idx + closing.len() <= bytes.len() {
1731            // Skip past balanced `${...}` interpolations.
1732            if Self::starts_with_at(bytes, idx, b"${") {
1733                let mut depth: usize = 1;
1734                let mut j = idx + 2;
1735                while j < bytes.len() && depth > 0 {
1736                    match bytes[j] {
1737                        b'{' => depth += 1,
1738                        b'}' => depth -= 1,
1739                        b'"' => {
1740                            j = crate::lex::scan_normal_string_for_cst(bytes, j);
1741                            continue;
1742                        }
1743                        _ => {}
1744                    }
1745                    if depth == 0 {
1746                        j += 1;
1747                        break;
1748                    }
1749                    j += 1;
1750                }
1751                if depth != 0 {
1752                    return None;
1753                }
1754                idx = j;
1755                continue;
1756            }
1757            if !raw && bytes[idx] == b'\\' {
1758                if idx + 1 >= bytes.len() {
1759                    return None;
1760                }
1761                idx += 1 + utf8_codepoint_len(bytes[idx + 1]);
1762                continue;
1763            }
1764            if Self::starts_with_at(bytes, idx, closing.as_bytes()) {
1765                return Some(idx);
1766            }
1767            idx += utf8_codepoint_len(bytes[idx]);
1768        }
1769        None
1770    }
1771
1772    fn starts_with_at(bytes: &[u8], idx: usize, needle: &[u8]) -> bool {
1773        bytes
1774            .get(idx..idx + needle.len())
1775            .is_some_and(|s| s == needle)
1776    }
1777
1778    /// Scan forward (without committing) starting from `start_idx`,
1779    /// past a balanced `(...)`, returning the index of the first
1780    /// non-trivia token AFTER the closing `)`. `start_idx` must point
1781    /// at the opening `L_PAREN` token. Returns `None` if the parens
1782    /// are unbalanced (we ran past EOI before matching).
1783    fn scan_after_matching_paren(&self, start_idx: usize) -> Option<usize> {
1784        debug_assert!(self.tokens.get(start_idx).map(|(k, _)| *k) == Some(SyntaxKind::L_PAREN));
1785        let mut depth: i32 = 0;
1786        let mut idx = start_idx;
1787        while idx < self.tokens.len() {
1788            let kind = self.tokens[idx].0;
1789            match kind {
1790                SyntaxKind::L_PAREN => depth += 1,
1791                SyntaxKind::R_PAREN => {
1792                    depth -= 1;
1793                    if depth == 0 {
1794                        let mut next = idx + 1;
1795                        while next < self.tokens.len() && self.tokens[next].0.is_trivia() {
1796                            next += 1;
1797                        }
1798                        return Some(next);
1799                    }
1800                }
1801                _ => {}
1802            }
1803            idx += 1;
1804        }
1805        None
1806    }
1807
1808    /// Without consuming anything, decide whether the `(...)` at the
1809    /// current position is followed (modulo an optional `-> Type`) by
1810    /// a `=>` arrow — i.e. the parens are a closure parameter list,
1811    /// not a parenthesised expression. We're already at the
1812    /// `L_PAREN`.
1813    fn looks_like_closure_after_paren(&self) -> bool {
1814        let lparen_idx = self.pos_skip_trivia();
1815        let Some(after_paren) = self.scan_after_matching_paren(lparen_idx) else {
1816            return false;
1817        };
1818        // `=> ...`?
1819        if matches!(
1820            self.tokens.get(after_paren).map(|(k, _)| *k),
1821            Some(SyntaxKind::FAT_ARROW)
1822        ) {
1823            return true;
1824        }
1825        // `-> RetType => ...`? Skip past the return-type tokens. We
1826        // can't fully parse a type without committing, so scan ahead
1827        // conservatively until we hit `=>` (closure) or anything that
1828        // disqualifies (newline-like break is fine — trivia is skipped
1829        // by definition, but we treat `,`/`}`/`]`/`)`/`:` as a
1830        // disqualifier so we never confuse `-> Type:` patterns).
1831        if matches!(
1832            self.tokens.get(after_paren).map(|(k, _)| *k),
1833            Some(SyntaxKind::THIN_ARROW)
1834        ) {
1835            let mut idx = after_paren + 1;
1836            let mut bracket_depth: i32 = 0;
1837            while idx < self.tokens.len() {
1838                let kind = self.tokens[idx].0;
1839                if kind.is_trivia() {
1840                    idx += 1;
1841                    continue;
1842                }
1843                match kind {
1844                    SyntaxKind::FAT_ARROW if bracket_depth == 0 => return true,
1845                    SyntaxKind::COMMA
1846                    | SyntaxKind::R_BRACE
1847                    | SyntaxKind::R_BRACK
1848                    | SyntaxKind::R_PAREN
1849                    | SyntaxKind::COLON
1850                        if bracket_depth == 0 =>
1851                    {
1852                        return false
1853                    }
1854                    SyntaxKind::L_BRACE
1855                    | SyntaxKind::L_BRACK
1856                    | SyntaxKind::L_PAREN
1857                    | SyntaxKind::LT => {
1858                        bracket_depth += 1;
1859                    }
1860                    SyntaxKind::R_BRACE | SyntaxKind::R_BRACK | SyntaxKind::GT
1861                        if bracket_depth > 0 =>
1862                    {
1863                        bracket_depth -= 1;
1864                    }
1865                    _ => {}
1866                }
1867                idx += 1;
1868            }
1869        }
1870        false
1871    }
1872
1873    /// When `current()` is `L_PAREN` and `looks_like_closure_after_paren`
1874    /// is true, consume the entire `(params) [-> RetType] => body`
1875    /// construct and emit a CLOSURE node. Returns true on success.
1876    /// Leaves the parser untouched and returns false otherwise.
1877    fn try_parse_paren_closure(&mut self) -> bool {
1878        if !self.at(SyntaxKind::L_PAREN) {
1879            return false;
1880        }
1881        if !self.looks_like_closure_after_paren() {
1882            return false;
1883        }
1884        self.open(SyntaxKind::CLOSURE);
1885        self.bump(); // (
1886                     // Comma-separated CLOSURE_PARAMs.
1887        while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
1888            self.parse_closure_param();
1889            if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_PAREN) {
1890                self.error_recover(
1891                    "expected `,` or `)` in closure parameter list",
1892                    &[SyntaxKind::COMMA, SyntaxKind::R_PAREN],
1893                );
1894                self.eat(SyntaxKind::COMMA);
1895            }
1896        }
1897        self.expect(SyntaxKind::R_PAREN);
1898        // Optional `-> RetType`.
1899        if self.eat(SyntaxKind::THIN_ARROW) {
1900            self.parse_type();
1901        }
1902        if self.expect(SyntaxKind::FAT_ARROW) {
1903            self.parse_expr();
1904        }
1905        self.close();
1906        true
1907    }
1908
1909    /// Parse a `(`-led atom that is NOT a closure: either a
1910    /// parenthesised group `(expr)` or a tuple value literal.
1911    ///
1912    /// Disambiguation (locked design):
1913    ///   * `()`              → unit / zero-tuple (TUPLE node, no children).
1914    ///   * `(e)`             → grouping (the inner expression, no wrapper).
1915    ///   * `(e,)`            → 1-tuple (trailing comma forces it).
1916    ///   * `(e1, e2, ...)`   → n-tuple.
1917    ///
1918    /// The opening `(` and closing `)` land inside the TUPLE node for the
1919    /// tuple shapes so the round-trip-by-bytes invariant holds; for the
1920    /// grouping shape the parens are bumped as bare leaves around the
1921    /// inner expression (matching the pre-tuple behaviour).
1922    fn parse_paren_or_tuple(&mut self) {
1923        debug_assert!(self.at(SyntaxKind::L_PAREN));
1924        let ck = self.checkpoint();
1925        self.bump(); // (
1926                     // Empty parens — the unit tuple `()`.
1927        if self.at(SyntaxKind::R_PAREN) {
1928            self.open_at(ck, SyntaxKind::TUPLE);
1929            self.bump(); // )
1930            self.close();
1931            return;
1932        }
1933        // Parse the first element / grouped expression.
1934        self.parse_expr();
1935        if self.at(SyntaxKind::COMMA) {
1936            // At least one comma → a tuple. Wrap everything (including
1937            // the already-parsed first element) in a TUPLE node.
1938            self.open_at(ck, SyntaxKind::TUPLE);
1939            while self.eat(SyntaxKind::COMMA) {
1940                // Trailing comma before `)` is allowed (and is what
1941                // makes `(e,)` a 1-tuple).
1942                if self.at(SyntaxKind::R_PAREN) || self.at_end() {
1943                    break;
1944                }
1945                self.parse_expr();
1946            }
1947            self.expect(SyntaxKind::R_PAREN);
1948            self.close();
1949            return;
1950        }
1951        // No comma — plain grouping `(expr)`. No TUPLE wrapper; the
1952        // inner expression stands on its own (precedence override only).
1953        self.expect(SyntaxKind::R_PAREN);
1954    }
1955
1956    /// One closure parameter — either `name` or `Type name`. P2
1957    /// records the type, when present, as a TYPE_NODE child preceding
1958    /// the IDENT.
1959    fn parse_closure_param(&mut self) {
1960        self.open(SyntaxKind::CLOSURE_PARAM);
1961        // Heuristic: if the next two non-trivia tokens are IDENT IDENT
1962        // (or a more elaborate type followed by an ident), treat the
1963        // leading run as a TypeNode. We delegate to `parse_type` which
1964        // commits conservatively (it stops at the first non-type-y
1965        // token, so a bare `IDENT` doesn't get swallowed as a type).
1966        // The simplest signal of "this is a typed param" is that
1967        // there are at least two adjacent IDENTs, possibly with `<...>`
1968        // / `?` in the type slot.
1969        if self.peek_is_typed_param() {
1970            self.parse_type();
1971        }
1972        // A bare `_` is a legal parameter name (the Rust-style
1973        // ignore binding `(acc, _) => ...`). Since the lexer now emits
1974        // `_` as `UNDERSCORE` rather than `IDENT`, accept it here too so
1975        // a `_` parameter keeps parsing exactly as it did before the
1976        // wildcard split.
1977        if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::UNDERSCORE) {
1978            self.bump();
1979        } else {
1980            self.error_at_current("expected closure parameter name");
1981        }
1982        self.close();
1983    }
1984
1985    /// Cheap lookahead: does the upcoming token stream look like
1986    /// `Type ident` (a typed closure parameter) or just `ident`
1987    /// (untyped)? We say "typed" if the current token is IDENT and
1988    /// the next non-trivia token after a `Type`-shaped run is another
1989    /// IDENT — meaning the first one is the type and the second is
1990    /// the param name. We allow `<...>` and `?` between them.
1991    ///
1992    /// Crucial heuristic: when a `<` appears, it must be immediately
1993    /// adjacent (no whitespace) to the preceding IDENT for it to
1994    /// count as opening a generic argument list. Without this
1995    /// guard, `a < b: c` (a closure param of type `a` named `< b`
1996    /// — but `<` isn't a valid name leader, so it bails)
1997    /// would still be misinterpreted in pathological cases. Rust /
1998    /// TypeScript both use the same lex-time adjacency check.
1999    fn peek_is_typed_param(&self) -> bool {
2000        if !self.at(SyntaxKind::IDENT) {
2001            return false;
2002        }
2003        // Walk past IDENT, optional `.IDENT*`, optional `<...>`,
2004        // optional `?`, then check for IDENT.
2005        let head_idx = self.pos_skip_trivia();
2006        let mut idx = head_idx + 1;
2007        let advance_trivia = |i: &mut usize| {
2008            while *i < self.tokens.len() && self.tokens[*i].0.is_trivia() {
2009                *i += 1;
2010            }
2011        };
2012        // For the adjacency check we want to know whether ANY trivia
2013        // intervenes between the IDENT and the next non-trivia token.
2014        let mut had_trivia_after_head = false;
2015        if idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
2016            had_trivia_after_head = true;
2017            advance_trivia(&mut idx);
2018        }
2019        // `.IDENT*`
2020        while idx < self.tokens.len() && self.tokens[idx].0 == SyntaxKind::DOT {
2021            idx += 1;
2022            advance_trivia(&mut idx);
2023            if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::IDENT) {
2024                idx += 1;
2025                advance_trivia(&mut idx);
2026            } else {
2027                return false;
2028            }
2029            had_trivia_after_head = false;
2030        }
2031        // `<...>` — balanced angle scan. Refuse when whitespace
2032        // separates the IDENT and the `<` — that's the disambiguation
2033        // hook between `Foo<Bar>` (type) and `a < b` (comparison).
2034        if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::LT) {
2035            if had_trivia_after_head {
2036                return false;
2037            }
2038            let mut depth: i32 = 1;
2039            // Track nested `(...)` so tuple-type arguments like
2040            // `List<(Int, String)>` don't trip the comma rejection.
2041            let mut paren_depth: i32 = 0;
2042            idx += 1;
2043            while idx < self.tokens.len() && depth > 0 {
2044                match self.tokens[idx].0 {
2045                    SyntaxKind::LT => depth += 1,
2046                    SyntaxKind::GT => depth -= 1,
2047                    SyntaxKind::L_PAREN => paren_depth += 1,
2048                    SyntaxKind::R_PAREN if paren_depth > 0 => paren_depth -= 1,
2049                    // Anything that strongly disqualifies a type
2050                    // expression — bail. Commas at depth==1 are
2051                    // fine (`Dict<String, Int>`) — only structural
2052                    // tokens that can never appear inside a type
2053                    // disqualify the scan.
2054                    SyntaxKind::L_BRACE
2055                    | SyntaxKind::R_BRACE
2056                    | SyntaxKind::R_PAREN
2057                    | SyntaxKind::FAT_ARROW
2058                        if depth == 1 && paren_depth == 0 =>
2059                    {
2060                        return false
2061                    }
2062                    _ => {}
2063                }
2064                idx += 1;
2065            }
2066            if depth != 0 {
2067                return false;
2068            }
2069            advance_trivia(&mut idx);
2070        }
2071        // Optional `?`.
2072        if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::QUESTION) {
2073            idx += 1;
2074            advance_trivia(&mut idx);
2075        }
2076        self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::IDENT)
2077    }
2078
2079    /// Parse a type-expression-shaped run of tokens into a TYPE_NODE.
2080    /// The grammar:
2081    ///
2082    ///   TypeNode    := TupleType | (PathSeg ('.' PathSeg)* GenericArgs?)
2083    ///   TupleType   := '(' ')' | '(' TypeNode ',' ')' | '(' TypeNode (',' TypeNode)+ ','? ')'
2084    ///   PathSeg     := IDENT | STRING
2085    ///   GenericArgs := '<' (TypeNode (',' TypeNode)*)? ','? '>'
2086    ///
2087    /// Handles string-keyed segments (`"namespaced".Foo`), nested
2088    /// generics (`Map<String, Int>`), and v1.7 tuple types in both
2089    /// type-hint position (`(Int, String) pair: ...`) and as generic
2090    /// arguments (`List<(Int, String)>`). A trailing type-suffix `?`
2091    /// (`T?`) is no longer valid — optionality is written `Option<T>` —
2092    /// so we still consume any stray `?` token but flag it as an error.
2093    fn parse_type(&mut self) {
2094        // Tuple type — committed only when the caller picked
2095        // `parse_type` (typed-key / generic-arg / closure-param /
2096        // return-type position). The expression grammar uses its own
2097        // `(...)` handler so a parens group never reaches this branch.
2098        if self.at(SyntaxKind::L_PAREN) {
2099            self.parse_tuple_type();
2100            return;
2101        }
2102        self.open(SyntaxKind::TYPE_NODE);
2103        // First segment: IDENT or STRING (allowed in the v1 grammar
2104        // for dotted-string paths like `"foo".Bar`).
2105        if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::STRING) {
2106            self.bump();
2107        } else {
2108            self.error_at_current("expected type name");
2109            self.close();
2110            return;
2111        }
2112        // Dotted continuation.
2113        while self.at(SyntaxKind::DOT) {
2114            self.bump();
2115            if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::STRING) {
2116                self.bump();
2117            } else {
2118                self.error_at_current("expected identifier after `.` in type");
2119            }
2120        }
2121        // Generic argument list. We're in a committed type context
2122        // here (the caller already decided "this is a type"), so any
2123        // `<` opens generics — no adjacency check needed.
2124        if self.at(SyntaxKind::LT) {
2125            self.bump();
2126            loop {
2127                if self.at(SyntaxKind::GT) || self.at_end() {
2128                    break;
2129                }
2130                self.parse_type();
2131                if !self.eat(SyntaxKind::COMMA) {
2132                    break;
2133                }
2134            }
2135            self.expect(SyntaxKind::GT);
2136        }
2137        // Type-suffix `?` is no longer valid syntax — optional types
2138        // are written `Option<T>`. We still consume the token (keeping
2139        // the tree lossless and avoiding recovery spin) but flag it as
2140        // an error pointing the user to the named-type form.
2141        if self.at(SyntaxKind::QUESTION) {
2142            self.error("optional types are written `Option<T>`, not `T?`");
2143            self.bump();
2144        }
2145        self.close();
2146    }
2147
2148    /// `(T1, T2, ...)` tuple type. Three shapes:
2149    ///
2150    /// * `()`         — zero-tuple.
2151    /// * `(T,)`       — one-tuple (trailing comma is mandatory; without
2152    ///   it the form is a parenthesised type, not used
2153    ///   in the current grammar but still consumed as
2154    ///   a single-element TUPLE_TYPE for forward-compat).
2155    /// * `(T1, T2)`   — 2+ tuple, optional trailing comma.
2156    ///
2157    /// Caller has already committed to type-position via `parse_type`,
2158    /// so we don't have to worry about confusing this with a closure
2159    /// param list — the closure detection happens at the expression
2160    /// layer (`try_parse_paren_closure`) and never reaches here.
2161    fn parse_tuple_type(&mut self) {
2162        self.open(SyntaxKind::TUPLE_TYPE);
2163        self.bump(); // (
2164        while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
2165            self.parse_type();
2166            if !self.eat(SyntaxKind::COMMA) {
2167                break;
2168            }
2169        }
2170        self.expect(SyntaxKind::R_PAREN);
2171        // Type-suffix `?` is no longer valid syntax — optional tuple
2172        // types are written `Option<(...)>`. Consume the token but flag
2173        // it as an error.
2174        if self.at(SyntaxKind::QUESTION) {
2175            self.error("optional types are written `Option<T>`, not `T?`");
2176            self.bump();
2177        }
2178        self.close();
2179    }
2180
2181    fn parse_reference(&mut self) {
2182        // `&base.tail.tail...` with optional-chain `?.` / `?[` access
2183        // forms (`&a.b?.c`, `&a?.[0]`). The legacy `reference_var`
2184        // grammar accepts both `.` / `[` and the `?`-prefixed variant
2185        // — the typed-AST tags the optional-ness on each `TokenKey`.
2186        self.open(SyntaxKind::REFERENCE_EXPR);
2187        self.bump(); // &
2188        if self.at(SyntaxKind::IDENT) {
2189            self.bump(); // base name
2190        } else {
2191            self.error_at_current("expected reference base after `&`");
2192        }
2193        loop {
2194            // `?.` and `?[` — eat the `?` prefix first, then fall
2195            // through to the regular dot / bracket handling.
2196            if self.at(SyntaxKind::QUESTION)
2197                && matches!(
2198                    self.nth(1),
2199                    Some(SyntaxKind::DOT) | Some(SyntaxKind::L_BRACK)
2200                )
2201            {
2202                self.bump(); // ?
2203            } else if !self.at(SyntaxKind::DOT) && !self.at(SyntaxKind::L_BRACK) {
2204                break;
2205            }
2206            if self.at(SyntaxKind::DOT) {
2207                self.bump();
2208                if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::NUMBER) {
2209                    self.bump();
2210                } else {
2211                    self.error_at_current("expected identifier or index after `.`");
2212                }
2213            } else if self.at(SyntaxKind::L_BRACK) {
2214                self.bump(); // [
2215                self.parse_expr();
2216                self.expect(SyntaxKind::R_BRACK);
2217            } else {
2218                break;
2219            }
2220        }
2221        self.close();
2222    }
2223
2224    fn parse_list(&mut self) {
2225        // We don't know up-front whether this `[` opens a list or a
2226        // comprehension — comprehensions look like `[ expr for id in
2227        // iterable (if cond)? ]`. Use a checkpoint so we can wrap the
2228        // first expression into either LIST or COMPREHENSION based on
2229        // what we find next.
2230        let outer_ck = self.checkpoint();
2231        self.bump(); // [
2232                     // Empty list — handle explicitly so we don't try to parse an
2233                     // expression after `[`.
2234        if self.at(SyntaxKind::R_BRACK) {
2235            self.open_at(outer_ck, SyntaxKind::LIST);
2236            self.bump();
2237            self.close();
2238            return;
2239        }
2240        // Parse the first element (or `for` head). If it's a spread,
2241        // it can't be a comprehension head — emit LIST directly.
2242        if self.at(SyntaxKind::ELLIPSIS) {
2243            self.open_at(outer_ck, SyntaxKind::LIST);
2244            self.parse_list_body_tail();
2245            return;
2246        }
2247        self.parse_expr();
2248        // After the first expression: if `for IDENT in ...`, this is
2249        // a comprehension. Otherwise it's a regular list — wrap as
2250        // LIST and continue collecting the rest.
2251        if self.at(SyntaxKind::IDENT) && self.current_text() == Some("for") {
2252            self.open_at(outer_ck, SyntaxKind::COMPREHENSION);
2253            self.bump(); // `for`
2254            if self.at(SyntaxKind::IDENT) {
2255                self.bump();
2256            } else {
2257                self.error_at_current("expected identifier after `for`");
2258            }
2259            if self.at(SyntaxKind::IDENT) && self.current_text() == Some("in") {
2260                self.bump();
2261            } else {
2262                self.error("expected `in` in comprehension");
2263            }
2264            self.parse_expr();
2265            if self.at(SyntaxKind::IDENT) && self.current_text() == Some("if") {
2266                self.bump();
2267                self.parse_expr();
2268            }
2269            self.expect(SyntaxKind::R_BRACK);
2270            self.close();
2271            return;
2272        }
2273        // Regular list — wrap the existing first element into a LIST
2274        // node and continue.
2275        self.open_at(outer_ck, SyntaxKind::LIST);
2276        if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACK) {
2277            self.error_recover(
2278                "expected `,` or `]` in list",
2279                &[SyntaxKind::COMMA, SyntaxKind::R_BRACK],
2280            );
2281            self.eat(SyntaxKind::COMMA);
2282        }
2283        self.parse_list_body_tail();
2284    }
2285
2286    /// Consume the remainder of a LIST body (after the optional leading
2287    /// element + comma have already been emitted) up to and including
2288    /// the closing `]`, then close the LIST node.
2289    fn parse_list_body_tail(&mut self) {
2290        while !self.at(SyntaxKind::R_BRACK) && !self.at_end() {
2291            self.parse_expr();
2292            if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACK) {
2293                self.error_recover(
2294                    "expected `,` or `]` in list",
2295                    &[SyntaxKind::COMMA, SyntaxKind::R_BRACK],
2296                );
2297                self.eat(SyntaxKind::COMMA);
2298            }
2299        }
2300        self.expect(SyntaxKind::R_BRACK);
2301        self.close();
2302    }
2303
2304    /// Text of the current (non-trivia) token, or None at EOI. Used by
2305    /// keyword-tail productions (`for`, `in`, `if`, `match`, `where`,
2306    /// `with`) that the lexer doesn't split out.
2307    fn current_text(&self) -> Option<&'a str> {
2308        let idx = self.pos_skip_trivia();
2309        self.tokens.get(idx).map(|(_, t)| *t)
2310    }
2311
2312    fn parse_dict(&mut self) {
2313        self.open(SyntaxKind::DICT);
2314        self.bump(); // {
2315        while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
2316            self.parse_dict_field();
2317            if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACE) {
2318                self.error_recover(
2319                    "expected `,` or `}` in dict",
2320                    &[SyntaxKind::COMMA, SyntaxKind::R_BRACE],
2321                );
2322                self.eat(SyntaxKind::COMMA);
2323            }
2324        }
2325        self.expect(SyntaxKind::R_BRACE);
2326        self.close();
2327    }
2328
2329    fn parse_dict_field(&mut self) {
2330        self.open(SyntaxKind::DICT_FIELD);
2331        // Leading attributes (e.g. `#internal` / `#expect "msg"` /
2332        // `@currency("USD")`) stack above the pair's key. Same
2333        // shape the file root permits.
2334        while self.at(SyntaxKind::HASH) || self.at(SyntaxKind::AT) {
2335            self.parse_attribute();
2336        }
2337        if self.at_end() {
2338            self.close();
2339            return;
2340        }
2341        // Attribute-only field: `#import x from "p", "next": 1` — the
2342        // `#import` directive already consumed its full body, leaving
2343        // the field separator next. Same for a sequence of bare
2344        // directives whose payload is the field itself (e.g.
2345        // `#schema X { ... },`). Close the field here so the surrounding
2346        // dict resumes at the separator.
2347        if matches!(
2348            self.current(),
2349            Some(SyntaxKind::COMMA) | Some(SyntaxKind::R_BRACE)
2350        ) {
2351            self.close();
2352            return;
2353        }
2354        // The key: an ident, a string, or `...` (spread).
2355        if self.at(SyntaxKind::ELLIPSIS) {
2356            self.open(SyntaxKind::SPREAD_EXPR);
2357            self.bump();
2358            // v1.3 typed spread `...<Type> source` — same shape as the
2359            // atom-level spread, but here we sit inside a dict field
2360            // so the source expression can be a richer form.
2361            if self.at(SyntaxKind::LT) {
2362                self.bump();
2363                self.parse_type();
2364                self.expect(SyntaxKind::GT);
2365            }
2366            self.parse_expr();
2367            self.close();
2368            self.close();
2369            return;
2370        }
2371        // Optional leading type hint: `Type key: value` /
2372        // `Type key(params): body`. We commit only when peeking
2373        // suggests a typed-key shape — otherwise the leading run is
2374        // the key itself (e.g. a single identifier). v1.7 tuple types
2375        // (`(Int, String) pair: ...`) take the same slot and are
2376        // detected by a separate `(...)`-leading peek.
2377        if self.peek_is_tuple_typed_dict_key() {
2378            self.parse_tuple_type();
2379        } else if self.peek_is_typed_dict_key() {
2380            self.parse_type();
2381        }
2382        if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::STRING) {
2383            self.bump();
2384        } else if self.at(SyntaxKind::L_BRACK) {
2385            // Dynamic key `[expr]: value`.
2386            self.bump();
2387            // Optional `<T>` type-hint between `[` and the expression.
2388            if self.at(SyntaxKind::LT) {
2389                self.bump();
2390                self.parse_type();
2391                self.expect(SyntaxKind::GT);
2392            }
2393            self.parse_expr();
2394            self.expect(SyntaxKind::R_BRACK);
2395        } else {
2396            self.error_recover(
2397                "expected dict key",
2398                &[SyntaxKind::COLON, SyntaxKind::COMMA, SyntaxKind::R_BRACE],
2399            );
2400        }
2401        // Method-shorthand closure: `key(params) [-> Ret]: body`.
2402        // Detect via a `(` immediately after the key. We commit to the
2403        // closure interpretation whenever a `(` follows the key, since
2404        // the v1 grammar already reserves that position exclusively
2405        // for the method shorthand.
2406        if self.at(SyntaxKind::L_PAREN) {
2407            // Emit `(params) [-> Ret]` as a CLOSURE_PARAM list now;
2408            // the body that follows the `:` will be wrapped together
2409            // with the params into a CLOSURE node via a checkpoint.
2410            let closure_ck = self.checkpoint();
2411            self.bump(); // (
2412            while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
2413                self.parse_closure_param();
2414                if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_PAREN) {
2415                    self.error_recover(
2416                        "expected `,` or `)` in closure parameter list",
2417                        &[SyntaxKind::COMMA, SyntaxKind::R_PAREN],
2418                    );
2419                    self.eat(SyntaxKind::COMMA);
2420                }
2421            }
2422            self.expect(SyntaxKind::R_PAREN);
2423            // Optional `-> RetType`.
2424            if self.eat(SyntaxKind::THIN_ARROW) {
2425                self.parse_type();
2426            }
2427            if self.eat(SyntaxKind::COLON) {
2428                self.open_at(closure_ck, SyntaxKind::CLOSURE);
2429                self.parse_expr();
2430                self.close();
2431            } else {
2432                self.error("expected `:` in dict field");
2433            }
2434        } else if self.eat(SyntaxKind::COLON) {
2435            self.parse_expr();
2436        } else {
2437            self.error("expected `:` in dict field");
2438        }
2439        self.close();
2440    }
2441
2442    /// Does the upcoming token stream start with a Type-shaped run
2443    /// followed by an IDENT (or STRING) and then `:` / `(` (i.e. a
2444    /// typed-dict-key, NOT a dotted-path or a bare key)? Conservative
2445    /// — false negatives are fine (the field still parses untyped),
2446    /// but a false positive would consume the key as a type.
2447    fn peek_is_typed_dict_key(&self) -> bool {
2448        // Same logic as peek_is_typed_param, but we also accept STRING
2449        // as the trailing key segment, and we require a following
2450        // `:` or `(` so a dotted-path-as-value doesn't trip us up.
2451        if !self.at(SyntaxKind::IDENT) {
2452            return false;
2453        }
2454        let mut idx = self.pos_skip_trivia() + 1;
2455        let advance_trivia = |i: &mut usize, toks: &[(SyntaxKind, &str)]| {
2456            while *i < toks.len() && toks[*i].0.is_trivia() {
2457                *i += 1;
2458            }
2459        };
2460        advance_trivia(&mut idx, &self.tokens);
2461        while idx < self.tokens.len() && self.tokens[idx].0 == SyntaxKind::DOT {
2462            idx += 1;
2463            advance_trivia(&mut idx, &self.tokens);
2464            if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::IDENT) {
2465                idx += 1;
2466                advance_trivia(&mut idx, &self.tokens);
2467            } else {
2468                return false;
2469            }
2470        }
2471        let saw_generics = self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::LT);
2472        if saw_generics {
2473            let mut depth: i32 = 1;
2474            // Track nested `(` / `)` so a tuple-type argument like
2475            // `List<(Int, String)>` doesn't make the rejection bail
2476            // out the moment it hits a comma or `)`.
2477            let mut paren_depth: i32 = 0;
2478            idx += 1;
2479            while idx < self.tokens.len() && depth > 0 {
2480                match self.tokens[idx].0 {
2481                    SyntaxKind::LT => depth += 1,
2482                    SyntaxKind::GT => depth -= 1,
2483                    SyntaxKind::L_PAREN => paren_depth += 1,
2484                    SyntaxKind::R_PAREN if paren_depth > 0 => paren_depth -= 1,
2485                    SyntaxKind::L_BRACE
2486                    | SyntaxKind::R_BRACE
2487                    | SyntaxKind::R_PAREN
2488                    | SyntaxKind::FAT_ARROW
2489                    | SyntaxKind::THIN_ARROW
2490                    | SyntaxKind::COLON
2491                        if depth == 1 && paren_depth == 0 =>
2492                    {
2493                        return false
2494                    }
2495                    _ => {}
2496                }
2497                idx += 1;
2498            }
2499            if depth != 0 {
2500                return false;
2501            }
2502            advance_trivia(&mut idx, &self.tokens);
2503        }
2504        let saw_question = self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::QUESTION);
2505        if saw_question {
2506            idx += 1;
2507            advance_trivia(&mut idx, &self.tokens);
2508        }
2509        // Now we must see IDENT or STRING (the key) followed by `:`
2510        // or `(`. If neither, the leading run wasn't a type — bail
2511        // and let the surrounding parser treat it as the key itself.
2512        if !matches!(
2513            self.tokens.get(idx).map(|(k, _)| *k),
2514            Some(SyntaxKind::IDENT) | Some(SyntaxKind::STRING)
2515        ) {
2516            return false;
2517        }
2518        let mut after_key = idx + 1;
2519        advance_trivia(&mut after_key, &self.tokens);
2520        let next = self.tokens.get(after_key).map(|(k, _)| *k);
2521        matches!(next, Some(SyntaxKind::COLON) | Some(SyntaxKind::L_PAREN))
2522    }
2523
2524    /// Does the upcoming token stream start with a balanced `(...)`
2525    /// tuple-type prefix followed by an IDENT (or STRING) and then
2526    /// `:` / `(` (i.e. `(Int, String) pair: ...`)? Used by
2527    /// [`parse_dict_field`] to commit to the tuple-type lead, which
2528    /// has to win over the "parens group" interpretation of the same
2529    /// bytes when they appear at the head of a dict field. The
2530    /// balanced paren scan walks past nested generics / nested parens
2531    /// so `List<(Int, String)>` doesn't fool the outer detector.
2532    fn peek_is_tuple_typed_dict_key(&self) -> bool {
2533        if !self.at(SyntaxKind::L_PAREN) {
2534            return false;
2535        }
2536        let lparen_idx = self.pos_skip_trivia();
2537        let Some(after_paren) = self.scan_after_matching_paren(lparen_idx) else {
2538            return false;
2539        };
2540        // Optional trailing `?` after the tuple type.
2541        let mut idx = after_paren;
2542        if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::QUESTION) {
2543            idx += 1;
2544            while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
2545                idx += 1;
2546            }
2547        }
2548        // Must see IDENT or STRING (the key), followed by `:` or `(`.
2549        if !matches!(
2550            self.tokens.get(idx).map(|(k, _)| *k),
2551            Some(SyntaxKind::IDENT) | Some(SyntaxKind::STRING)
2552        ) {
2553            return false;
2554        }
2555        let mut after_key = idx + 1;
2556        while after_key < self.tokens.len() && self.tokens[after_key].0.is_trivia() {
2557            after_key += 1;
2558        }
2559        matches!(
2560            self.tokens.get(after_key).map(|(k, _)| *k),
2561            Some(SyntaxKind::COLON) | Some(SyntaxKind::L_PAREN)
2562        )
2563    }
2564
2565    fn parse_call_args(&mut self) {
2566        self.open(SyntaxKind::CALL_ARG);
2567        self.bump(); // (
2568        while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
2569            self.parse_call_arg();
2570            if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_PAREN) {
2571                self.error_recover(
2572                    "expected `,` or `)` in argument list",
2573                    &[SyntaxKind::COMMA, SyntaxKind::R_PAREN],
2574                );
2575                self.eat(SyntaxKind::COMMA);
2576            }
2577        }
2578        self.expect(SyntaxKind::R_PAREN);
2579        self.close();
2580    }
2581
2582    /// One argument inside a call's parens. Either positional (a
2583    /// bare expression) or named (`IDENT = expression`). The latter
2584    /// is detected by peeking IDENT-followed-by-EQ — the legacy
2585    /// `parse_call_arg` (`fn_call.rs`) uses the same lookahead. We
2586    /// emit the IDENT + EQ + value expression as siblings of each
2587    /// other under the parent CALL_ARG node so the lowering pass can
2588    /// pick the name back out without re-running token logic.
2589    fn parse_call_arg(&mut self) {
2590        if self.at(SyntaxKind::IDENT) && self.nth(1) == Some(SyntaxKind::EQ) {
2591            // Named: IDENT EQ <expr>.
2592            self.bump(); // name
2593            self.bump(); // =
2594            self.parse_expr();
2595        } else {
2596            self.parse_expr();
2597        }
2598    }
2599}
2600
2601// =====================================================================
2602// Operator precedence (Pratt binding-power table).
2603//
2604// Mirrors the existing precedence chain in `expr.rs`:
2605//   1. or   ||
2606//   2. and  &&
2607//   3. equality   ==  !=
2608//   4. comparison <  >  <=  >=
2609//   5. (retired) concat ++ — still consumed here for structured
2610//      recovery, but `parse_expr_bp` flags it with a migration
2611//      diagnostic pointing at `+` (string concatenation operator)
2612//   6. additive + -
2613//   7. multiplicative * / %
2614//   8. pipe |
2615// All operators are left-associative (right_bp = left_bp + 1).
2616// =====================================================================
2617
2618fn infix_bp(kind: SyntaxKind) -> Option<(u8, u8)> {
2619    Some(match kind {
2620        SyntaxKind::PIPE_PIPE => (10, 11),
2621        SyntaxKind::AMP_AMP => (20, 21),
2622        SyntaxKind::EQ_EQ | SyntaxKind::BANG_EQ => (30, 31),
2623        SyntaxKind::LT | SyntaxKind::GT | SyntaxKind::LT_EQ | SyntaxKind::GT_EQ => (40, 41),
2624        SyntaxKind::PLUS_PLUS => (50, 51),
2625        SyntaxKind::PLUS | SyntaxKind::MINUS => (60, 61),
2626        SyntaxKind::STAR | SyntaxKind::SLASH | SyntaxKind::PERCENT => (70, 71),
2627        SyntaxKind::PIPE => (80, 81),
2628        _ => return None,
2629    })
2630}
2631
2632// =====================================================================
2633// rowan `Language::kind_to_raw` is an instance method on a unit type;
2634// our hot inner loops want a `'static`-friendly free function. Wrap it.
2635// =====================================================================
2636
2637trait RawKind {
2638    fn kind_to_raw_static(kind: SyntaxKind) -> rowan::SyntaxKind;
2639}
2640impl RawKind for RelonLanguage {
2641    fn kind_to_raw_static(kind: SyntaxKind) -> rowan::SyntaxKind {
2642        kind.into()
2643    }
2644}
2645
2646#[cfg(test)]
2647mod tests {
2648    use super::*;
2649
2650    fn parse_round_trip(source: &str) -> Parse {
2651        let parsed = parse_cst(source);
2652        let reconstructed = parsed.syntax().text().to_string();
2653        assert_eq!(reconstructed, source, "round-trip mismatch");
2654        parsed
2655    }
2656
2657    #[test]
2658    fn empty_dict() {
2659        let parsed = parse_round_trip("{}");
2660        assert!(!parsed.has_errors());
2661    }
2662
2663    #[test]
2664    fn simple_dict() {
2665        parse_round_trip("{ foo: 1, bar: 2 }");
2666    }
2667
2668    #[test]
2669    fn nested_dict_and_list() {
2670        parse_round_trip("{\n    foo: [1, 2, 3],\n    bar: { baz: \"hi\" }\n}\n");
2671    }
2672
2673    #[test]
2674    fn reference_path() {
2675        parse_round_trip("{ x: &root.foo.bar[0] }");
2676    }
2677
2678    #[test]
2679    fn binary_expression() {
2680        let parsed = parse_round_trip("{ x: 1 + 2 * 3 }");
2681        assert!(!parsed.has_errors());
2682        // Multiplicative inside additive — verify the BINARY_EXPR
2683        // nesting by looking at the syntax tree.
2684        let syntax = parsed.syntax();
2685        let dict = syntax
2686            .descendants()
2687            .find(|n| n.kind() == SyntaxKind::DICT)
2688            .expect("dict");
2689        let outer_binary = dict
2690            .descendants()
2691            .find(|n| n.kind() == SyntaxKind::BINARY_EXPR)
2692            .expect("outer binary");
2693        // The outer binary is `1 + (2 * 3)`. The right child is
2694        // another BINARY_EXPR.
2695        let inner_binaries: Vec<_> = outer_binary
2696            .descendants()
2697            .filter(|n| n.kind() == SyntaxKind::BINARY_EXPR && *n != outer_binary)
2698            .collect();
2699        assert!(!inner_binaries.is_empty(), "expected nested BINARY_EXPR");
2700    }
2701
2702    #[test]
2703    fn method_shorthand_emits_closure() {
2704        let parsed = parse_round_trip("{ add(a, b): a + b }");
2705        assert!(!parsed.has_errors());
2706        let closures: Vec<_> = parsed
2707            .syntax()
2708            .descendants()
2709            .filter(|n| n.kind() == SyntaxKind::CLOSURE)
2710            .collect();
2711        assert_eq!(closures.len(), 1, "expected exactly one CLOSURE node");
2712        let params: Vec<_> = closures[0]
2713            .descendants()
2714            .filter(|n| n.kind() == SyntaxKind::CLOSURE_PARAM)
2715            .collect();
2716        assert_eq!(params.len(), 2, "expected two CLOSURE_PARAMs");
2717    }
2718
2719    #[test]
2720    fn standalone_paren_closure() {
2721        let parsed = parse_round_trip("{ f: (a, b) => a + b }");
2722        assert!(!parsed.has_errors());
2723        let closures: Vec<_> = parsed
2724            .syntax()
2725            .descendants()
2726            .filter(|n| n.kind() == SyntaxKind::CLOSURE)
2727            .collect();
2728        assert_eq!(closures.len(), 1);
2729    }
2730
2731    #[test]
2732    fn list_comprehension_emits_comprehension_node() {
2733        let parsed = parse_round_trip("{ xs: [x * 2 for x in src if x > 0] }");
2734        assert!(!parsed.has_errors());
2735        let comps: Vec<_> = parsed
2736            .syntax()
2737            .descendants()
2738            .filter(|n| n.kind() == SyntaxKind::COMPREHENSION)
2739            .collect();
2740        assert_eq!(comps.len(), 1);
2741        // The COMPREHENSION should NOT also be a LIST.
2742        let lists: Vec<_> = parsed
2743            .syntax()
2744            .descendants()
2745            .filter(|n| n.kind() == SyntaxKind::LIST)
2746            .collect();
2747        // The dict body is not a list, so the only [...] in source
2748        // becomes a COMPREHENSION — no LIST nodes at top level.
2749        assert!(
2750            lists.is_empty(),
2751            "comprehension `[...]` should not also produce a LIST"
2752        );
2753    }
2754
2755    #[test]
2756    fn match_expression_emits_match_node() {
2757        let parsed = parse_round_trip(
2758            "{ render(item): item match { Image: \"i\", Text: \"t\", _ : \"u\" } }",
2759        );
2760        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2761        let matches: Vec<_> = parsed
2762            .syntax()
2763            .descendants()
2764            .filter(|n| n.kind() == SyntaxKind::MATCH_EXPR)
2765            .collect();
2766        assert_eq!(matches.len(), 1);
2767        let arms: Vec<_> = parsed
2768            .syntax()
2769            .descendants()
2770            .filter(|n| n.kind() == SyntaxKind::MATCH_ARM)
2771            .collect();
2772        assert_eq!(arms.len(), 3);
2773    }
2774
2775    #[test]
2776    fn underscore_match_catch_all_parses_clean() {
2777        // The Rust-style `_` catch-all parses without errors and yields a
2778        // WILDCARD pattern node (the same node `*` used to produce).
2779        let parsed = parse_round_trip("{ render(item): item match { Image: \"i\", _: \"u\" } }");
2780        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2781        let wildcards: Vec<_> = parsed
2782            .syntax()
2783            .descendants()
2784            .filter(|n| n.kind() == SyntaxKind::WILDCARD)
2785            .collect();
2786        assert_eq!(wildcards.len(), 1);
2787    }
2788
2789    #[test]
2790    fn star_in_match_arm_now_errors() {
2791        // `*` is no longer the pattern wildcard — a match catch-all
2792        // spelled `*` is a parse error pointing at the new `_` spelling.
2793        let parsed = parse_round_trip("{ render(item): item match { Image: \"i\", *: \"u\" } }");
2794        assert!(
2795            parsed.has_errors(),
2796            "`*` in a match arm must error (use `_`): {:?}",
2797            parsed.errors
2798        );
2799        assert!(
2800            parsed.errors.iter().any(|e| e.message.contains("`_`")),
2801            "diagnostic should point at `_`: {:?}",
2802            parsed.errors
2803        );
2804    }
2805
2806    #[test]
2807    fn plus_plus_concat_now_errors() {
2808        // `++` was parseable but never executable — string
2809        // concatenation is spelled `+`. The token is still consumed
2810        // (round-trip stays lossless, recovery stays structured) but
2811        // the parse carries a migration diagnostic pointing at `+`.
2812        let parsed = parse_round_trip("{ msg: \"a\" ++ \"b\" }");
2813        assert!(
2814            parsed.has_errors(),
2815            "`++` must error (use `+`): {:?}",
2816            parsed.errors
2817        );
2818        assert!(
2819            parsed
2820                .errors
2821                .iter()
2822                .any(|e| e.message.contains("use `+` to concatenate strings")),
2823            "diagnostic should point at `+`: {:?}",
2824            parsed.errors
2825        );
2826    }
2827
2828    #[test]
2829    fn plus_plus_in_main_body_errors() {
2830        // Same diagnostic through the `#main` body expression path.
2831        let parsed = parse_round_trip("#main(String s) -> String\ns ++ \"!\"\n");
2832        assert!(
2833            parsed.has_errors(),
2834            "`++` in a #main body must error: {:?}",
2835            parsed.errors
2836        );
2837        assert!(
2838            parsed
2839                .errors
2840                .iter()
2841                .any(|e| e.message.contains("use `+` to concatenate strings")),
2842            "diagnostic should point at `+`: {:?}",
2843            parsed.errors
2844        );
2845    }
2846
2847    #[test]
2848    fn underscore_closure_param_parses_clean() {
2849        // A bare `_` is a legal closure parameter name (the Rust-style
2850        // ignore binding). The wildcard lexer split must NOT break it:
2851        // `(acc, _) => acc` parses without errors.
2852        let parsed = parse_round_trip("{ f(n): range(n).reduce(0, (acc, _) => acc) }");
2853        assert!(
2854            !parsed.has_errors(),
2855            "`_` closure param must parse clean: {:?}",
2856            parsed.errors
2857        );
2858    }
2859
2860    #[test]
2861    fn schema_directive_with_body() {
2862        let parsed = parse_round_trip("#schema User { String name: *, Int age: * }\n{ a: 1 }");
2863        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2864        let dirs: Vec<_> = parsed
2865            .syntax()
2866            .descendants()
2867            .filter(|n| n.kind() == SyntaxKind::DIRECTIVE)
2868            .collect();
2869        assert_eq!(dirs.len(), 1);
2870    }
2871
2872    #[test]
2873    fn schema_with_generic_params_and_with_block() {
2874        let parsed = parse_round_trip(
2875            "#schema Result<T, E> { T value: *, E error: * } with { unwrap(): value }\n{ x: 1 }",
2876        );
2877        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2878    }
2879
2880    #[test]
2881    fn import_directive_round_trip() {
2882        let parsed = parse_round_trip("#import string from \"std/string\"\n{ x: 1 }");
2883        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2884    }
2885
2886    #[test]
2887    fn import_with_sha256_integrity_round_trip() {
2888        let parsed =
2889            parse_round_trip("#import lib from \"./lib.relon\" sha256:\"deadbeef\"\n{ x: 1 }");
2890        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2891    }
2892
2893    #[test]
2894    fn import_with_missing_hex_string_reports_error() {
2895        // `sha256:` followed by something that is not a STRING should
2896        // raise a parse error (rather than silently consume tokens).
2897        let parsed = parse_round_trip("#import lib from \"./lib.relon\" sha256: bad\n{ x: 1 }");
2898        assert!(
2899            parsed.has_errors(),
2900            "expected parse error for malformed integrity pin"
2901        );
2902    }
2903
2904    #[test]
2905    fn main_directive_round_trip() {
2906        let parsed = parse_round_trip("#main(User u, Cart cart) -> Result<Order>\n{ x: 1 }");
2907        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2908    }
2909
2910    #[test]
2911    fn f_string_emits_f_string_node() {
2912        let parsed = parse_round_trip(r#"{ msg: f"hello ${name}!" }"#);
2913        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2914        let fs: Vec<_> = parsed
2915            .syntax()
2916            .descendants()
2917            .filter(|n| n.kind() == SyntaxKind::F_STRING)
2918            .collect();
2919        assert_eq!(fs.len(), 1);
2920        let interps: Vec<_> = parsed
2921            .syntax()
2922            .descendants()
2923            .filter(|n| n.kind() == SyntaxKind::F_STRING_INTERPOLATION)
2924            .collect();
2925        assert_eq!(interps.len(), 1);
2926        // Interpolation body should contain a VARIABLE_EXPR for `name`.
2927        let interp = &interps[0];
2928        let vars: Vec<_> = interp
2929            .descendants()
2930            .filter(|n| n.kind() == SyntaxKind::VARIABLE_EXPR)
2931            .collect();
2932        assert!(!vars.is_empty(), "expected VARIABLE_EXPR inside interp");
2933    }
2934
2935    #[test]
2936    fn raw_f_string_round_trip() {
2937        parse_round_trip("{ msg: f#\"raw ${x} text\"# }");
2938    }
2939
2940    #[test]
2941    fn plain_string_still_literal() {
2942        let parsed = parse_round_trip(r#"{ x: "hi" }"#);
2943        let fs: Vec<_> = parsed
2944            .syntax()
2945            .descendants()
2946            .filter(|n| n.kind() == SyntaxKind::F_STRING)
2947            .collect();
2948        assert!(fs.is_empty(), "plain string should not be F_STRING");
2949    }
2950
2951    #[test]
2952    fn where_expression_emits_where_node() {
2953        let parsed = parse_round_trip("{ x: a + b where { a: 1, b: 2 } }");
2954        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2955        let wheres: Vec<_> = parsed
2956            .syntax()
2957            .descendants()
2958            .filter(|n| n.kind() == SyntaxKind::WHERE_EXPR)
2959            .collect();
2960        assert_eq!(wheres.len(), 1);
2961    }
2962
2963    #[test]
2964    fn list_without_for_stays_list() {
2965        let parsed = parse_round_trip("{ xs: [1, 2, 3] }");
2966        assert!(!parsed.has_errors());
2967        let lists: Vec<_> = parsed
2968            .syntax()
2969            .descendants()
2970            .filter(|n| n.kind() == SyntaxKind::LIST)
2971            .collect();
2972        assert_eq!(lists.len(), 1);
2973    }
2974
2975    #[test]
2976    fn generic_type_in_closure_param() {
2977        let parsed = parse_round_trip("{ extract(List<Int> xs, Option<String> sep): xs }");
2978        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
2979        let types: Vec<_> = parsed
2980            .syntax()
2981            .descendants()
2982            .filter(|n| n.kind() == SyntaxKind::TYPE_NODE)
2983            .collect();
2984        // `List<Int>` outer + `Int` nested + `Option<String>` outer +
2985        // `String` nested = 4 TYPE_NODEs.
2986        assert!(
2987            types.len() >= 3,
2988            "expected at least 3 TYPE_NODE, got {}",
2989            types.len()
2990        );
2991    }
2992
2993    #[test]
2994    fn type_suffix_question_is_rejected() {
2995        // Wave A: the type-suffix `?` (`Int?`, `Weather?`, `List<T>?`)
2996        // is no longer valid — optionality is written `Option<T>`. Each
2997        // of these must surface a parse error pointing at `Option<T>`.
2998        for source in [
2999            "#main(Int? x) -> Int\n0\n",
3000            "{ extract(String? sep): sep }",
3001            "{ Weather? w: { a: 1 } }",
3002            "{ x: #brand Weather? { a: 1 } }",
3003            "{ extract(List<Int>? xs): xs }",
3004        ] {
3005            let parsed = parse_cst(source);
3006            assert!(
3007                parsed.has_errors(),
3008                "expected a parse error for type-suffix `?` in {source:?}"
3009            );
3010            assert!(
3011                parsed
3012                    .errors
3013                    .iter()
3014                    .any(|e| e.message.contains("Option<T>")),
3015                "expected an `Option<T>` hint in errors for {source:?}, got {:?}",
3016                parsed.errors
3017            );
3018        }
3019    }
3020
3021    #[test]
3022    fn option_type_in_main_signature_parses_clean() {
3023        // The migration target `Option<T>` parses without errors where
3024        // the old `T?` used to.
3025        let parsed = parse_cst("#main(Option<Int> x) -> Int\n0\n");
3026        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3027    }
3028
3029    #[test]
3030    fn optional_chaining_and_ternary_still_parse() {
3031        // The call-chain `?` (optional chaining) and the ternary `?:`
3032        // must keep parsing cleanly — only the type-suffix `?` is gone.
3033        for source in [
3034            "{ f(a): a?.b }",
3035            "{ f(a): a?[0] }",
3036            "{ f(a): a?.b?.c }",
3037            "{ g(x): x < 0 ? -x : x }",
3038        ] {
3039            let parsed = parse_round_trip(source);
3040            assert!(
3041                !parsed.has_errors(),
3042                "unexpected errors for {source:?}: {:?}",
3043                parsed.errors
3044            );
3045        }
3046    }
3047
3048    #[test]
3049    fn comparison_lt_not_treated_as_generics() {
3050        // The closure-param peek must NOT decide `a < b` is a typed
3051        // param — there's whitespace between `a` and `<`. The dict
3052        // body should be a single binary expression.
3053        let parsed = parse_round_trip("{ f: a < b }");
3054        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3055        let binaries: Vec<_> = parsed
3056            .syntax()
3057            .descendants()
3058            .filter(|n| n.kind() == SyntaxKind::BINARY_EXPR)
3059            .collect();
3060        assert_eq!(binaries.len(), 1, "expected one BINARY_EXPR");
3061    }
3062
3063    #[test]
3064    fn typed_closure_param_records_type_node() {
3065        let parsed = parse_round_trip("{ add(Int a, Int b): a + b }");
3066        assert!(!parsed.has_errors());
3067        let type_nodes: Vec<_> = parsed
3068            .syntax()
3069            .descendants()
3070            .filter(|n| n.kind() == SyntaxKind::TYPE_NODE)
3071            .collect();
3072        assert!(
3073            type_nodes.len() >= 2,
3074            "expected TYPE_NODEs for typed params"
3075        );
3076    }
3077
3078    #[test]
3079    fn comments_round_trip() {
3080        parse_round_trip("// header\n{\n    // inner\n    x: 1, /* trail */ y: 2\n}\n");
3081    }
3082
3083    #[test]
3084    fn error_recovery_preserves_bytes() {
3085        // Deliberate parse failure: missing colon. The recovery
3086        // wraps `42` in an ERROR node and resyncs to `,`. Source
3087        // bytes are intact end-to-end.
3088        let parsed = parse_round_trip("{ foo 42, bar: 1 }");
3089        assert!(parsed.has_errors(), "expected an error report");
3090    }
3091
3092    #[test]
3093    fn unknown_byte_does_not_crash() {
3094        parse_round_trip("{ x: \u{0000} 1 }");
3095    }
3096
3097    #[test]
3098    fn variant_ctor_emits_variant_node() {
3099        let parsed = parse_round_trip("{ x: Result.Ok { value: 1 } }");
3100        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3101        let vc: Vec<_> = parsed
3102            .syntax()
3103            .descendants()
3104            .filter(|n| n.kind() == SyntaxKind::VARIANT_CTOR)
3105            .collect();
3106        assert_eq!(vc.len(), 1);
3107    }
3108
3109    #[test]
3110    fn variant_ctor_three_segment_path() {
3111        let parsed = parse_round_trip("{ x: Foo.Bar.Baz { field: 1 } }");
3112        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3113        let vc: Vec<_> = parsed
3114            .syntax()
3115            .descendants()
3116            .filter(|n| n.kind() == SyntaxKind::VARIANT_CTOR)
3117            .collect();
3118        assert_eq!(vc.len(), 1);
3119    }
3120
3121    #[test]
3122    fn dotted_access_without_brace_stays_variable() {
3123        // `foo.bar` alone is member access — must NOT become a
3124        // VARIANT_CTOR. Walks the post-fix path the same as before.
3125        let parsed = parse_round_trip("{ x: foo.bar }");
3126        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3127        let vc: Vec<_> = parsed
3128            .syntax()
3129            .descendants()
3130            .filter(|n| n.kind() == SyntaxKind::VARIANT_CTOR)
3131            .collect();
3132        assert!(vc.is_empty(), "single dotted access should not be a ctor");
3133    }
3134
3135    #[test]
3136    fn named_call_args_parse_without_errors() {
3137        let parsed = parse_round_trip("{ y: map(f = g) }");
3138        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3139        // The CALL_ARG node contains the IDENT, EQ, and value side by
3140        // side; the lowering pass groups them back into a `CallArg`.
3141        let call_args: Vec<_> = parsed
3142            .syntax()
3143            .descendants()
3144            .filter(|n| n.kind() == SyntaxKind::CALL_ARG)
3145            .collect();
3146        assert_eq!(call_args.len(), 1);
3147        let has_eq = call_args[0]
3148            .children_with_tokens()
3149            .filter_map(|el| el.into_token())
3150            .any(|t| t.kind() == SyntaxKind::EQ);
3151        assert!(has_eq, "named arg should carry an EQ token");
3152    }
3153
3154    #[test]
3155    fn mixed_positional_and_named_args() {
3156        let parsed = parse_round_trip("{ z: f(1, name = expr, more = 2) }");
3157        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3158    }
3159
3160    #[test]
3161    fn ternary_expression_emits_ternary_node() {
3162        let parsed = parse_round_trip("{ x: a ? 1 : 2 }");
3163        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3164        let ts: Vec<_> = parsed
3165            .syntax()
3166            .descendants()
3167            .filter(|n| n.kind() == SyntaxKind::TERNARY_EXPR)
3168            .collect();
3169        assert_eq!(ts.len(), 1, "expected one TERNARY_EXPR");
3170    }
3171
3172    #[test]
3173    fn ternary_root_no_whitespace() {
3174        // Legacy accepts `true? 1:2` — every `?` / `:` boundary is
3175        // surrounded by `soc0` so adjacent forms parse without spaces.
3176        let parsed = parse_round_trip("true? 1:2");
3177        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3178    }
3179
3180    #[test]
3181    fn ternary_nested_in_else() {
3182        // Right-recursive parse: `a ? 1 : b ? 2 : 3` should produce a
3183        // ternary whose `els` is another ternary.
3184        let parsed = parse_round_trip("{ x: a ? 1 : b ? 2 : 3 }");
3185        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3186        let ts: Vec<_> = parsed
3187            .syntax()
3188            .descendants()
3189            .filter(|n| n.kind() == SyntaxKind::TERNARY_EXPR)
3190            .collect();
3191        assert_eq!(ts.len(), 2);
3192    }
3193
3194    #[test]
3195    fn bare_directive_does_not_consume_next_field() {
3196        // `#internal` is a bare directive; the IDENT after it must
3197        // belong to the next dict field, not to the directive body.
3198        let src = "{ #internal\n  field(s): s, next: 1 }";
3199        let parsed = parse_round_trip(src);
3200        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3201    }
3202
3203    #[test]
3204    fn dict_field_can_be_attribute_only() {
3205        // `#import x from "p"` consumes its whole body; the field is
3206        // attribute-only and the `,` belongs to the surrounding dict.
3207        let src = "{ #import x from \"p\", next: 1 }";
3208        let parsed = parse_round_trip(src);
3209        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3210    }
3211
3212    #[test]
3213    fn schema_with_block_emits_method_nodes() {
3214        // Slice-opener for the schema with-block grammar. Two methods
3215        // back-to-back, one carrying a `#derive` pragma and a `Self`
3216        // parameter type.
3217        let src = "#schema Money { Int cents: * } with {\n    #derive Equatable\n    eq(other: Self) -> Bool: self.cents == other.cents\n}\n{ Money p: { cents: 100 } }\n";
3218        let parsed = parse_round_trip(src);
3219        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3220        let with_blocks: Vec<_> = parsed
3221            .syntax()
3222            .descendants()
3223            .filter(|n| n.kind() == SyntaxKind::SCHEMA_WITH)
3224            .collect();
3225        assert_eq!(with_blocks.len(), 1);
3226        let methods: Vec<_> = with_blocks[0]
3227            .descendants()
3228            .filter(|n| n.kind() == SyntaxKind::SCHEMA_METHOD)
3229            .collect();
3230        assert_eq!(methods.len(), 1);
3231        // The method should contain the `#derive` directive and a
3232        // CLOSURE_PARAM for `other`.
3233        let dirs: Vec<_> = methods[0]
3234            .descendants()
3235            .filter(|n| n.kind() == SyntaxKind::DIRECTIVE)
3236            .collect();
3237        assert_eq!(dirs.len(), 1);
3238        let params: Vec<_> = methods[0]
3239            .descendants()
3240            .filter(|n| n.kind() == SyntaxKind::CLOSURE_PARAM)
3241            .collect();
3242        assert_eq!(params.len(), 1);
3243    }
3244
3245    #[test]
3246    fn schema_with_block_native_method_skips_body() {
3247        // `#native` method has no `: body` — just the signature.
3248        let src =
3249            "#schema Doc { String text: * } with {\n    #native\n    render() -> String\n}\n{}\n";
3250        let parsed = parse_round_trip(src);
3251        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3252    }
3253
3254    #[test]
3255    fn tuple_index_access_round_trips() {
3256        // v1.8 positional access `xs.0` — number after the dot is a
3257        // valid path tail, alongside identifier-style `xs.field`.
3258        let parsed = parse_round_trip("{ Int head: xs.0 }");
3259        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3260    }
3261
3262    #[test]
3263    fn type_atom_for_brand_directive_body() {
3264        // `#brand Dict<String, Int> { ... }` — the brand directive's
3265        // body is a type-tagged dict. The leading IDENT `Dict` (a
3266        // known type head) must lower into a TYPE_NODE so the
3267        // generics aren't mistaken for binary `<` / `>` operators.
3268        let src = "{ counters: #brand Dict<String, Int> { hits: 1 } }";
3269        let parsed = parse_round_trip(src);
3270        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3271        let types: Vec<_> = parsed
3272            .syntax()
3273            .descendants()
3274            .filter(|n| n.kind() == SyntaxKind::TYPE_NODE)
3275            .collect();
3276        assert!(!types.is_empty(), "expected a TYPE_NODE for Dict<...>");
3277    }
3278
3279    #[test]
3280    fn typed_spread_round_trips() {
3281        // v1.3 typed spread `...<Type> expr`. The `<Type>` annotation
3282        // lands inside the SPREAD_EXPR; the source expression follows.
3283        let parsed = parse_round_trip("{ val: { ...<Extra> base } }");
3284        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3285        let spreads: Vec<_> = parsed
3286            .syntax()
3287            .descendants()
3288            .filter(|n| n.kind() == SyntaxKind::SPREAD_EXPR)
3289            .collect();
3290        assert_eq!(spreads.len(), 1, "expected one SPREAD_EXPR");
3291        let types: Vec<_> = spreads[0]
3292            .descendants()
3293            .filter(|n| n.kind() == SyntaxKind::TYPE_NODE)
3294            .collect();
3295        assert!(!types.is_empty(), "typed spread should carry a TYPE_NODE");
3296    }
3297
3298    #[test]
3299    fn tuple_type_in_dict_field_round_trips() {
3300        // v1.7 tuple types in the type-hint slot of a dict field.
3301        let parsed = parse_round_trip("{ (Int, String) pair: (42, \"hello\") }");
3302        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3303        let tts: Vec<_> = parsed
3304            .syntax()
3305            .descendants()
3306            .filter(|n| n.kind() == SyntaxKind::TUPLE_TYPE)
3307            .collect();
3308        assert_eq!(tts.len(), 1, "expected one TUPLE_TYPE");
3309    }
3310
3311    #[test]
3312    fn tuple_type_inside_generic() {
3313        let parsed = parse_round_trip("{ List<(Int, String)> rows: [(1, \"a\")] }");
3314        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3315        let tts: Vec<_> = parsed
3316            .syntax()
3317            .descendants()
3318            .filter(|n| n.kind() == SyntaxKind::TUPLE_TYPE)
3319            .collect();
3320        assert_eq!(tts.len(), 1);
3321    }
3322
3323    #[test]
3324    fn tuple_type_zero_and_one() {
3325        // Zero-tuple `()` and one-tuple `(T,)` both round-trip
3326        // cleanly. The trailing comma in the one-tuple matters for the
3327        // typed-AST layer (it disambiguates from `(T)` parens), but the
3328        // CST keeps the bytes verbatim.
3329        let parsed = parse_round_trip("{ () unit: [], (Int,) one: [1] }");
3330        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3331    }
3332
3333    #[test]
3334    fn decorator_dotted_name_round_trips() {
3335        // `@ensure.int` / `@ensure.at_least(1024)` — dotted decorator
3336        // names appear in the corpus alongside plain `@name(...)`.
3337        let src = "{ @ensure.int\n  @ensure.at_least(1024)\n  \"port\": 80 }";
3338        let parsed = parse_round_trip(src);
3339        assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
3340    }
3341
3342    /// Monotonic floor on how many checked-in `.relon` fixtures parse
3343    /// without ANY ERROR nodes. Each P2 slice MUST raise this number;
3344    /// regressions need a deliberate, recorded reason.
3345    ///
3346    /// The floor starts at 30 (closures slice). Bump it as more P2
3347    /// grammar lands.
3348    #[test]
3349    fn fixtures_clean_parse_floor() {
3350        // Each P2 slice bumps the floor. At slice 1 (closures) we hit
3351        // ~60 of ~210 — the directive / match / where / type slices
3352        // pushed this to 135. After the P4-prep grammar gaps
3353        // (ternary / named call args / variant ctor) we reach 148.
3354        // Directive-shape dispatch + attribute-only dict fields pushed
3355        // it to 157 (the next P2 slices target tuple types, typed
3356        // spreads, and the schema with-block named-param method
3357        // grammar). Tuple types `(T1, T2)` brought the floor to 165.
3358        // Typed spreads `...<Type> expr` brought it to 170.
3359        // Schema with-block structured method nodes brought it to 198.
3360        // Tuple-index `.N` access, type-atom recognition for
3361        // `#brand Dict<K, V> { ... }` / `Weather? { ... }`,
3362        // Enum-with-struct-variant inside generic args, and
3363        // expression-level leading attributes brought it to 208.
3364        // The remaining two `.relon` files
3365        // (`with_block_invalid/*.relon`) are intentional parse-error
3366        // fixtures used by the legacy parser's negative test suite.
3367        const FLOOR: usize = 208;
3368        let clean = fixture_clean_parse_count();
3369        eprintln!("[parser] fixtures clean-parse count: {clean}");
3370        assert!(
3371            clean >= FLOOR,
3372            "regressed clean-parse count: floor={FLOOR}, actual={clean}",
3373        );
3374    }
3375
3376    fn fixture_clean_parse_count() -> usize {
3377        use std::fs;
3378        use std::path::PathBuf;
3379
3380        let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
3381        let workspace_root = crate_dir
3382            .parent()
3383            .and_then(|p| p.parent())
3384            .expect("workspace root")
3385            .to_path_buf();
3386        let mut files = Vec::new();
3387        walk(&workspace_root, &mut files);
3388        files.retain(|p| !p.to_string_lossy().contains("/target/"));
3389        let mut clean = 0usize;
3390        for path in files {
3391            let source = fs::read_to_string(&path).unwrap_or_default();
3392            if source.is_empty() {
3393                continue;
3394            }
3395            let parsed = parse_cst(&source);
3396            if !parsed.has_errors() {
3397                clean += 1;
3398            }
3399        }
3400        clean
3401    }
3402
3403    /// The strongest invariant: every checked-in `.relon` file
3404    /// round-trips through the CST byte-exact. Some may still have
3405    /// parse errors (the v2 grammar doesn't cover every construct
3406    /// yet) — that's expected and tolerated. What MUST hold is the
3407    /// lossless tree property.
3408    #[test]
3409    fn every_fixture_round_trips_through_cst() {
3410        use std::fs;
3411        use std::path::PathBuf;
3412
3413        let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
3414        let workspace_root = crate_dir
3415            .parent()
3416            .and_then(|p| p.parent())
3417            .expect("workspace root")
3418            .to_path_buf();
3419        let mut files = Vec::new();
3420        walk(&workspace_root, &mut files);
3421        files.retain(|p| !p.to_string_lossy().contains("/target/"));
3422        assert!(!files.is_empty());
3423        for path in files {
3424            let source = fs::read_to_string(&path).unwrap_or_else(|e| panic!("read {path:?}: {e}"));
3425            let parsed = parse_cst(&source);
3426            let reconstructed = parsed.syntax().text().to_string();
3427            assert_eq!(reconstructed, source, "round-trip mismatch on {path:?}");
3428        }
3429    }
3430
3431    fn walk(dir: &std::path::Path, out: &mut Vec<std::path::PathBuf>) {
3432        let Ok(read) = std::fs::read_dir(dir) else {
3433            return;
3434        };
3435        for entry in read.flatten() {
3436            let p = entry.path();
3437            if p.is_dir() {
3438                let name = p.file_name().and_then(|n| n.to_str()).unwrap_or("");
3439                if matches!(name, "target" | "node_modules" | ".git") {
3440                    continue;
3441                }
3442                walk(&p, out);
3443            } else if p.extension().and_then(|e| e.to_str()) == Some("relon") {
3444                out.push(p);
3445            }
3446        }
3447    }
3448}