relon_parser/cst.rs
1//! Concrete syntax tree (CST) builder over the lossless [`lex`]
2//! output. P2 of the rowan rewrite — translates the existing winnow
3//! grammar into rowan `GreenNode`s while preserving every source byte
4//! (including whitespace and comments) as first-class tokens.
5//!
6//! Architecture
7//! ============
8//!
9//! - `Parser` wraps the flat `(SyntaxKind, &str)` token stream from
10//! [`lex::lex`] plus a `rowan::GreenNodeBuilder` writing the tree.
11//! - "Skip-trivia" helpers (`current`, `at`, `nth`) ignore whitespace
12//! and comments, so productions can pattern-match on meaningful
13//! structure without ever forgetting to write a trivia token to the
14//! tree.
15//! - Trivia is flushed to the builder lazily — emitted as siblings
16//! *just before* the next meaningful token. The "right" home for a
17//! trailing comment (does it belong to the closing brace, or to the
18//! next pair?) is decided by `bump`'s flush order.
19//! - Each grammar production is a function on `&mut Parser`. They
20//! call `open(kind)` / `close()` to mark composite nodes. Failures
21//! recover via `error_recover(sync_set)` which emits an ERROR node
22//! and synchronises to the nearest token in `sync_set`.
23//!
24//! Scope
25//! =====
26//!
27//! P2 (now complete) covers the full surface grammar:
28//!
29//! * Literals, identifiers, dotted paths, references.
30//! * Lists, dicts (with pair attributes + method-shorthand closures
31//! + typed keys), list comprehensions.
32//! * Unary, binary (Pratt-precedence), call, postfix `.field` /
33//! `[index]`, parenthesised closure (`(p) [-> R] => body`).
34//! * `expr match { ... }` and `expr where { ... }` postfix forms.
35//! * F-string decomposition into `F_STRING` + `F_STRING_LITERAL`
36//! chunks + nested `F_STRING_INTERPOLATION` sub-nodes (whose
37//! children are ordinary Relon expressions).
38//! * `TYPE_NODE` — dotted paths, generics, optional `?`.
39//! * Directive bodies dispatched by name: `#schema`/`#extend`
40//! (name + generics + body + optional `with`), `#import`
41//! (`<spec> from "path"`), `#main(typed-params) [-> Ret]`.
42//!
43//! P3 lives in `crate::ast` — typed-AST wrappers on top of this
44//! CST. P4 will migrate downstream crates (analyzer, evaluator,
45//! fmt, wasm, lsp) onto the new wrappers.
46
47use crate::lex;
48use crate::lex::utf8_codepoint_len_for_cst as utf8_codepoint_len;
49use crate::syntax::{RelonLanguage, SyntaxKind, SyntaxNode};
50use rowan::{Checkpoint, GreenNodeBuilder};
51
/// A single diagnostic produced while building the CST.
///
/// Every failure is also reachable in the tree through the `ERROR` node
/// that spans it, but collecting them here gives consumers (LSP
/// diagnostics, the CLI pretty-printer) a flat list so they do not have
/// to walk the tree a second time.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Byte offset into the original source where recovery began.
    pub offset: usize,
}
62
63/// Successful parse result. `green` is the lossless tree; `errors`
64/// is the (possibly empty) list of parse errors emitted along the
65/// way. The parser NEVER returns `Err` — every input shape produces
66/// a tree, with `ERROR` nodes covering unparseable spans.
67#[derive(Debug, Clone)]
68pub struct Parse {
69 green: rowan::GreenNode,
70 pub errors: Vec<ParseError>,
71}
72
73impl Parse {
74 /// Wrap the green tree as a typed [`SyntaxNode`] for traversal.
75 pub fn syntax(&self) -> SyntaxNode {
76 SyntaxNode::new_root(self.green.clone())
77 }
78
79 /// Returns `true` when at least one parse error was emitted.
80 pub fn has_errors(&self) -> bool {
81 !self.errors.is_empty()
82 }
83}
84
85/// Top-level entry. Always produces a `Parse` — never panics, never
86/// returns `Err`. Bytes that don't fit any production are absorbed
87/// into `ERROR` nodes; the round-trip invariant holds regardless.
88pub fn parse_cst(source: &str) -> Parse {
89 let tokens = lex::lex(source);
90 let mut parser = Parser::new(tokens);
91 parser.parse_document();
92 parser.finish()
93}
94
95// =====================================================================
96// Parser state.
97// =====================================================================
98
99struct Parser<'a> {
100 /// The flat token stream the parser is currently consuming. We
101 /// own the vec so f-string interpolation sub-parses can swap in
102 /// a transient inner-token list without lifetime gymnastics —
103 /// the inner `&str` slices still point into the original source.
104 tokens: Vec<(SyntaxKind, &'a str)>,
105 pos: usize,
106 builder: GreenNodeBuilder<'static>,
107 errors: Vec<ParseError>,
108 /// Running byte offset — kept in sync with `pos` so we can record
109 /// error positions without re-walking.
110 cursor_byte: usize,
111}
112
113impl<'a> Parser<'a> {
114 fn new(tokens: Vec<(SyntaxKind, &'a str)>) -> Self {
115 Self {
116 tokens,
117 pos: 0,
118 builder: GreenNodeBuilder::new(),
119 errors: Vec::new(),
120 cursor_byte: 0,
121 }
122 }
123
124 fn finish(self) -> Parse {
125 // `parse_document` is responsible for emitting every token
126 // INSIDE the root DOCUMENT node — rowan requires it. The
127 // `finish()` call here just hands ownership of the green
128 // tree back.
129 debug_assert!(
130 self.pos >= self.tokens.len(),
131 "{} tokens unflushed at parse end",
132 self.tokens.len() - self.pos
133 );
134 Parse {
135 green: self.builder.finish(),
136 errors: self.errors,
137 }
138 }
139
140 // ----- token-stream introspection ----------------------------------
141
142 /// Kind of the next *non-trivia* token, or `None` if EOI.
143 fn current(&self) -> Option<SyntaxKind> {
144 self.nth(0)
145 }
146
147 /// Kind of the `n`-th non-trivia token ahead (0 = current), or
148 /// `None` if there aren't that many. Useful for productions that
149 /// need 1-token lookahead.
150 fn nth(&self, n: usize) -> Option<SyntaxKind> {
151 let mut idx = self.pos;
152 let mut left = n;
153 while idx < self.tokens.len() {
154 let kind = self.tokens[idx].0;
155 if kind.is_trivia() {
156 idx += 1;
157 continue;
158 }
159 if left == 0 {
160 return Some(kind);
161 }
162 left -= 1;
163 idx += 1;
164 }
165 None
166 }
167
168 fn at(&self, kind: SyntaxKind) -> bool {
169 self.current() == Some(kind)
170 }
171
172 fn at_set(&self, set: &[SyntaxKind]) -> bool {
173 self.current().is_some_and(|k| set.contains(&k))
174 }
175
176 fn at_end(&self) -> bool {
177 self.current().is_none()
178 }
179
180 // ----- consumption --------------------------------------------------
181
182 /// Emit any pending trivia tokens to the builder. Trivia tokens
183 /// (whitespace, comments) are skipped by `current` / `at` but
184 /// still need to land in the tree — this writes them flush
185 /// against whatever production opened most recently.
186 fn flush_trivia(&mut self) {
187 while self.pos < self.tokens.len() {
188 let (kind, text) = self.tokens[self.pos];
189 if !kind.is_trivia() {
190 return;
191 }
192 self.builder
193 .token(RelonLanguage::kind_to_raw_static(kind), text);
194 self.cursor_byte += text.len();
195 self.pos += 1;
196 }
197 }
198
199 /// Consume the next non-trivia token and emit it to the builder,
200 /// preceded by any pending trivia. Panics in tests if called at
201 /// EOI — productions should guard with `current()` first.
202 fn bump(&mut self) {
203 self.flush_trivia();
204 if self.pos >= self.tokens.len() {
205 debug_assert!(false, "bump() past end of input");
206 return;
207 }
208 let (kind, text) = self.tokens[self.pos];
209 self.builder
210 .token(RelonLanguage::kind_to_raw_static(kind), text);
211 self.cursor_byte += text.len();
212 self.pos += 1;
213 }
214
215 /// Consume the next non-trivia token if it matches `kind`.
216 /// Returns `true` on consume.
217 fn eat(&mut self, kind: SyntaxKind) -> bool {
218 if self.at(kind) {
219 self.bump();
220 true
221 } else {
222 false
223 }
224 }
225
226 /// Consume `kind` or emit a parse error. Returns `true` on
227 /// success; on failure leaves the cursor where it was and pushes
228 /// to `errors`. Productions that need to keep going should follow
229 /// `expect` with `error_recover` for proper sync behaviour.
230 fn expect(&mut self, kind: SyntaxKind) -> bool {
231 if self.eat(kind) {
232 true
233 } else {
234 self.error(format!("expected {kind:?}, found {:?}", self.current()));
235 false
236 }
237 }
238
239 fn error(&mut self, message: impl Into<String>) {
240 self.errors.push(ParseError {
241 message: message.into(),
242 offset: self.cursor_byte,
243 });
244 }
245
246 /// Wrap the next token (or a synthetic empty span) in an `ERROR`
247 /// node and push an error. Used as a one-shot way to mark an
248 /// unexpected leaf without entering recovery.
249 fn error_at_current(&mut self, message: impl Into<String>) {
250 self.error(message);
251 self.open(SyntaxKind::ERROR);
252 if !self.at_end() {
253 self.bump();
254 }
255 self.close();
256 }
257
258 /// Emit an `ERROR` node spanning every token until one of
259 /// `sync_set` is reached (or EOI). The error message is recorded
260 /// at the offset where recovery started.
261 fn error_recover(&mut self, message: impl Into<String>, sync_set: &[SyntaxKind]) {
262 self.error(message);
263 self.open(SyntaxKind::ERROR);
264 while !self.at_end() && !self.at_set(sync_set) {
265 self.bump();
266 }
267 self.close();
268 }
269
270 /// Canonical "back to a sane structural boundary" sync set: the
271 /// closing punctuators a dict / list / call would resume at, plus
272 /// the directive `#` head. Productions that recover with this set
273 /// re-enter their parent's punctuation-aware loop on the next
274 /// iteration. Used by the few productions that don't know which
275 /// container they're inside; container-specific recovery sites
276 /// keep narrower sets (`COMMA` + their own closing bracket).
277 const STRUCTURAL_SYNC: &'static [SyntaxKind] = &[
278 SyntaxKind::COMMA,
279 SyntaxKind::R_BRACE,
280 SyntaxKind::R_BRACK,
281 SyntaxKind::R_PAREN,
282 SyntaxKind::HASH,
283 ];
284
285 // ----- node bracketing ---------------------------------------------
286
287 fn open(&mut self, kind: SyntaxKind) {
288 // Order matters: `start_node` MUST come before `flush_trivia`
289 // so any pending whitespace / comments land INSIDE the new
290 // node (as leading trivia of its first child) rather than as
291 // siblings of the node at the parent level. Flushing first
292 // would also break the very-first `open(DOCUMENT)` call —
293 // leading file trivia would end up at rowan's root level,
294 // violating the "exactly one root" invariant.
295 self.builder
296 .start_node(RelonLanguage::kind_to_raw_static(kind));
297 self.flush_trivia();
298 }
299
300 fn checkpoint(&mut self) -> Checkpoint {
301 // Checkpoint snaps to "right after any pending trivia" —
302 // `open_at(ck, ..)` wraps the construct that follows, NOT
303 // the trivia in front of it. Otherwise a comment before a
304 // binary expression would get pulled inside the
305 // `BINARY_EXPR` node, which is the wrong attachment.
306 self.flush_trivia();
307 self.builder.checkpoint()
308 }
309
310 fn open_at(&mut self, ck: Checkpoint, kind: SyntaxKind) {
311 self.builder
312 .start_node_at(ck, RelonLanguage::kind_to_raw_static(kind));
313 }
314
315 fn close(&mut self) {
316 self.builder.finish_node();
317 }
318
319 // =================================================================
320 // Productions.
321 // =================================================================
322
323 /// Top-level: zero-or-more attributes, then one document value.
324 /// The whole thing is wrapped in a `DOCUMENT` node so the round
325 /// trip walks from a single root.
326 fn parse_document(&mut self) {
327 self.open(SyntaxKind::DOCUMENT);
328 // Leading directives / decorators stacked above the root
329 // value. The grammar permits them at file scope (e.g.
330 // `#schema X { ... }` files with no separate value body).
331 while self.at(SyntaxKind::HASH) || self.at(SyntaxKind::AT) {
332 self.parse_attribute();
333 }
334 // The root value. EOI is fine — files like
335 // `#schema X { ... }` end after the directive's body.
336 if !self.at_end() {
337 self.parse_expr();
338 }
339 // Anything left over is unexpected trailing input — wrap as
340 // ERROR so the round-trip stays whole.
341 if !self.at_end() {
342 self.error_recover("trailing input after root value", &[]);
343 }
344 // Trailing trivia (final newline, footer comments) MUST land
345 // inside DOCUMENT — rowan only accepts one root node, and
346 // tokens emitted after `close()` would have nowhere to live.
347 self.flush_trivia();
348 self.close();
349 }
350
351 /// `@name(...)` or `#name <body>`. Decorator bodies are always
352 /// `(args)` (or absent) and decorator names may be dotted
353 /// (`@ensure.int`, `@module.fn`); directive bodies branch on the
354 /// name: `schema` / `extend` capture `name <T, U>? body? (with {})?`,
355 /// `import` captures `<spec> from "path"`, `main` captures
356 /// `( typed-params ) [-> Ret]`, the remaining names dispatch via
357 /// [`directive_shape`] — bare directives consume no body so they
358 /// can sit cleanly above the field they decorate, value directives
359 /// take exactly one trailing expression.
360 fn parse_attribute(&mut self) {
361 let is_directive = self.at(SyntaxKind::HASH);
362 let kind = if is_directive {
363 SyntaxKind::DIRECTIVE
364 } else {
365 SyntaxKind::DECORATOR
366 };
367 self.open(kind);
368 self.bump(); // # or @
369 let name_text = if self.at(SyntaxKind::IDENT) {
370 let text = self.current_text();
371 self.bump();
372 text
373 } else {
374 self.error_at_current("expected attribute name");
375 None
376 };
377 if !is_directive {
378 // Decorator — name may be dotted (`@ensure.at_least`).
379 // Body is always `(args)` or empty.
380 while self.at(SyntaxKind::DOT) {
381 self.bump();
382 if self.at(SyntaxKind::IDENT) {
383 self.bump();
384 } else {
385 self.error_at_current("expected identifier after `.` in decorator name");
386 break;
387 }
388 }
389 if self.at(SyntaxKind::L_PAREN) {
390 self.parse_call_args();
391 }
392 self.close();
393 return;
394 }
395 // Directive — dispatch on name. Unknown directive names take a
396 // single optional expression body to match the legacy parser's
397 // permissive fallback.
398 let shape = name_text
399 .and_then(crate::directive::directive_shape)
400 .unwrap_or(crate::DirectiveShape::Value);
401 match shape {
402 crate::DirectiveShape::Bare => {
403 // No body. `#internal`, `#relaxed`, `#unstrict`, `#native`.
404 }
405 crate::DirectiveShape::Value => {
406 if self.is_attribute_body_start() {
407 self.parse_expr();
408 }
409 }
410 crate::DirectiveShape::NameBody => self.parse_directive_name_body(),
411 crate::DirectiveShape::Enum => self.parse_directive_enum(),
412 crate::DirectiveShape::Import => self.parse_directive_import(),
413 crate::DirectiveShape::Main => self.parse_directive_main(),
414 }
415 self.close();
416 }
417
418 /// `#enum Name<T, U>? { Variant, Variant { field: Type }, Variant(Type) }`.
419 /// The lowerer turns this into the internal tagged-enum schema form.
420 fn parse_directive_enum(&mut self) {
421 if self.at(SyntaxKind::IDENT) {
422 self.bump();
423 } else {
424 return;
425 }
426 if self.at(SyntaxKind::LT) {
427 self.bump();
428 while !self.at(SyntaxKind::GT) && !self.at_end() {
429 if self.at(SyntaxKind::IDENT) {
430 self.bump();
431 } else {
432 self.error_at_current("expected generic param");
433 break;
434 }
435 if !self.eat(SyntaxKind::COMMA) {
436 break;
437 }
438 }
439 self.expect(SyntaxKind::GT);
440 }
441 if !self.eat(SyntaxKind::L_BRACE) {
442 return;
443 }
444 while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
445 if self.at(SyntaxKind::COMMA) {
446 self.bump();
447 continue;
448 }
449 self.open(SyntaxKind::ENUM_VARIANT);
450 if self.at(SyntaxKind::IDENT) {
451 self.bump();
452 } else {
453 self.error_at_current("expected enum variant name");
454 self.close();
455 break;
456 }
457 if self.at(SyntaxKind::L_BRACE) {
458 self.bump();
459 while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
460 if self.at(SyntaxKind::COMMA) {
461 self.bump();
462 continue;
463 }
464 self.open(SyntaxKind::ENUM_VARIANT_FIELD);
465 if self.at(SyntaxKind::IDENT) {
466 self.bump();
467 } else {
468 self.error_at_current("expected enum variant field name");
469 }
470 self.expect(SyntaxKind::COLON);
471 self.parse_type();
472 self.close();
473 if !self.eat(SyntaxKind::COMMA) {
474 break;
475 }
476 }
477 self.expect(SyntaxKind::R_BRACE);
478 } else if self.at(SyntaxKind::L_PAREN) {
479 self.parse_tuple_type();
480 }
481 self.close();
482 if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACE) {
483 self.error_at_current("expected `,` or `}` after enum variant");
484 break;
485 }
486 }
487 self.expect(SyntaxKind::R_BRACE);
488 }
489
490 /// `#schema Name <T, U>? body? (with { methods... })?`. The body
491 /// is whatever expression follows the name + generics (typically
492 /// a dict but the parser accepts any expression — the analyzer
493 /// emits a diagnostic when it isn't a dict). The trailing `with`
494 /// block is optional and may also follow a body-less `#schema X`
495 /// declaration.
496 fn parse_directive_name_body(&mut self) {
497 // Optional declared name.
498 if self.at(SyntaxKind::IDENT) {
499 self.bump();
500 } else {
501 return;
502 }
503 // Optional generic param list `<T, U>` — bare identifiers.
504 if self.at(SyntaxKind::LT) {
505 self.bump();
506 while !self.at(SyntaxKind::GT) && !self.at_end() {
507 if self.at(SyntaxKind::IDENT) {
508 self.bump();
509 } else {
510 self.error_at_current("expected generic param");
511 break;
512 }
513 if !self.eat(SyntaxKind::COMMA) {
514 break;
515 }
516 }
517 self.expect(SyntaxKind::GT);
518 }
519 // The body is everything up to (a) the next attribute, (b)
520 // the `with` keyword, or (c) the dict-field separator (`:`
521 // / `,` / `}` / EOI). Special-case the `with`-only shape
522 // (`#schema X with { ... }`) by skipping the body when we
523 // see `with` immediately.
524 let saw_with = self.at(SyntaxKind::IDENT) && self.current_text() == Some("with");
525 // v1 accepts an optional `:` separator between schema name and
526 // body: `#schema Image: { name: String }` is equivalent to
527 // `#schema Image { name: String }`. The legacy combinator chain
528 // consumed the `:` as part of the directive; the CST does the
529 // same so the `is_attribute_body_start` check below sees the
530 // body proper. Without this, the dict-field grammar would
531 // (correctly!) parse `Image:` as a malformed dict field after
532 // mistaking the directive for body-less.
533 if !saw_with && self.at(SyntaxKind::COLON) {
534 self.bump();
535 }
536 if !saw_with && self.is_attribute_body_start() {
537 // Guard: when the next chars are `Ident:` / `Ident,` we
538 // must not consume them — they belong to a dict field
539 // following `#schema X` in a `: ...` context.
540 if !self.peek_attribute_terminator() {
541 // Schema bodies are typically dicts (`#schema U { ... }`)
542 // but the grammar also accepts a type-alias body. When the body
543 // looks like a bare type expression — IDENT immediately
544 // followed by `<...>` — parse it as a type so the
545 // string-literal generic args don't surprise the Pratt
546 // expression grammar (which would treat `<` as a
547 // binary comparison).
548 if self.peek_is_bare_type_body() {
549 self.parse_type();
550 } else {
551 self.parse_expr();
552 }
553 }
554 }
555 // Optional `with { ... }` block — a structured method list.
556 // The legacy `opt_parse_with_block` (`directive.rs`) drives the
557 // shape: leading pragma stack (`#derive` / `#native` /
558 // `#internal` / `#no_auto_derive`), then a `name<T>?(p: T,
559 // ...) -> Ret (: body)?` declaration. We emit each method as
560 // a SCHEMA_METHOD node so the typed-AST layer can read the
561 // structure cheaply.
562 if self.at(SyntaxKind::IDENT) && self.current_text() == Some("with") {
563 self.bump();
564 if self.at(SyntaxKind::L_BRACE) {
565 self.parse_schema_with();
566 }
567 }
568 }
569
570 /// True when the upcoming token stream is an IDENT followed
571 /// immediately (no intervening whitespace) by `<` — a type-alias
572 /// body shape such as `Int` / `List<T>`. Used by
573 /// `parse_directive_name_body` to disambiguate the type-body shape
574 /// from a regular expression body. The IDENT-and-no-`<` case
575 /// (bare-type body like `#schema MyAlias String`) is also
576 /// classified as "type body" — the body is a single primitive
577 /// type identifier without generics.
578 fn peek_is_bare_type_body(&self) -> bool {
579 if !self.at(SyntaxKind::IDENT) {
580 return false;
581 }
582 // Only commit to the type body if the IDENT is one of the
583 // known type heads (`Int`, `String`, `Bool`, `List`, `Dict`,
584 // `Any`, `Float`) — otherwise a regular
585 // expression with a leading IDENT is the safer fallback.
586 let head = self.current_text().unwrap_or("");
587 if !matches!(
588 head,
589 "Int" | "String" | "Bool" | "Float" | "Any" | "List" | "Dict"
590 ) {
591 return false;
592 }
593 // Allow both primitive aliases (`Int`) and generic containers
594 // (`List<T>`) as type-body starts.
595 let head_idx = self.pos_skip_trivia();
596 let mut idx = head_idx + 1;
597 let mut had_ws = false;
598 while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
599 had_ws = true;
600 idx += 1;
601 }
602 if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::LT) && !had_ws {
603 return true;
604 }
605 // Bare type identifier (`#schema MyAlias String`) — only
606 // accept when nothing else follows on the line. We approximate
607 // "nothing else" by checking the next non-trivia token isn't
608 // a typical expression-continuation symbol.
609 matches!(
610 self.tokens.get(idx).map(|(k, _)| *k),
611 Some(SyntaxKind::HASH) | Some(SyntaxKind::L_BRACE) | None
612 )
613 }
614
615 /// `with { (pragma | method)* }` — body of a `#schema` / `#extend`
616 /// directive. Lossless: every byte (whitespace, comments, leading
617 /// pragmas) sits inside the [`SCHEMA_WITH`] node, with each method
618 /// declaration wrapped in its own [`SCHEMA_METHOD`] child.
619 fn parse_schema_with(&mut self) {
620 self.open(SyntaxKind::SCHEMA_WITH);
621 self.bump(); // {
622 while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
623 // Method declarations are introduced by either a pragma
624 // (`#derive` / `#native` / `#internal` / `#no_auto_derive`)
625 // or directly by a method name. We greedily group leading
626 // pragmas with the next method into one SCHEMA_METHOD node
627 // — if no method follows (e.g. trailing schema-level
628 // `#no_auto_derive`), the directives sit at the
629 // SCHEMA_WITH level as siblings.
630 if self.at(SyntaxKind::HASH) {
631 let ck = self.checkpoint();
632 let mut had_method_pragma = false;
633 while self.at(SyntaxKind::HASH) {
634 let name = self.directive_name_after_hash();
635 if matches!(
636 name.as_deref(),
637 Some("derive") | Some("native") | Some("internal")
638 ) {
639 had_method_pragma = true;
640 }
641 self.parse_attribute();
642 }
643 if self.at(SyntaxKind::IDENT) && !self.at_method_terminator() {
644 self.open_at(ck, SyntaxKind::SCHEMA_METHOD);
645 self.parse_schema_method_after_pragmas();
646 self.close();
647 } else if had_method_pragma {
648 // Pragma stack without a method — surface a recovery
649 // error to mirror the legacy "stray method pragma"
650 // diagnostic but keep parsing.
651 self.error(
652 "expected method declaration after `#derive` / `#native` / `#internal`",
653 );
654 }
655 continue;
656 }
657 if self.at(SyntaxKind::IDENT) {
658 self.open(SyntaxKind::SCHEMA_METHOD);
659 self.parse_schema_method_after_pragmas();
660 self.close();
661 continue;
662 }
663 // Unexpected token inside the with-block — recover to the
664 // next likely start of a method (HASH / IDENT / R_BRACE).
665 self.error_recover(
666 "expected method or pragma inside `with { ... }`",
667 &[SyntaxKind::HASH, SyntaxKind::IDENT, SyntaxKind::R_BRACE],
668 );
669 }
670 self.expect(SyntaxKind::R_BRACE);
671 self.close();
672 }
673
674 /// True when the upcoming non-trivia token is the with-block
675 /// terminator (`}`) — used to spot a pragma stack with no method
676 /// trailing it without confusing it for a normal method header.
677 fn at_method_terminator(&self) -> bool {
678 matches!(self.current(), Some(SyntaxKind::R_BRACE)) || self.at_end()
679 }
680
681 /// Peek the IDENT immediately after a HASH at the current position
682 /// (skipping trivia). Returns `None` if `#` isn't followed by an
683 /// identifier.
684 fn directive_name_after_hash(&self) -> Option<String> {
685 let mut idx = self.pos_skip_trivia();
686 if self.tokens.get(idx).map(|(k, _)| *k) != Some(SyntaxKind::HASH) {
687 return None;
688 }
689 idx += 1;
690 while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
691 idx += 1;
692 }
693 match self.tokens.get(idx) {
694 Some((SyntaxKind::IDENT, text)) => Some((*text).to_string()),
695 _ => None,
696 }
697 }
698
699 /// Parse a single method declaration inside a `with { ... }` block.
700 /// Caller has already opened a SCHEMA_METHOD node and emitted any
701 /// leading pragma directives. Shape:
702 ///
703 /// IDENT GenericParams? '(' (Param (',' Param)*)? ')' '->' Type (':' Expr)?
704 ///
705 /// Each parameter takes the named form `name: Type` (opposite of
706 /// `#main`'s `Type name`), reusing the existing CLOSURE_PARAM
707 /// wrapper to keep the typed-AST layer simple. The body is omitted
708 /// for `#native` methods.
709 fn parse_schema_method_after_pragmas(&mut self) {
710 // Method name.
711 if self.at(SyntaxKind::IDENT) {
712 self.bump();
713 } else {
714 self.error_at_current("expected method name");
715 return;
716 }
717 // Optional method-level generics `<U, V>`.
718 if self.at(SyntaxKind::LT) {
719 self.bump();
720 while !self.at(SyntaxKind::GT) && !self.at_end() {
721 if self.at(SyntaxKind::IDENT) {
722 self.bump();
723 } else {
724 self.error_at_current("expected method generic parameter");
725 break;
726 }
727 if !self.eat(SyntaxKind::COMMA) {
728 break;
729 }
730 }
731 self.expect(SyntaxKind::GT);
732 }
733 // Parameter list `(name: Type, ...)`.
734 if !self.expect(SyntaxKind::L_PAREN) {
735 return;
736 }
737 while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
738 self.parse_schema_method_param();
739 if !self.eat(SyntaxKind::COMMA) {
740 break;
741 }
742 }
743 self.expect(SyntaxKind::R_PAREN);
744 // `-> ReturnType` — required by the analyzer-level grammar
745 // (every with-block method declares its return), but the CST
746 // accepts the missing-arrow shape so older test fixtures
747 // that elided the return type still round-trip cleanly.
748 if self.eat(SyntaxKind::THIN_ARROW) {
749 self.parse_type();
750 }
751 // Optional `: body`. Methods marked `#native` omit it; for
752 // others the analyzer enforces presence.
753 if self.eat(SyntaxKind::COLON) {
754 self.parse_expr();
755 }
756 }
757
758 /// One schema-method parameter: `name: Type`. Lossless — emitted
759 /// inside a CLOSURE_PARAM node so the typed-AST layer can reuse
760 /// the existing wrapper.
761 fn parse_schema_method_param(&mut self) {
762 self.open(SyntaxKind::CLOSURE_PARAM);
763 if self.at(SyntaxKind::IDENT) {
764 self.bump();
765 } else {
766 self.error_at_current("expected parameter name");
767 self.close();
768 return;
769 }
770 if self.eat(SyntaxKind::COLON) {
771 self.parse_type();
772 } else {
773 self.error("expected `:` in schema method parameter");
774 }
775 self.close();
776 }
777
778 /// `#import <spec> from "path"`. `<spec>` is one of
779 /// `*`, `{ a, b as c }`, or a single identifier.
780 fn parse_directive_import(&mut self) {
781 if self.at(SyntaxKind::STAR) {
782 self.bump();
783 } else if self.at(SyntaxKind::L_BRACE) {
784 // Destructure list `{ a, b as c }` — each entry is an
785 // IDENT optionally followed by `as IDENT`. This is NOT a
786 // dict, so we don't reuse `parse_dict`. The legacy
787 // `parse_import_spec` accepts this shape; the typed-AST
788 // layer carries the entries on `DirectiveImportSpec`.
789 self.parse_import_destructure();
790 } else if self.at(SyntaxKind::IDENT) {
791 self.bump();
792 } else {
793 self.error_at_current("expected import spec");
794 return;
795 }
796 if self.at(SyntaxKind::IDENT) && self.current_text() == Some("from") {
797 self.bump();
798 } else {
799 self.error("expected `from` in #import");
800 return;
801 }
802 if self.at(SyntaxKind::STRING) {
803 self.bump();
804 } else {
805 self.error_at_current("expected path string in #import");
806 return;
807 }
808 // Optional integrity pin `<algo>:"<hex>"`. Accept anything of
809 // the shape `IDENT COLON STRING`; the algorithm name and hex
810 // are validated by the analyzer so the diagnostic span lands
811 // on the real source position rather than on the parser's
812 // current cursor.
813 if self.at(SyntaxKind::IDENT) {
814 self.bump();
815 if self.at(SyntaxKind::COLON) {
816 self.bump();
817 } else {
818 self.error_at_current("expected `:` in #import integrity pin");
819 return;
820 }
821 if self.at(SyntaxKind::STRING) {
822 self.bump();
823 } else {
824 self.error_at_current("expected hex string in #import integrity pin");
825 }
826 }
827 }
828
829 fn parse_import_destructure(&mut self) {
830 debug_assert!(self.at(SyntaxKind::L_BRACE));
831 self.bump(); // {
832 loop {
833 if self.at(SyntaxKind::R_BRACE) || self.at_end() {
834 break;
835 }
836 if self.at(SyntaxKind::IDENT) {
837 self.bump();
838 // Optional `as IDENT` alias.
839 if self.at(SyntaxKind::IDENT) && self.current_text() == Some("as") {
840 self.bump();
841 if self.at(SyntaxKind::IDENT) {
842 self.bump();
843 } else {
844 self.error_at_current("expected identifier after `as` in #import");
845 }
846 }
847 } else {
848 self.error_recover(
849 "expected identifier in #import destructure",
850 &[SyntaxKind::COMMA, SyntaxKind::R_BRACE],
851 );
852 }
853 if !self.eat(SyntaxKind::COMMA) {
854 break;
855 }
856 }
857 self.expect(SyntaxKind::R_BRACE);
858 }
859
860 /// `#main ( type ident, ... ) [-> Type]`. Captures the typed
861 /// param list directly so the directive node carries the same
862 /// structure the analyzer needs.
863 fn parse_directive_main(&mut self) {
864 if !self.eat(SyntaxKind::L_PAREN) {
865 self.error("expected `(` after `#main`");
866 return;
867 }
868 while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
869 // Each param: `Type ident` (closure-param shape).
870 self.parse_closure_param();
871 if !self.eat(SyntaxKind::COMMA) {
872 break;
873 }
874 }
875 self.expect(SyntaxKind::R_PAREN);
876 // Optional `-> ReturnType`.
877 if self.eat(SyntaxKind::THIN_ARROW) {
878 self.parse_type();
879 }
880 }
881
882 /// True when the next non-trivia token signals "no directive body
883 /// here, leave the ident for the surrounding grammar" — used by
884 /// `#schema X: value` (inside a dict) where `X` is the dict key,
885 /// not the schema-name body.
886 fn peek_attribute_terminator(&self) -> bool {
887 let mut idx = self.pos_skip_trivia();
888 // Skip an IDENT (and an optional generic angle-list).
889 if self.tokens.get(idx).map(|(k, _)| *k) != Some(SyntaxKind::IDENT) {
890 return false;
891 }
892 idx += 1;
893 while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
894 idx += 1;
895 }
896 matches!(
897 self.tokens.get(idx).map(|(k, _)| *k),
898 Some(SyntaxKind::COLON) | Some(SyntaxKind::COMMA) | Some(SyntaxKind::R_BRACE)
899 )
900 }
901
902 fn is_attribute_body_start(&self) -> bool {
903 self.current().is_some_and(|k| {
904 matches!(
905 k,
906 SyntaxKind::IDENT
907 | SyntaxKind::NUMBER
908 | SyntaxKind::STRING
909 | SyntaxKind::L_BRACE
910 | SyntaxKind::L_BRACK
911 // `L_PAREN` covers the parenthesised closure form
912 // `(p) => body` and parenthesised expressions
913 // `(a + b)`. Without this, value-shape directives
914 // like `#default (self) => ...` and
915 // `#expect (n) => n > 0` would be parsed as
916 // body-less, leaving the closure for the
917 // surrounding dict to choke on.
918 | SyntaxKind::L_PAREN
919 | SyntaxKind::AMP
920 | SyntaxKind::MINUS
921 | SyntaxKind::BANG
922 | SyntaxKind::STAR
923 // F-strings start a fresh atom too.
924 | SyntaxKind::F_STRING_OPEN
925 )
926 })
927 }
928
929 // ----- expression entry -------------------------------------------
930
931 /// Parse a full expression. Operator precedence is climbed with a
932 /// Pratt-style loop. Lowest precedence first; primary handles
933 /// atoms and prefix unaries. `match { ... }` and `where { ... }`
934 /// trail the binary chain as the outermost postfix forms — they
935 /// take precedence above ternary etc., matching the winnow
936 /// grammar in `expr.rs`.
937 fn parse_expr(&mut self) {
938 let ck = self.checkpoint();
939 self.parse_expr_bp(0);
940 // Ternary: `cond ? then : else`. Bound at expression-tail
941 // precedence — lower than every binary operator (so the binary
942 // chain absorbs into `cond`) but higher than the trailing
943 // `match` / `where` postfix forms (which wrap whatever ternary
944 // produces). The legacy `parse_ternary` (`expr.rs`) sits at the
945 // same level — see the precedence chain notes there.
946 //
947 // Disambiguation: `?` may also be a path-access prefix
948 // (`a?.b`, `a?[0]`) or a type-optional marker (`Foo?` inside a
949 // typed context). Path access is consumed earlier — the CST's
950 // current postfix loop doesn't fold `?.` / `?[`, but the legacy
951 // pre-P4 path always took those bytes itself, so no fixture
952 // reaches this branch with them in postfix position. Type
953 // optionals only appear inside committed `parse_type` calls
954 // (match arms, closure params, directive bodies), never at the
955 // outermost expression level — so seeing `?` here is
956 // unambiguously a ternary head.
957 if self.at(SyntaxKind::QUESTION) {
958 // Guard: don't claim a ternary on `?.` / `?[`. Those forms
959 // belong to path access and are handled (or rejected) by the
960 // atom layer; consuming `?` here would steal the prefix.
961 let next = self.nth(1);
962 if !matches!(next, Some(SyntaxKind::DOT) | Some(SyntaxKind::L_BRACK)) {
963 self.open_at(ck, SyntaxKind::TERNARY_EXPR);
964 self.bump(); // ?
965 self.parse_expr();
966 if !self.expect(SyntaxKind::COLON) {
967 self.close();
968 return;
969 }
970 self.parse_expr();
971 self.close();
972 }
973 }
974 loop {
975 if self.at(SyntaxKind::IDENT) && self.current_text() == Some("match") {
976 // Only commit to MATCH_EXPR when `match` is followed
977 // by `{` — otherwise it's a bareword called `match`
978 // somewhere unrelated.
979 if self.nth(1) == Some(SyntaxKind::L_BRACE) {
980 self.open_at(ck, SyntaxKind::MATCH_EXPR);
981 self.bump(); // `match`
982 self.bump(); // {
983 while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
984 self.parse_match_arm();
985 if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACE) {
986 self.error_recover(
987 "expected `,` or `}` in match",
988 &[SyntaxKind::COMMA, SyntaxKind::R_BRACE],
989 );
990 self.eat(SyntaxKind::COMMA);
991 }
992 }
993 self.expect(SyntaxKind::R_BRACE);
994 self.close();
995 continue;
996 }
997 }
998 if self.at(SyntaxKind::IDENT)
999 && self.current_text() == Some("where")
1000 && self.nth(1) == Some(SyntaxKind::L_BRACE)
1001 {
1002 self.open_at(ck, SyntaxKind::WHERE_EXPR);
1003 self.bump(); // `where`
1004 self.parse_dict();
1005 self.close();
1006 continue;
1007 }
1008 break;
1009 }
1010 }
1011
1012 /// One match arm: `pattern: body`. Pattern is one of:
1013 ///
1014 /// * a TYPE_NODE (`Up`, `Int`) for existing unit/schema patterns;
1015 /// * `*` for wildcard;
1016 /// * a Rust-like enum payload pattern (`Pair(a, b)`,
1017 /// `Email { address, subject: s }`).
1018 fn parse_match_arm(&mut self) {
1019 self.open(SyntaxKind::MATCH_ARM);
1020 if self.at(SyntaxKind::UNDERSCORE) {
1021 self.open(SyntaxKind::WILDCARD);
1022 self.bump();
1023 self.close();
1024 } else if self.at(SyntaxKind::STAR) {
1025 // `*` is no longer the pattern wildcard — the catch-all arm
1026 // is now written `_`. Keep the `*` lexeme inside a WILDCARD
1027 // node (so recovery stays structured) but flag the precise
1028 // migration so the diagnostic points at the new spelling.
1029 self.open(SyntaxKind::WILDCARD);
1030 self.error_at_current(
1031 "`*` is no longer a match wildcard — use `_` for the catch-all arm",
1032 );
1033 self.bump();
1034 self.close();
1035 } else if self.looks_like_match_payload_pattern() {
1036 self.parse_match_pattern();
1037 } else if self.at(SyntaxKind::IDENT) {
1038 self.parse_type();
1039 } else {
1040 self.error_at_current("expected match-arm pattern");
1041 }
1042 if self.eat(SyntaxKind::COLON) {
1043 self.parse_expr();
1044 } else {
1045 self.error("expected `:` in match arm");
1046 }
1047 self.close();
1048 }
1049
1050 fn looks_like_match_payload_pattern(&self) -> bool {
1051 if !self.at(SyntaxKind::IDENT) {
1052 return false;
1053 }
1054 let mut idx = self.pos_skip_trivia() + 1;
1055 while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1056 idx += 1;
1057 }
1058 while self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::DOT) {
1059 idx += 1;
1060 while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1061 idx += 1;
1062 }
1063 if self.tokens.get(idx).map(|(k, _)| *k) != Some(SyntaxKind::IDENT) {
1064 return false;
1065 }
1066 idx += 1;
1067 while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1068 idx += 1;
1069 }
1070 }
1071 matches!(
1072 self.tokens.get(idx).map(|(k, _)| *k),
1073 Some(SyntaxKind::L_PAREN | SyntaxKind::L_BRACE)
1074 )
1075 }
1076
1077 fn parse_match_pattern(&mut self) {
1078 self.open(SyntaxKind::MATCH_PATTERN);
1079 self.expect(SyntaxKind::IDENT);
1080 while self.at(SyntaxKind::DOT) {
1081 self.bump();
1082 self.expect(SyntaxKind::IDENT);
1083 }
1084 if self.at(SyntaxKind::L_PAREN) {
1085 self.bump();
1086 while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
1087 if self.at(SyntaxKind::UNDERSCORE) || self.at(SyntaxKind::IDENT) {
1088 self.bump();
1089 } else if self.at(SyntaxKind::STAR) {
1090 self.error_at_current(
1091 "`*` is no longer a pattern wildcard — use `_` to ignore a payload slot",
1092 );
1093 self.bump();
1094 } else {
1095 self.error_at_current("expected tuple pattern binding");
1096 break;
1097 }
1098 if !self.eat(SyntaxKind::COMMA) {
1099 break;
1100 }
1101 }
1102 self.expect(SyntaxKind::R_PAREN);
1103 } else if self.at(SyntaxKind::L_BRACE) {
1104 self.bump();
1105 while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
1106 if self.at(SyntaxKind::IDENT) {
1107 self.bump();
1108 if self.eat(SyntaxKind::COLON) {
1109 if self.at(SyntaxKind::UNDERSCORE) || self.at(SyntaxKind::IDENT) {
1110 self.bump();
1111 } else if self.at(SyntaxKind::STAR) {
1112 self.error_at_current(
1113 "`*` is no longer a pattern wildcard — use `_` to ignore a payload slot",
1114 );
1115 self.bump();
1116 } else {
1117 self.error_at_current("expected struct pattern binding");
1118 break;
1119 }
1120 }
1121 } else {
1122 self.error_at_current("expected struct pattern field");
1123 break;
1124 }
1125 if !self.eat(SyntaxKind::COMMA) {
1126 break;
1127 }
1128 }
1129 self.expect(SyntaxKind::R_BRACE);
1130 }
1131 self.close();
1132 }
1133
1134 fn parse_expr_bp(&mut self, min_bp: u8) {
1135 let lhs_ck = self.checkpoint();
1136 self.parse_unary();
1137
1138 while let Some(op) = self.current() {
1139 let Some((lbp, rbp)) = infix_bp(op) else {
1140 break;
1141 };
1142 if lbp < min_bp {
1143 break;
1144 }
1145 self.open_at(lhs_ck, SyntaxKind::BINARY_EXPR);
1146 if op == SyntaxKind::PLUS_PLUS {
1147 // `++` was parseable but never executable (the
1148 // evaluator always trapped UnsupportedOperator).
1149 // String concatenation is spelled `+`; keep consuming
1150 // the token inside the BINARY_EXPR node (so recovery
1151 // stays structured and the round-trip stays lossless)
1152 // but flag the precise migration. Plain `error` — not
1153 // `error_at_current`, which would wrap the token in an
1154 // extra ERROR node and skip the normal bump below.
1155 self.error("`++` is not an operator — use `+` to concatenate strings");
1156 }
1157 self.bump();
1158 self.parse_expr_bp(rbp);
1159 self.close();
1160 }
1161 }
1162
1163 /// Prefix-unary or atom. Postfix call / index / dot are wrapped
1164 /// here via checkpoint.
1165 fn parse_unary(&mut self) {
1166 if self.at_set(&[SyntaxKind::MINUS, SyntaxKind::BANG, SyntaxKind::PLUS]) {
1167 self.open(SyntaxKind::UNARY_EXPR);
1168 self.bump();
1169 self.parse_unary();
1170 self.close();
1171 return;
1172 }
1173 self.parse_postfix();
1174 }
1175
1176 /// Atom with postfix suffixes (`.field`, `[i]`, `(args)`,
1177 /// plus optional-chain `?.field` / `?[i]`).
1178 fn parse_postfix(&mut self) {
1179 let ck = self.checkpoint();
1180 self.parse_atom();
1181 loop {
1182 if self.at(SyntaxKind::L_PAREN) {
1183 self.open_at(ck, SyntaxKind::CALL_EXPR);
1184 self.parse_call_args();
1185 self.close();
1186 } else if self.at(SyntaxKind::DOT)
1187 || self.at(SyntaxKind::L_BRACK)
1188 || (self.at(SyntaxKind::QUESTION)
1189 && matches!(
1190 self.nth(1),
1191 Some(SyntaxKind::DOT) | Some(SyntaxKind::L_BRACK)
1192 ))
1193 {
1194 // Path access — fold into VARIABLE_EXPR so dotted
1195 // paths like `a.b.c` end up as a single node. v1.8
1196 // positional access `xs.0` (number after `.`) is the
1197 // tuple/list index form — accepted alongside `.field`.
1198 // Optional chaining (`a?.b`, `a?[0]`) consumes the `?`
1199 // as a prefix on the next segment; the typed-AST
1200 // marks the segment as optional.
1201 self.open_at(ck, SyntaxKind::VARIABLE_EXPR);
1202 loop {
1203 let is_optional_prefix = self.at(SyntaxKind::QUESTION)
1204 && matches!(
1205 self.nth(1),
1206 Some(SyntaxKind::DOT) | Some(SyntaxKind::L_BRACK)
1207 );
1208 if is_optional_prefix {
1209 self.bump(); // ?
1210 } else if !self.at(SyntaxKind::DOT) && !self.at(SyntaxKind::L_BRACK) {
1211 break;
1212 }
1213 if self.at(SyntaxKind::DOT) {
1214 self.bump();
1215 if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::NUMBER) {
1216 self.bump();
1217 } else {
1218 self.error_at_current("expected identifier or index after `.`");
1219 }
1220 } else if self.at(SyntaxKind::L_BRACK) {
1221 // `[ index ]`
1222 self.bump();
1223 self.parse_expr();
1224 self.expect(SyntaxKind::R_BRACK);
1225 } else {
1226 break;
1227 }
1228 }
1229 self.close();
1230 } else {
1231 break;
1232 }
1233 }
1234 }
1235
1236 fn parse_atom(&mut self) {
1237 // Leading attributes (`#brand T {...}` / `@decorator(x) expr`)
1238 // stack above the atom they decorate. The CST keeps them as
1239 // siblings of the atom inside whatever node the caller opened
1240 // (typically a DICT_FIELD value, a LIST element, or a function
1241 // argument). The legacy parser handled this case the same way
1242 // — the attribute decorates whatever expression follows.
1243 while self.at(SyntaxKind::HASH) || self.at(SyntaxKind::AT) {
1244 // Guard: when `#` heads a directive whose body is bare
1245 // (e.g. `#relaxed` standing alone at file scope), there's
1246 // no following expression — `parse_attribute` consumes
1247 // nothing extra, and the loop would spin. Break out the
1248 // moment we see no progress.
1249 let before = self.pos;
1250 self.parse_attribute();
1251 if self.pos == before {
1252 break;
1253 }
1254 }
1255 match self.current() {
1256 Some(SyntaxKind::NUMBER) => {
1257 self.open(SyntaxKind::LITERAL);
1258 self.bump();
1259 self.close();
1260 }
1261 Some(SyntaxKind::STRING) => {
1262 let text = self.tokens[self.pos_skip_trivia()].1;
1263 if text.starts_with('f') {
1264 self.parse_f_string();
1265 } else {
1266 self.open(SyntaxKind::LITERAL);
1267 self.bump();
1268 self.close();
1269 }
1270 }
1271 Some(SyntaxKind::IDENT) => {
1272 // `true` / `false` / `Infinity` / `NaN` and the removed `null` spelling are
1273 // keyword-shaped literals but lex as IDENT — promote
1274 // here so the lowering can decode them via the LITERAL
1275 // walker (which dispatches on the inner token text).
1276 let text = self.tokens[self.pos_skip_trivia()].1;
1277 if matches!(text, "null" | "true" | "false" | "Infinity" | "NaN") {
1278 self.open(SyntaxKind::LITERAL);
1279 self.bump();
1280 self.close();
1281 } else if self.looks_like_variant_ctor() {
1282 // `Enum.Variant { ... }` — at least two dotted
1283 // segments followed by a brace body. Legacy
1284 // `parse_variant_ctor` requires `path.len() >= 2`
1285 // before committing; we match that here so plain
1286 // `foo.bar` member access still falls through to
1287 // the postfix loop as VARIABLE_EXPR.
1288 self.parse_variant_ctor();
1289 } else if self.looks_like_type_atom() {
1290 // Bareword type expressions (`Dict<String, Int>`,
1291 // `List<Int>`, `Foo?`). Legacy `parse_type_expr`
1292 // lowers these into `Expr::Type`; we follow suit so
1293 // forms like `#brand Dict<String, Int> { ... }`
1294 // and `#schema Id Type<Arg>` parse
1295 // cleanly without the Pratt grammar misreading
1296 // `<` as a comparison.
1297 self.parse_type();
1298 } else {
1299 self.open(SyntaxKind::VARIABLE_EXPR);
1300 self.bump();
1301 self.close();
1302 }
1303 }
1304 Some(SyntaxKind::AMP) => self.parse_reference(),
1305 Some(SyntaxKind::L_BRACE) => self.parse_dict(),
1306 Some(SyntaxKind::L_BRACK) => self.parse_list(),
1307 Some(SyntaxKind::L_PAREN) => {
1308 // Three shapes share the leading `(`:
1309 // 1. `(p1, p2) [-> RetType] => body` — a closure.
1310 // 2. `(expr)` — a parenthesised
1311 // group (precedence override, NOT a tuple).
1312 // 3. `()` / `(e,)` / `(e1, e2, ...)` — a tuple value.
1313 // The unit `()` and the trailing-comma 1-tuple
1314 // `(e,)` are the disambiguators that keep `(e)`
1315 // pure grouping.
1316 // Closure lookahead runs first (it can see the trailing
1317 // `=>`); the tuple-vs-group decision is made by scanning
1318 // the parenthesised body for a top-level comma.
1319 if self.try_parse_paren_closure() {
1320 return;
1321 }
1322 self.parse_paren_or_tuple();
1323 }
1324 Some(SyntaxKind::STAR) => {
1325 self.open(SyntaxKind::WILDCARD);
1326 self.bump();
1327 self.close();
1328 }
1329 Some(SyntaxKind::ELLIPSIS) => {
1330 self.open(SyntaxKind::SPREAD_EXPR);
1331 self.bump();
1332 // v1.3 typed spread: `...<Type> expr`. The type hint
1333 // sits between the ellipsis and the source expression
1334 // and disambiguates strict-mode derivation. The inner
1335 // expression follows the type with no separator.
1336 if self.at(SyntaxKind::LT) {
1337 self.bump();
1338 self.parse_type();
1339 self.expect(SyntaxKind::GT);
1340 }
1341 self.parse_unary();
1342 self.close();
1343 }
1344 _ => {
1345 // `parse_atom` is reached from inside dict / list /
1346 // call / argument productions. When no atom shape
1347 // matches the current token, recover to the nearest
1348 // structural boundary so the surrounding loop can
1349 // resume without spinning. We emit a single ERROR
1350 // covering the bad span; the diagnostic message is
1351 // the standard "expected expression."
1352 self.error_recover("expected expression", Self::STRUCTURAL_SYNC);
1353 }
1354 }
1355 }
1356
1357 /// Look ahead past the current IDENT for an `IDENT (DOT IDENT)+ {`
1358 /// sequence — the variant-constructor shape `Enum.Variant { ... }`
1359 /// the legacy `parse_variant_ctor` (`expr.rs`) detects. Returns
1360 /// true only when at least two dotted segments precede the `{`,
1361 /// matching the legacy `path.len() < 2` guard. Anything else
1362 /// (single-segment IDENT, dotted-path member access without a
1363 /// trailing brace) falls through to the regular VARIABLE_EXPR path.
1364 fn looks_like_variant_ctor(&self) -> bool {
1365 if !self.at(SyntaxKind::IDENT) {
1366 return false;
1367 }
1368 let mut idx = self.pos_skip_trivia() + 1;
1369 let advance_trivia = |i: &mut usize, toks: &[(SyntaxKind, &str)]| {
1370 while *i < toks.len() && toks[*i].0.is_trivia() {
1371 *i += 1;
1372 }
1373 };
1374 advance_trivia(&mut idx, &self.tokens);
1375 let mut segs: usize = 1;
1376 while self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::DOT) {
1377 idx += 1;
1378 advance_trivia(&mut idx, &self.tokens);
1379 if self.tokens.get(idx).map(|(k, _)| *k) != Some(SyntaxKind::IDENT) {
1380 return false;
1381 }
1382 idx += 1;
1383 segs += 1;
1384 advance_trivia(&mut idx, &self.tokens);
1385 }
1386 if segs < 2 {
1387 return false;
1388 }
1389 self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::L_BRACE)
1390 }
1391
1392 /// Decide whether the current IDENT atom heads a *type* expression
1393 /// (`Dict<String, Int>`, `List<Int>`, `Foo?`). Legacy
1394 /// `parse_type_expr` (`expr.rs`) lowers such atoms into
1395 /// `Expr::Type`; downstream forms like `#brand Dict<K, V> { ... }`
1396 /// rely on this so the value body isn't misread as `Dict < K`
1397 /// (binary comparison).
1398 ///
1399 /// Conservative: only fires when the type-ness signal is
1400 /// unambiguous — the IDENT is a known type head, OR is
1401 /// immediately followed by `<...>` generics (no whitespace
1402 /// before `<`), with the angle balance closing cleanly. A
1403 /// trailing `?` (optional marker) also qualifies.
1404 fn looks_like_type_atom(&self) -> bool {
1405 if !self.at(SyntaxKind::IDENT) {
1406 return false;
1407 }
1408 let head_text = self.current_text().unwrap_or("");
1409 let head_idx = self.pos_skip_trivia();
1410 let mut idx = head_idx + 1;
1411 let mut had_ws = false;
1412 while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1413 had_ws = true;
1414 idx += 1;
1415 }
1416 let known_head = matches!(
1417 head_text,
1418 "Int" | "String" | "Bool" | "Float" | "Any" | "List" | "Dict"
1419 );
1420 // `IDENT < ...>` — type with generics. Requires `<`
1421 // immediately adjacent (no whitespace).
1422 if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::LT) && !had_ws {
1423 // Scan for the matching `>` while tracking parens.
1424 let mut depth: i32 = 1;
1425 let mut paren_depth: i32 = 0;
1426 let mut j = idx + 1;
1427 while j < self.tokens.len() && depth > 0 {
1428 match self.tokens[j].0 {
1429 SyntaxKind::LT => depth += 1,
1430 SyntaxKind::GT => depth -= 1,
1431 SyntaxKind::L_PAREN => paren_depth += 1,
1432 SyntaxKind::R_PAREN if paren_depth > 0 => paren_depth -= 1,
1433 SyntaxKind::L_BRACE
1434 | SyntaxKind::R_BRACE
1435 | SyntaxKind::R_PAREN
1436 | SyntaxKind::FAT_ARROW
1437 if depth == 1 && paren_depth == 0 =>
1438 {
1439 return false
1440 }
1441 _ => {}
1442 }
1443 j += 1;
1444 }
1445 return depth == 0;
1446 }
1447 // Bare type head with no generics — only fires when the IDENT
1448 // is recognised as a primitive type name. Guarded by what
1449 // follows so plain VARIABLE_EXPR usage doesn't accidentally
1450 // become a TYPE_NODE: must be followed by `{` (type-tagged
1451 // dict body, `#brand T { ... }`) or a stray type-suffix `?`.
1452 // The `?` no longer denotes optionality (that's `Option<T>`),
1453 // but routing it here lets `parse_type` emit a precise "use
1454 // Option<T>" error instead of a confusing ternary misparse.
1455 if known_head {
1456 let next = self.tokens.get(idx).map(|(k, _)| *k);
1457 if matches!(next, Some(SyntaxKind::QUESTION) | Some(SyntaxKind::L_BRACE)) {
1458 return true;
1459 }
1460 }
1461 // `IDENT ? {` — a legacy `Weather? { ... }` shape. The `?` is no
1462 // longer a valid optional marker, but the trailing brace makes
1463 // this unambiguously a (now-erroring) type-tagged value rather
1464 // than a ternary head, so route it into `parse_type` for the
1465 // helpful diagnostic.
1466 if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::QUESTION) {
1467 let mut j = idx + 1;
1468 while j < self.tokens.len() && self.tokens[j].0.is_trivia() {
1469 j += 1;
1470 }
1471 if self.tokens.get(j).map(|(k, _)| *k) == Some(SyntaxKind::L_BRACE) {
1472 return true;
1473 }
1474 }
1475 false
1476 }
1477
1478 /// `Enum (.Variant)+ { body }` — emit a VARIANT_CTOR node wrapping
1479 /// the dotted path (as plain IDENT + DOT tokens) and the brace
1480 /// body (a regular DICT). Caller has already determined via
1481 /// [`Self::looks_like_variant_ctor`] that we're at the head IDENT
1482 /// of such a construct.
1483 fn parse_variant_ctor(&mut self) {
1484 self.open(SyntaxKind::VARIANT_CTOR);
1485 // Head IDENT.
1486 self.bump();
1487 // Drain `.IDENT*` — guaranteed at least one by the peek.
1488 while self.at(SyntaxKind::DOT) {
1489 self.bump();
1490 if self.at(SyntaxKind::IDENT) {
1491 self.bump();
1492 } else {
1493 self.error_at_current("expected identifier after `.` in variant constructor");
1494 break;
1495 }
1496 }
1497 // Body is a regular dict literal.
1498 if self.at(SyntaxKind::L_BRACE) {
1499 self.parse_dict();
1500 } else {
1501 self.error("expected `{` in variant constructor");
1502 }
1503 self.close();
1504 }
1505
1506 /// Index into `tokens` of the next non-trivia token. Caller must
1507 /// guarantee `current().is_some()`.
1508 fn pos_skip_trivia(&self) -> usize {
1509 let mut idx = self.pos;
1510 while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
1511 idx += 1;
1512 }
1513 idx
1514 }
1515
1516 /// Decompose a leading `f"..."` / `f#"..."#` STRING token into a
1517 /// proper [`F_STRING`] subtree. The original token is consumed
1518 /// as a SINGLE leaf at the lex level, but for the CST we walk
1519 /// its bytes and emit:
1520 ///
1521 /// * `F_STRING_OPEN` — `f"` / `f#"` / `f##"` …
1522 /// * `F_STRING_LITERAL` — verbatim text between zones.
1523 /// * `F_STRING_INTERPOLATION` (a sub-node) — wraps a
1524 /// `F_STRING_INTERP_START`, a recursively-parsed expression
1525 /// (using the same flat lex on the interpolation bytes), and a
1526 /// `F_STRING_INTERP_END`.
1527 /// * `F_STRING_CLOSE` — matching `"` / `"#` / `"##` …
1528 ///
1529 /// Reuses [`lex::lex`] for the interpolation bytes so any future
1530 /// lexer change is picked up automatically. The whole emission is
1531 /// driven directly by the original byte span — so the round-trip
1532 /// invariant holds without help from the caller.
1533 fn parse_f_string(&mut self) {
1534 // Flush trivia FIRST so the F_STRING node nests under whatever
1535 // production opened most recently. We then refuse to advance
1536 // `self.pos` until we've emitted every sub-piece, so the
1537 // overall byte count matches the original STRING token.
1538 self.flush_trivia();
1539 let tok_idx = self.pos;
1540 let (_kind, full_text): (SyntaxKind, &'a str) = self.tokens[tok_idx];
1541 let start_byte = self.cursor_byte;
1542 // Parse the opening sequence: `f` + zero-or-more `#` + `"`.
1543 let bytes = full_text.as_bytes();
1544 // The lexer already guarantees this token starts with `f`,
1545 // and that `next_is_hash_quote(bytes, 1)` was true, but be
1546 // defensive — bail to plain LITERAL if anything else.
1547 if bytes.first() != Some(&b'f') {
1548 // Should be unreachable given the caller's guard.
1549 self.open(SyntaxKind::LITERAL);
1550 self.bump();
1551 self.close();
1552 return;
1553 }
1554 let mut idx: usize = 1;
1555 while bytes.get(idx) == Some(&b'#') {
1556 idx += 1;
1557 }
1558 if bytes.get(idx) != Some(&b'"') {
1559 // Malformed open — emit the whole thing as a single
1560 // LITERAL so byte-round-trip is preserved.
1561 self.open(SyntaxKind::LITERAL);
1562 self.bump();
1563 self.close();
1564 return;
1565 }
1566 let hash_count = idx - 1;
1567 let open_end = idx + 1;
1568 let mut closing = String::from("\"");
1569 for _ in 0..hash_count {
1570 closing.push('#');
1571 }
1572
1573 // Locate the close. The body starts at `open_end`; we have to
1574 // track interpolation depth so a literal `}` inside an
1575 // interpolation can't be mistaken for the close.
1576 let body_start = open_end;
1577 let close_pos = self.find_fstring_close(bytes, body_start, &closing, hash_count);
1578 let close_pos = match close_pos {
1579 Some(p) => p,
1580 None => {
1581 // Unterminated — fall back to LITERAL.
1582 self.open(SyntaxKind::LITERAL);
1583 self.bump();
1584 self.close();
1585 return;
1586 }
1587 };
1588
1589 // Open the composite node.
1590 self.open(SyntaxKind::F_STRING);
1591 // Emit OPEN.
1592 self.emit_raw_token(SyntaxKind::F_STRING_OPEN, &full_text[..open_end]);
1593 // Walk body, splitting LITERAL chunks vs interpolation zones.
1594 let mut i = body_start;
1595 let mut literal_start = i;
1596 let raw_string = hash_count > 0;
1597 while i < close_pos {
1598 if Self::starts_with_at(bytes, i, b"${") {
1599 if i > literal_start {
1600 self.emit_raw_token(SyntaxKind::F_STRING_LITERAL, &full_text[literal_start..i]);
1601 }
1602 // Find matching `}`.
1603 let interp_start = i;
1604 let interp_body_start = i + 2;
1605 let mut depth: usize = 1;
1606 let mut j = interp_body_start;
1607 while j < close_pos && depth > 0 {
1608 match bytes[j] {
1609 b'{' => {
1610 depth += 1;
1611 j += 1;
1612 }
1613 b'}' => {
1614 depth -= 1;
1615 if depth == 0 {
1616 break;
1617 }
1618 j += 1;
1619 }
1620 b'"' => {
1621 // Skip nested "..." (the lexer always
1622 // pairs them up safely on round-trip).
1623 j = crate::lex::scan_normal_string_for_cst(bytes, j);
1624 }
1625 b => {
1626 // Skip a full codepoint to make progress
1627 // on invalid UTF-8 boundaries.
1628 j += utf8_codepoint_len(b);
1629 }
1630 }
1631 }
1632 if depth != 0 {
1633 // Unterminated interpolation — emit the rest as
1634 // one literal so bytes survive, then stop.
1635 self.emit_raw_token(SyntaxKind::F_STRING_LITERAL, &full_text[i..close_pos]);
1636 literal_start = close_pos;
1637 break;
1638 }
1639 let interp_body_end = j;
1640 let interp_close = j + 1;
1641 // Emit the interpolation sub-node.
1642 self.open(SyntaxKind::F_STRING_INTERPOLATION);
1643 self.emit_raw_token(
1644 SyntaxKind::F_STRING_INTERP_START,
1645 &full_text[interp_start..interp_body_start],
1646 );
1647 // Sub-parse the inner expression. The inner text is a
1648 // self-contained slice; we hand it to a fresh `lex` +
1649 // mini-parser. This is recursive (an interpolation can
1650 // contain another f-string), but the byte-accounting
1651 // works because we splice sub-tokens directly into the
1652 // builder.
1653 self.parse_fstring_interp_inner(&full_text[interp_body_start..interp_body_end]);
1654 self.emit_raw_token(
1655 SyntaxKind::F_STRING_INTERP_END,
1656 &full_text[interp_body_end..interp_close],
1657 );
1658 self.close();
1659 literal_start = interp_close;
1660 i = interp_close;
1661 continue;
1662 }
1663 // Escape handling — only relevant in non-raw f-strings.
1664 if !raw_string && bytes[i] == b'\\' && i + 1 < close_pos {
1665 i += 1 + utf8_codepoint_len(bytes[i + 1]);
1666 continue;
1667 }
1668 i += utf8_codepoint_len(bytes[i]);
1669 }
1670 if literal_start < close_pos {
1671 self.emit_raw_token(
1672 SyntaxKind::F_STRING_LITERAL,
1673 &full_text[literal_start..close_pos],
1674 );
1675 }
1676 // Emit CLOSE.
1677 self.emit_raw_token(SyntaxKind::F_STRING_CLOSE, &full_text[close_pos..]);
1678 self.close();
1679 // Advance the parser past the original STRING token now that
1680 // we've emitted every sub-piece directly.
1681 self.cursor_byte = start_byte + full_text.len();
1682 self.pos = tok_idx + 1;
1683 }
1684
1685 /// Emit a single leaf token directly to the builder (bypassing
1686 /// the lex-token cursor). Used by f-string decomposition; never
1687 /// advances `pos` / `cursor_byte`.
1688 fn emit_raw_token(&mut self, kind: SyntaxKind, text: &str) {
1689 self.builder
1690 .token(RelonLanguage::kind_to_raw_static(kind), text);
1691 }
1692
1693 /// Sub-parser for the inside of `${ ... }` in an f-string. We
1694 /// temporarily swap `self.tokens` with the inner-text lex (the
1695 /// `&str` slices inside still borrow from the original source,
1696 /// so the swapped `Vec` is fully compatible lifetime-wise),
1697 /// run the same Pratt expression grammar, then restore.
1698 fn parse_fstring_interp_inner(&mut self, text: &'a str) {
1699 let inner_tokens: Vec<(SyntaxKind, &'a str)> = crate::lex::lex(text);
1700 // Stash outer state and install the inner stream.
1701 let outer_tokens = std::mem::replace(&mut self.tokens, inner_tokens);
1702 let outer_pos = std::mem::replace(&mut self.pos, 0);
1703 let outer_cursor = self.cursor_byte;
1704 self.cursor_byte = 0;
1705 if !self.at_end() {
1706 self.parse_expr();
1707 }
1708 // Absorb any remaining bytes so the F_STRING_INTERPOLATION
1709 // body has full byte coverage. Trailing whitespace becomes
1710 // trivia naturally; anything else lands in an ERROR node.
1711 if !self.at_end() {
1712 self.error_recover("trailing input in interpolation", &[]);
1713 }
1714 self.flush_trivia();
1715 // Restore outer state.
1716 self.tokens = outer_tokens;
1717 self.pos = outer_pos;
1718 self.cursor_byte = outer_cursor + text.len();
1719 }
1720
1721 fn find_fstring_close(
1722 &self,
1723 bytes: &[u8],
1724 body_start: usize,
1725 closing: &str,
1726 hashes: usize,
1727 ) -> Option<usize> {
1728 let raw = hashes > 0;
1729 let mut idx = body_start;
1730 while idx + closing.len() <= bytes.len() {
1731 // Skip past balanced `${...}` interpolations.
1732 if Self::starts_with_at(bytes, idx, b"${") {
1733 let mut depth: usize = 1;
1734 let mut j = idx + 2;
1735 while j < bytes.len() && depth > 0 {
1736 match bytes[j] {
1737 b'{' => depth += 1,
1738 b'}' => depth -= 1,
1739 b'"' => {
1740 j = crate::lex::scan_normal_string_for_cst(bytes, j);
1741 continue;
1742 }
1743 _ => {}
1744 }
1745 if depth == 0 {
1746 j += 1;
1747 break;
1748 }
1749 j += 1;
1750 }
1751 if depth != 0 {
1752 return None;
1753 }
1754 idx = j;
1755 continue;
1756 }
1757 if !raw && bytes[idx] == b'\\' {
1758 if idx + 1 >= bytes.len() {
1759 return None;
1760 }
1761 idx += 1 + utf8_codepoint_len(bytes[idx + 1]);
1762 continue;
1763 }
1764 if Self::starts_with_at(bytes, idx, closing.as_bytes()) {
1765 return Some(idx);
1766 }
1767 idx += utf8_codepoint_len(bytes[idx]);
1768 }
1769 None
1770 }
1771
/// True when `needle` occurs in `bytes` starting exactly at byte
/// offset `idx`.
///
/// Returns `false` when `idx` is past the end of `bytes` or when fewer
/// than `needle.len()` bytes remain. The tail is sliced first and then
/// compared, so no `idx + needle.len()` sum is ever formed and an
/// out-of-range `idx` cannot trigger an arithmetic-overflow panic.
fn starts_with_at(bytes: &[u8], idx: usize, needle: &[u8]) -> bool {
    bytes
        .get(idx..)
        .is_some_and(|tail| tail.starts_with(needle))
}
1777
1778 /// Scan forward (without committing) starting from `start_idx`,
1779 /// past a balanced `(...)`, returning the index of the first
1780 /// non-trivia token AFTER the closing `)`. `start_idx` must point
1781 /// at the opening `L_PAREN` token. Returns `None` if the parens
1782 /// are unbalanced (we ran past EOI before matching).
1783 fn scan_after_matching_paren(&self, start_idx: usize) -> Option<usize> {
1784 debug_assert!(self.tokens.get(start_idx).map(|(k, _)| *k) == Some(SyntaxKind::L_PAREN));
1785 let mut depth: i32 = 0;
1786 let mut idx = start_idx;
1787 while idx < self.tokens.len() {
1788 let kind = self.tokens[idx].0;
1789 match kind {
1790 SyntaxKind::L_PAREN => depth += 1,
1791 SyntaxKind::R_PAREN => {
1792 depth -= 1;
1793 if depth == 0 {
1794 let mut next = idx + 1;
1795 while next < self.tokens.len() && self.tokens[next].0.is_trivia() {
1796 next += 1;
1797 }
1798 return Some(next);
1799 }
1800 }
1801 _ => {}
1802 }
1803 idx += 1;
1804 }
1805 None
1806 }
1807
1808 /// Without consuming anything, decide whether the `(...)` at the
1809 /// current position is followed (modulo an optional `-> Type`) by
1810 /// a `=>` arrow — i.e. the parens are a closure parameter list,
1811 /// not a parenthesised expression. We're already at the
1812 /// `L_PAREN`.
1813 fn looks_like_closure_after_paren(&self) -> bool {
1814 let lparen_idx = self.pos_skip_trivia();
1815 let Some(after_paren) = self.scan_after_matching_paren(lparen_idx) else {
1816 return false;
1817 };
1818 // `=> ...`?
1819 if matches!(
1820 self.tokens.get(after_paren).map(|(k, _)| *k),
1821 Some(SyntaxKind::FAT_ARROW)
1822 ) {
1823 return true;
1824 }
1825 // `-> RetType => ...`? Skip past the return-type tokens. We
1826 // can't fully parse a type without committing, so scan ahead
1827 // conservatively until we hit `=>` (closure) or anything that
1828 // disqualifies (newline-like break is fine — trivia is skipped
1829 // by definition, but we treat `,`/`}`/`]`/`)`/`:` as a
1830 // disqualifier so we never confuse `-> Type:` patterns).
1831 if matches!(
1832 self.tokens.get(after_paren).map(|(k, _)| *k),
1833 Some(SyntaxKind::THIN_ARROW)
1834 ) {
1835 let mut idx = after_paren + 1;
1836 let mut bracket_depth: i32 = 0;
1837 while idx < self.tokens.len() {
1838 let kind = self.tokens[idx].0;
1839 if kind.is_trivia() {
1840 idx += 1;
1841 continue;
1842 }
1843 match kind {
1844 SyntaxKind::FAT_ARROW if bracket_depth == 0 => return true,
1845 SyntaxKind::COMMA
1846 | SyntaxKind::R_BRACE
1847 | SyntaxKind::R_BRACK
1848 | SyntaxKind::R_PAREN
1849 | SyntaxKind::COLON
1850 if bracket_depth == 0 =>
1851 {
1852 return false
1853 }
1854 SyntaxKind::L_BRACE
1855 | SyntaxKind::L_BRACK
1856 | SyntaxKind::L_PAREN
1857 | SyntaxKind::LT => {
1858 bracket_depth += 1;
1859 }
1860 SyntaxKind::R_BRACE | SyntaxKind::R_BRACK | SyntaxKind::GT
1861 if bracket_depth > 0 =>
1862 {
1863 bracket_depth -= 1;
1864 }
1865 _ => {}
1866 }
1867 idx += 1;
1868 }
1869 }
1870 false
1871 }
1872
1873 /// When `current()` is `L_PAREN` and `looks_like_closure_after_paren`
1874 /// is true, consume the entire `(params) [-> RetType] => body`
1875 /// construct and emit a CLOSURE node. Returns true on success.
1876 /// Leaves the parser untouched and returns false otherwise.
1877 fn try_parse_paren_closure(&mut self) -> bool {
1878 if !self.at(SyntaxKind::L_PAREN) {
1879 return false;
1880 }
1881 if !self.looks_like_closure_after_paren() {
1882 return false;
1883 }
1884 self.open(SyntaxKind::CLOSURE);
1885 self.bump(); // (
1886 // Comma-separated CLOSURE_PARAMs.
1887 while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
1888 self.parse_closure_param();
1889 if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_PAREN) {
1890 self.error_recover(
1891 "expected `,` or `)` in closure parameter list",
1892 &[SyntaxKind::COMMA, SyntaxKind::R_PAREN],
1893 );
1894 self.eat(SyntaxKind::COMMA);
1895 }
1896 }
1897 self.expect(SyntaxKind::R_PAREN);
1898 // Optional `-> RetType`.
1899 if self.eat(SyntaxKind::THIN_ARROW) {
1900 self.parse_type();
1901 }
1902 if self.expect(SyntaxKind::FAT_ARROW) {
1903 self.parse_expr();
1904 }
1905 self.close();
1906 true
1907 }
1908
1909 /// Parse a `(`-led atom that is NOT a closure: either a
1910 /// parenthesised group `(expr)` or a tuple value literal.
1911 ///
1912 /// Disambiguation (locked design):
1913 /// * `()` → unit / zero-tuple (TUPLE node, no children).
1914 /// * `(e)` → grouping (the inner expression, no wrapper).
1915 /// * `(e,)` → 1-tuple (trailing comma forces it).
1916 /// * `(e1, e2, ...)` → n-tuple.
1917 ///
1918 /// The opening `(` and closing `)` land inside the TUPLE node for the
1919 /// tuple shapes so the round-trip-by-bytes invariant holds; for the
1920 /// grouping shape the parens are bumped as bare leaves around the
1921 /// inner expression (matching the pre-tuple behaviour).
1922 fn parse_paren_or_tuple(&mut self) {
1923 debug_assert!(self.at(SyntaxKind::L_PAREN));
1924 let ck = self.checkpoint();
1925 self.bump(); // (
1926 // Empty parens — the unit tuple `()`.
1927 if self.at(SyntaxKind::R_PAREN) {
1928 self.open_at(ck, SyntaxKind::TUPLE);
1929 self.bump(); // )
1930 self.close();
1931 return;
1932 }
1933 // Parse the first element / grouped expression.
1934 self.parse_expr();
1935 if self.at(SyntaxKind::COMMA) {
1936 // At least one comma → a tuple. Wrap everything (including
1937 // the already-parsed first element) in a TUPLE node.
1938 self.open_at(ck, SyntaxKind::TUPLE);
1939 while self.eat(SyntaxKind::COMMA) {
1940 // Trailing comma before `)` is allowed (and is what
1941 // makes `(e,)` a 1-tuple).
1942 if self.at(SyntaxKind::R_PAREN) || self.at_end() {
1943 break;
1944 }
1945 self.parse_expr();
1946 }
1947 self.expect(SyntaxKind::R_PAREN);
1948 self.close();
1949 return;
1950 }
1951 // No comma — plain grouping `(expr)`. No TUPLE wrapper; the
1952 // inner expression stands on its own (precedence override only).
1953 self.expect(SyntaxKind::R_PAREN);
1954 }
1955
1956 /// One closure parameter — either `name` or `Type name`. P2
1957 /// records the type, when present, as a TYPE_NODE child preceding
1958 /// the IDENT.
1959 fn parse_closure_param(&mut self) {
1960 self.open(SyntaxKind::CLOSURE_PARAM);
1961 // Heuristic: if the next two non-trivia tokens are IDENT IDENT
1962 // (or a more elaborate type followed by an ident), treat the
1963 // leading run as a TypeNode. We delegate to `parse_type` which
1964 // commits conservatively (it stops at the first non-type-y
1965 // token, so a bare `IDENT` doesn't get swallowed as a type).
1966 // The simplest signal of "this is a typed param" is that
1967 // there are at least two adjacent IDENTs, possibly with `<...>`
1968 // / `?` in the type slot.
1969 if self.peek_is_typed_param() {
1970 self.parse_type();
1971 }
1972 // A bare `_` is a legal parameter name (the Rust-style
1973 // ignore binding `(acc, _) => ...`). Since the lexer now emits
1974 // `_` as `UNDERSCORE` rather than `IDENT`, accept it here too so
1975 // a `_` parameter keeps parsing exactly as it did before the
1976 // wildcard split.
1977 if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::UNDERSCORE) {
1978 self.bump();
1979 } else {
1980 self.error_at_current("expected closure parameter name");
1981 }
1982 self.close();
1983 }
1984
1985 /// Cheap lookahead: does the upcoming token stream look like
1986 /// `Type ident` (a typed closure parameter) or just `ident`
1987 /// (untyped)? We say "typed" if the current token is IDENT and
1988 /// the next non-trivia token after a `Type`-shaped run is another
1989 /// IDENT — meaning the first one is the type and the second is
1990 /// the param name. We allow `<...>` and `?` between them.
1991 ///
1992 /// Crucial heuristic: when a `<` appears, it must be immediately
1993 /// adjacent (no whitespace) to the preceding IDENT for it to
1994 /// count as opening a generic argument list. Without this
1995 /// guard, `a < b: c` (a closure param of type `a` named `< b`
1996 /// — but `<` isn't a valid name leader, so it bails)
1997 /// would still be misinterpreted in pathological cases. Rust /
1998 /// TypeScript both use the same lex-time adjacency check.
1999 fn peek_is_typed_param(&self) -> bool {
2000 if !self.at(SyntaxKind::IDENT) {
2001 return false;
2002 }
2003 // Walk past IDENT, optional `.IDENT*`, optional `<...>`,
2004 // optional `?`, then check for IDENT.
2005 let head_idx = self.pos_skip_trivia();
2006 let mut idx = head_idx + 1;
2007 let advance_trivia = |i: &mut usize| {
2008 while *i < self.tokens.len() && self.tokens[*i].0.is_trivia() {
2009 *i += 1;
2010 }
2011 };
2012 // For the adjacency check we want to know whether ANY trivia
2013 // intervenes between the IDENT and the next non-trivia token.
2014 let mut had_trivia_after_head = false;
2015 if idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
2016 had_trivia_after_head = true;
2017 advance_trivia(&mut idx);
2018 }
2019 // `.IDENT*`
2020 while idx < self.tokens.len() && self.tokens[idx].0 == SyntaxKind::DOT {
2021 idx += 1;
2022 advance_trivia(&mut idx);
2023 if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::IDENT) {
2024 idx += 1;
2025 advance_trivia(&mut idx);
2026 } else {
2027 return false;
2028 }
2029 had_trivia_after_head = false;
2030 }
2031 // `<...>` — balanced angle scan. Refuse when whitespace
2032 // separates the IDENT and the `<` — that's the disambiguation
2033 // hook between `Foo<Bar>` (type) and `a < b` (comparison).
2034 if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::LT) {
2035 if had_trivia_after_head {
2036 return false;
2037 }
2038 let mut depth: i32 = 1;
2039 // Track nested `(...)` so tuple-type arguments like
2040 // `List<(Int, String)>` don't trip the comma rejection.
2041 let mut paren_depth: i32 = 0;
2042 idx += 1;
2043 while idx < self.tokens.len() && depth > 0 {
2044 match self.tokens[idx].0 {
2045 SyntaxKind::LT => depth += 1,
2046 SyntaxKind::GT => depth -= 1,
2047 SyntaxKind::L_PAREN => paren_depth += 1,
2048 SyntaxKind::R_PAREN if paren_depth > 0 => paren_depth -= 1,
2049 // Anything that strongly disqualifies a type
2050 // expression — bail. Commas at depth==1 are
2051 // fine (`Dict<String, Int>`) — only structural
2052 // tokens that can never appear inside a type
2053 // disqualify the scan.
2054 SyntaxKind::L_BRACE
2055 | SyntaxKind::R_BRACE
2056 | SyntaxKind::R_PAREN
2057 | SyntaxKind::FAT_ARROW
2058 if depth == 1 && paren_depth == 0 =>
2059 {
2060 return false
2061 }
2062 _ => {}
2063 }
2064 idx += 1;
2065 }
2066 if depth != 0 {
2067 return false;
2068 }
2069 advance_trivia(&mut idx);
2070 }
2071 // Optional `?`.
2072 if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::QUESTION) {
2073 idx += 1;
2074 advance_trivia(&mut idx);
2075 }
2076 self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::IDENT)
2077 }
2078
2079 /// Parse a type-expression-shaped run of tokens into a TYPE_NODE.
2080 /// The grammar:
2081 ///
2082 /// TypeNode := TupleType | (PathSeg ('.' PathSeg)* GenericArgs?)
2083 /// TupleType := '(' ')' | '(' TypeNode ',' ')' | '(' TypeNode (',' TypeNode)+ ','? ')'
2084 /// PathSeg := IDENT | STRING
2085 /// GenericArgs := '<' (TypeNode (',' TypeNode)*)? ','? '>'
2086 ///
2087 /// Handles string-keyed segments (`"namespaced".Foo`), nested
2088 /// generics (`Map<String, Int>`), and v1.7 tuple types in both
2089 /// type-hint position (`(Int, String) pair: ...`) and as generic
2090 /// arguments (`List<(Int, String)>`). A trailing type-suffix `?`
2091 /// (`T?`) is no longer valid — optionality is written `Option<T>` —
2092 /// so we still consume any stray `?` token but flag it as an error.
2093 fn parse_type(&mut self) {
2094 // Tuple type — committed only when the caller picked
2095 // `parse_type` (typed-key / generic-arg / closure-param /
2096 // return-type position). The expression grammar uses its own
2097 // `(...)` handler so a parens group never reaches this branch.
2098 if self.at(SyntaxKind::L_PAREN) {
2099 self.parse_tuple_type();
2100 return;
2101 }
2102 self.open(SyntaxKind::TYPE_NODE);
2103 // First segment: IDENT or STRING (allowed in the v1 grammar
2104 // for dotted-string paths like `"foo".Bar`).
2105 if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::STRING) {
2106 self.bump();
2107 } else {
2108 self.error_at_current("expected type name");
2109 self.close();
2110 return;
2111 }
2112 // Dotted continuation.
2113 while self.at(SyntaxKind::DOT) {
2114 self.bump();
2115 if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::STRING) {
2116 self.bump();
2117 } else {
2118 self.error_at_current("expected identifier after `.` in type");
2119 }
2120 }
2121 // Generic argument list. We're in a committed type context
2122 // here (the caller already decided "this is a type"), so any
2123 // `<` opens generics — no adjacency check needed.
2124 if self.at(SyntaxKind::LT) {
2125 self.bump();
2126 loop {
2127 if self.at(SyntaxKind::GT) || self.at_end() {
2128 break;
2129 }
2130 self.parse_type();
2131 if !self.eat(SyntaxKind::COMMA) {
2132 break;
2133 }
2134 }
2135 self.expect(SyntaxKind::GT);
2136 }
2137 // Type-suffix `?` is no longer valid syntax — optional types
2138 // are written `Option<T>`. We still consume the token (keeping
2139 // the tree lossless and avoiding recovery spin) but flag it as
2140 // an error pointing the user to the named-type form.
2141 if self.at(SyntaxKind::QUESTION) {
2142 self.error("optional types are written `Option<T>`, not `T?`");
2143 self.bump();
2144 }
2145 self.close();
2146 }
2147
2148 /// `(T1, T2, ...)` tuple type. Three shapes:
2149 ///
2150 /// * `()` — zero-tuple.
2151 /// * `(T,)` — one-tuple (trailing comma is mandatory; without
2152 /// it the form is a parenthesised type, not used
2153 /// in the current grammar but still consumed as
2154 /// a single-element TUPLE_TYPE for forward-compat).
2155 /// * `(T1, T2)` — 2+ tuple, optional trailing comma.
2156 ///
2157 /// Caller has already committed to type-position via `parse_type`,
2158 /// so we don't have to worry about confusing this with a closure
2159 /// param list — the closure detection happens at the expression
2160 /// layer (`try_parse_paren_closure`) and never reaches here.
2161 fn parse_tuple_type(&mut self) {
2162 self.open(SyntaxKind::TUPLE_TYPE);
2163 self.bump(); // (
2164 while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
2165 self.parse_type();
2166 if !self.eat(SyntaxKind::COMMA) {
2167 break;
2168 }
2169 }
2170 self.expect(SyntaxKind::R_PAREN);
2171 // Type-suffix `?` is no longer valid syntax — optional tuple
2172 // types are written `Option<(...)>`. Consume the token but flag
2173 // it as an error.
2174 if self.at(SyntaxKind::QUESTION) {
2175 self.error("optional types are written `Option<T>`, not `T?`");
2176 self.bump();
2177 }
2178 self.close();
2179 }
2180
2181 fn parse_reference(&mut self) {
2182 // `&base.tail.tail...` with optional-chain `?.` / `?[` access
2183 // forms (`&a.b?.c`, `&a?.[0]`). The legacy `reference_var`
2184 // grammar accepts both `.` / `[` and the `?`-prefixed variant
2185 // — the typed-AST tags the optional-ness on each `TokenKey`.
2186 self.open(SyntaxKind::REFERENCE_EXPR);
2187 self.bump(); // &
2188 if self.at(SyntaxKind::IDENT) {
2189 self.bump(); // base name
2190 } else {
2191 self.error_at_current("expected reference base after `&`");
2192 }
2193 loop {
2194 // `?.` and `?[` — eat the `?` prefix first, then fall
2195 // through to the regular dot / bracket handling.
2196 if self.at(SyntaxKind::QUESTION)
2197 && matches!(
2198 self.nth(1),
2199 Some(SyntaxKind::DOT) | Some(SyntaxKind::L_BRACK)
2200 )
2201 {
2202 self.bump(); // ?
2203 } else if !self.at(SyntaxKind::DOT) && !self.at(SyntaxKind::L_BRACK) {
2204 break;
2205 }
2206 if self.at(SyntaxKind::DOT) {
2207 self.bump();
2208 if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::NUMBER) {
2209 self.bump();
2210 } else {
2211 self.error_at_current("expected identifier or index after `.`");
2212 }
2213 } else if self.at(SyntaxKind::L_BRACK) {
2214 self.bump(); // [
2215 self.parse_expr();
2216 self.expect(SyntaxKind::R_BRACK);
2217 } else {
2218 break;
2219 }
2220 }
2221 self.close();
2222 }
2223
2224 fn parse_list(&mut self) {
2225 // We don't know up-front whether this `[` opens a list or a
2226 // comprehension — comprehensions look like `[ expr for id in
2227 // iterable (if cond)? ]`. Use a checkpoint so we can wrap the
2228 // first expression into either LIST or COMPREHENSION based on
2229 // what we find next.
2230 let outer_ck = self.checkpoint();
2231 self.bump(); // [
2232 // Empty list — handle explicitly so we don't try to parse an
2233 // expression after `[`.
2234 if self.at(SyntaxKind::R_BRACK) {
2235 self.open_at(outer_ck, SyntaxKind::LIST);
2236 self.bump();
2237 self.close();
2238 return;
2239 }
2240 // Parse the first element (or `for` head). If it's a spread,
2241 // it can't be a comprehension head — emit LIST directly.
2242 if self.at(SyntaxKind::ELLIPSIS) {
2243 self.open_at(outer_ck, SyntaxKind::LIST);
2244 self.parse_list_body_tail();
2245 return;
2246 }
2247 self.parse_expr();
2248 // After the first expression: if `for IDENT in ...`, this is
2249 // a comprehension. Otherwise it's a regular list — wrap as
2250 // LIST and continue collecting the rest.
2251 if self.at(SyntaxKind::IDENT) && self.current_text() == Some("for") {
2252 self.open_at(outer_ck, SyntaxKind::COMPREHENSION);
2253 self.bump(); // `for`
2254 if self.at(SyntaxKind::IDENT) {
2255 self.bump();
2256 } else {
2257 self.error_at_current("expected identifier after `for`");
2258 }
2259 if self.at(SyntaxKind::IDENT) && self.current_text() == Some("in") {
2260 self.bump();
2261 } else {
2262 self.error("expected `in` in comprehension");
2263 }
2264 self.parse_expr();
2265 if self.at(SyntaxKind::IDENT) && self.current_text() == Some("if") {
2266 self.bump();
2267 self.parse_expr();
2268 }
2269 self.expect(SyntaxKind::R_BRACK);
2270 self.close();
2271 return;
2272 }
2273 // Regular list — wrap the existing first element into a LIST
2274 // node and continue.
2275 self.open_at(outer_ck, SyntaxKind::LIST);
2276 if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACK) {
2277 self.error_recover(
2278 "expected `,` or `]` in list",
2279 &[SyntaxKind::COMMA, SyntaxKind::R_BRACK],
2280 );
2281 self.eat(SyntaxKind::COMMA);
2282 }
2283 self.parse_list_body_tail();
2284 }
2285
2286 /// Consume the remainder of a LIST body (after the optional leading
2287 /// element + comma have already been emitted) up to and including
2288 /// the closing `]`, then close the LIST node.
2289 fn parse_list_body_tail(&mut self) {
2290 while !self.at(SyntaxKind::R_BRACK) && !self.at_end() {
2291 self.parse_expr();
2292 if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACK) {
2293 self.error_recover(
2294 "expected `,` or `]` in list",
2295 &[SyntaxKind::COMMA, SyntaxKind::R_BRACK],
2296 );
2297 self.eat(SyntaxKind::COMMA);
2298 }
2299 }
2300 self.expect(SyntaxKind::R_BRACK);
2301 self.close();
2302 }
2303
2304 /// Text of the current (non-trivia) token, or None at EOI. Used by
2305 /// keyword-tail productions (`for`, `in`, `if`, `match`, `where`,
2306 /// `with`) that the lexer doesn't split out.
2307 fn current_text(&self) -> Option<&'a str> {
2308 let idx = self.pos_skip_trivia();
2309 self.tokens.get(idx).map(|(_, t)| *t)
2310 }
2311
2312 fn parse_dict(&mut self) {
2313 self.open(SyntaxKind::DICT);
2314 self.bump(); // {
2315 while !self.at(SyntaxKind::R_BRACE) && !self.at_end() {
2316 self.parse_dict_field();
2317 if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_BRACE) {
2318 self.error_recover(
2319 "expected `,` or `}` in dict",
2320 &[SyntaxKind::COMMA, SyntaxKind::R_BRACE],
2321 );
2322 self.eat(SyntaxKind::COMMA);
2323 }
2324 }
2325 self.expect(SyntaxKind::R_BRACE);
2326 self.close();
2327 }
2328
2329 fn parse_dict_field(&mut self) {
2330 self.open(SyntaxKind::DICT_FIELD);
2331 // Leading attributes (e.g. `#internal` / `#expect "msg"` /
2332 // `@currency("USD")`) stack above the pair's key. Same
2333 // shape the file root permits.
2334 while self.at(SyntaxKind::HASH) || self.at(SyntaxKind::AT) {
2335 self.parse_attribute();
2336 }
2337 if self.at_end() {
2338 self.close();
2339 return;
2340 }
2341 // Attribute-only field: `#import x from "p", "next": 1` — the
2342 // `#import` directive already consumed its full body, leaving
2343 // the field separator next. Same for a sequence of bare
2344 // directives whose payload is the field itself (e.g.
2345 // `#schema X { ... },`). Close the field here so the surrounding
2346 // dict resumes at the separator.
2347 if matches!(
2348 self.current(),
2349 Some(SyntaxKind::COMMA) | Some(SyntaxKind::R_BRACE)
2350 ) {
2351 self.close();
2352 return;
2353 }
2354 // The key: an ident, a string, or `...` (spread).
2355 if self.at(SyntaxKind::ELLIPSIS) {
2356 self.open(SyntaxKind::SPREAD_EXPR);
2357 self.bump();
2358 // v1.3 typed spread `...<Type> source` — same shape as the
2359 // atom-level spread, but here we sit inside a dict field
2360 // so the source expression can be a richer form.
2361 if self.at(SyntaxKind::LT) {
2362 self.bump();
2363 self.parse_type();
2364 self.expect(SyntaxKind::GT);
2365 }
2366 self.parse_expr();
2367 self.close();
2368 self.close();
2369 return;
2370 }
2371 // Optional leading type hint: `Type key: value` /
2372 // `Type key(params): body`. We commit only when peeking
2373 // suggests a typed-key shape — otherwise the leading run is
2374 // the key itself (e.g. a single identifier). v1.7 tuple types
2375 // (`(Int, String) pair: ...`) take the same slot and are
2376 // detected by a separate `(...)`-leading peek.
2377 if self.peek_is_tuple_typed_dict_key() {
2378 self.parse_tuple_type();
2379 } else if self.peek_is_typed_dict_key() {
2380 self.parse_type();
2381 }
2382 if self.at(SyntaxKind::IDENT) || self.at(SyntaxKind::STRING) {
2383 self.bump();
2384 } else if self.at(SyntaxKind::L_BRACK) {
2385 // Dynamic key `[expr]: value`.
2386 self.bump();
2387 // Optional `<T>` type-hint between `[` and the expression.
2388 if self.at(SyntaxKind::LT) {
2389 self.bump();
2390 self.parse_type();
2391 self.expect(SyntaxKind::GT);
2392 }
2393 self.parse_expr();
2394 self.expect(SyntaxKind::R_BRACK);
2395 } else {
2396 self.error_recover(
2397 "expected dict key",
2398 &[SyntaxKind::COLON, SyntaxKind::COMMA, SyntaxKind::R_BRACE],
2399 );
2400 }
2401 // Method-shorthand closure: `key(params) [-> Ret]: body`.
2402 // Detect via a `(` immediately after the key. We commit to the
2403 // closure interpretation whenever a `(` follows the key, since
2404 // the v1 grammar already reserves that position exclusively
2405 // for the method shorthand.
2406 if self.at(SyntaxKind::L_PAREN) {
2407 // Emit `(params) [-> Ret]` as a CLOSURE_PARAM list now;
2408 // the body that follows the `:` will be wrapped together
2409 // with the params into a CLOSURE node via a checkpoint.
2410 let closure_ck = self.checkpoint();
2411 self.bump(); // (
2412 while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
2413 self.parse_closure_param();
2414 if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_PAREN) {
2415 self.error_recover(
2416 "expected `,` or `)` in closure parameter list",
2417 &[SyntaxKind::COMMA, SyntaxKind::R_PAREN],
2418 );
2419 self.eat(SyntaxKind::COMMA);
2420 }
2421 }
2422 self.expect(SyntaxKind::R_PAREN);
2423 // Optional `-> RetType`.
2424 if self.eat(SyntaxKind::THIN_ARROW) {
2425 self.parse_type();
2426 }
2427 if self.eat(SyntaxKind::COLON) {
2428 self.open_at(closure_ck, SyntaxKind::CLOSURE);
2429 self.parse_expr();
2430 self.close();
2431 } else {
2432 self.error("expected `:` in dict field");
2433 }
2434 } else if self.eat(SyntaxKind::COLON) {
2435 self.parse_expr();
2436 } else {
2437 self.error("expected `:` in dict field");
2438 }
2439 self.close();
2440 }
2441
2442 /// Does the upcoming token stream start with a Type-shaped run
2443 /// followed by an IDENT (or STRING) and then `:` / `(` (i.e. a
2444 /// typed-dict-key, NOT a dotted-path or a bare key)? Conservative
2445 /// — false negatives are fine (the field still parses untyped),
2446 /// but a false positive would consume the key as a type.
2447 fn peek_is_typed_dict_key(&self) -> bool {
2448 // Same logic as peek_is_typed_param, but we also accept STRING
2449 // as the trailing key segment, and we require a following
2450 // `:` or `(` so a dotted-path-as-value doesn't trip us up.
2451 if !self.at(SyntaxKind::IDENT) {
2452 return false;
2453 }
2454 let mut idx = self.pos_skip_trivia() + 1;
2455 let advance_trivia = |i: &mut usize, toks: &[(SyntaxKind, &str)]| {
2456 while *i < toks.len() && toks[*i].0.is_trivia() {
2457 *i += 1;
2458 }
2459 };
2460 advance_trivia(&mut idx, &self.tokens);
2461 while idx < self.tokens.len() && self.tokens[idx].0 == SyntaxKind::DOT {
2462 idx += 1;
2463 advance_trivia(&mut idx, &self.tokens);
2464 if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::IDENT) {
2465 idx += 1;
2466 advance_trivia(&mut idx, &self.tokens);
2467 } else {
2468 return false;
2469 }
2470 }
2471 let saw_generics = self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::LT);
2472 if saw_generics {
2473 let mut depth: i32 = 1;
2474 // Track nested `(` / `)` so a tuple-type argument like
2475 // `List<(Int, String)>` doesn't make the rejection bail
2476 // out the moment it hits a comma or `)`.
2477 let mut paren_depth: i32 = 0;
2478 idx += 1;
2479 while idx < self.tokens.len() && depth > 0 {
2480 match self.tokens[idx].0 {
2481 SyntaxKind::LT => depth += 1,
2482 SyntaxKind::GT => depth -= 1,
2483 SyntaxKind::L_PAREN => paren_depth += 1,
2484 SyntaxKind::R_PAREN if paren_depth > 0 => paren_depth -= 1,
2485 SyntaxKind::L_BRACE
2486 | SyntaxKind::R_BRACE
2487 | SyntaxKind::R_PAREN
2488 | SyntaxKind::FAT_ARROW
2489 | SyntaxKind::THIN_ARROW
2490 | SyntaxKind::COLON
2491 if depth == 1 && paren_depth == 0 =>
2492 {
2493 return false
2494 }
2495 _ => {}
2496 }
2497 idx += 1;
2498 }
2499 if depth != 0 {
2500 return false;
2501 }
2502 advance_trivia(&mut idx, &self.tokens);
2503 }
2504 let saw_question = self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::QUESTION);
2505 if saw_question {
2506 idx += 1;
2507 advance_trivia(&mut idx, &self.tokens);
2508 }
2509 // Now we must see IDENT or STRING (the key) followed by `:`
2510 // or `(`. If neither, the leading run wasn't a type — bail
2511 // and let the surrounding parser treat it as the key itself.
2512 if !matches!(
2513 self.tokens.get(idx).map(|(k, _)| *k),
2514 Some(SyntaxKind::IDENT) | Some(SyntaxKind::STRING)
2515 ) {
2516 return false;
2517 }
2518 let mut after_key = idx + 1;
2519 advance_trivia(&mut after_key, &self.tokens);
2520 let next = self.tokens.get(after_key).map(|(k, _)| *k);
2521 matches!(next, Some(SyntaxKind::COLON) | Some(SyntaxKind::L_PAREN))
2522 }
2523
2524 /// Does the upcoming token stream start with a balanced `(...)`
2525 /// tuple-type prefix followed by an IDENT (or STRING) and then
2526 /// `:` / `(` (i.e. `(Int, String) pair: ...`)? Used by
2527 /// [`parse_dict_field`] to commit to the tuple-type lead, which
2528 /// has to win over the "parens group" interpretation of the same
2529 /// bytes when they appear at the head of a dict field. The
2530 /// balanced paren scan walks past nested generics / nested parens
2531 /// so `List<(Int, String)>` doesn't fool the outer detector.
2532 fn peek_is_tuple_typed_dict_key(&self) -> bool {
2533 if !self.at(SyntaxKind::L_PAREN) {
2534 return false;
2535 }
2536 let lparen_idx = self.pos_skip_trivia();
2537 let Some(after_paren) = self.scan_after_matching_paren(lparen_idx) else {
2538 return false;
2539 };
2540 // Optional trailing `?` after the tuple type.
2541 let mut idx = after_paren;
2542 if self.tokens.get(idx).map(|(k, _)| *k) == Some(SyntaxKind::QUESTION) {
2543 idx += 1;
2544 while idx < self.tokens.len() && self.tokens[idx].0.is_trivia() {
2545 idx += 1;
2546 }
2547 }
2548 // Must see IDENT or STRING (the key), followed by `:` or `(`.
2549 if !matches!(
2550 self.tokens.get(idx).map(|(k, _)| *k),
2551 Some(SyntaxKind::IDENT) | Some(SyntaxKind::STRING)
2552 ) {
2553 return false;
2554 }
2555 let mut after_key = idx + 1;
2556 while after_key < self.tokens.len() && self.tokens[after_key].0.is_trivia() {
2557 after_key += 1;
2558 }
2559 matches!(
2560 self.tokens.get(after_key).map(|(k, _)| *k),
2561 Some(SyntaxKind::COLON) | Some(SyntaxKind::L_PAREN)
2562 )
2563 }
2564
2565 fn parse_call_args(&mut self) {
2566 self.open(SyntaxKind::CALL_ARG);
2567 self.bump(); // (
2568 while !self.at(SyntaxKind::R_PAREN) && !self.at_end() {
2569 self.parse_call_arg();
2570 if !self.eat(SyntaxKind::COMMA) && !self.at(SyntaxKind::R_PAREN) {
2571 self.error_recover(
2572 "expected `,` or `)` in argument list",
2573 &[SyntaxKind::COMMA, SyntaxKind::R_PAREN],
2574 );
2575 self.eat(SyntaxKind::COMMA);
2576 }
2577 }
2578 self.expect(SyntaxKind::R_PAREN);
2579 self.close();
2580 }
2581
2582 /// One argument inside a call's parens. Either positional (a
2583 /// bare expression) or named (`IDENT = expression`). The latter
2584 /// is detected by peeking IDENT-followed-by-EQ — the legacy
2585 /// `parse_call_arg` (`fn_call.rs`) uses the same lookahead. We
2586 /// emit the IDENT + EQ + value expression as siblings of each
2587 /// other under the parent CALL_ARG node so the lowering pass can
2588 /// pick the name back out without re-running token logic.
2589 fn parse_call_arg(&mut self) {
2590 if self.at(SyntaxKind::IDENT) && self.nth(1) == Some(SyntaxKind::EQ) {
2591 // Named: IDENT EQ <expr>.
2592 self.bump(); // name
2593 self.bump(); // =
2594 self.parse_expr();
2595 } else {
2596 self.parse_expr();
2597 }
2598 }
2599}
2600
2601// =====================================================================
2602// Operator precedence (Pratt binding-power table).
2603//
2604// Mirrors the existing precedence chain in `expr.rs`:
2605// 1. or ||
2606// 2. and &&
2607// 3. equality == !=
2608// 4. comparison < > <= >=
2609// 5. (retired) concat ++ — still consumed here for structured
2610// recovery, but `parse_expr_bp` flags it with a migration
2611// diagnostic pointing at `+` (string concatenation operator)
2612// 6. additive + -
2613// 7. multiplicative * / %
2614// 8. pipe |
2615// All operators are left-associative (right_bp = left_bp + 1).
2616// =====================================================================
2617
2618fn infix_bp(kind: SyntaxKind) -> Option<(u8, u8)> {
2619 Some(match kind {
2620 SyntaxKind::PIPE_PIPE => (10, 11),
2621 SyntaxKind::AMP_AMP => (20, 21),
2622 SyntaxKind::EQ_EQ | SyntaxKind::BANG_EQ => (30, 31),
2623 SyntaxKind::LT | SyntaxKind::GT | SyntaxKind::LT_EQ | SyntaxKind::GT_EQ => (40, 41),
2624 SyntaxKind::PLUS_PLUS => (50, 51),
2625 SyntaxKind::PLUS | SyntaxKind::MINUS => (60, 61),
2626 SyntaxKind::STAR | SyntaxKind::SLASH | SyntaxKind::PERCENT => (70, 71),
2627 SyntaxKind::PIPE => (80, 81),
2628 _ => return None,
2629 })
2630}
2631
2632// =====================================================================
// rowan's `Language::kind_to_raw` is an associated function reached
// through the `Language` impl; our hot inner loops want a short,
// `'static`-friendly helper for the same conversion. Wrap it.
2635// =====================================================================
2636
/// Conversion from the crate's `SyntaxKind` to rowan's raw
/// `rowan::SyntaxKind`, exposed as an associated function (no `self`).
trait RawKind {
    /// Convert `kind` into its raw rowan representation.
    fn kind_to_raw_static(kind: SyntaxKind) -> rowan::SyntaxKind;
}
impl RawKind for RelonLanguage {
    fn kind_to_raw_static(kind: SyntaxKind) -> rowan::SyntaxKind {
        // Delegates to the `Into<rowan::SyntaxKind>` conversion of
        // `SyntaxKind`.
        kind.into()
    }
}
2645
2646#[cfg(test)]
2647mod tests {
2648 use super::*;
2649
2650 fn parse_round_trip(source: &str) -> Parse {
2651 let parsed = parse_cst(source);
2652 let reconstructed = parsed.syntax().text().to_string();
2653 assert_eq!(reconstructed, source, "round-trip mismatch");
2654 parsed
2655 }
2656
#[test]
fn empty_dict() {
    // The smallest dict must round-trip and parse without errors.
    let source = "{}";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors());
}
2662
#[test]
fn simple_dict() {
    // Round-trip only: two plain `key: value` pairs.
    let source = "{ foo: 1, bar: 2 }";
    parse_round_trip(source);
}
2667
#[test]
fn nested_dict_and_list() {
    // Round-trip only: multi-line source with a list and a nested dict.
    let source = "{\n foo: [1, 2, 3],\n bar: { baz: \"hi\" }\n}\n";
    parse_round_trip(source);
}
2672
#[test]
fn reference_path() {
    // Round-trip only: a reference with dotted and indexed steps.
    let source = "{ x: &root.foo.bar[0] }";
    parse_round_trip(source);
}
2677
#[test]
fn binary_expression() {
    let parsed = parse_round_trip("{ x: 1 + 2 * 3 }");
    assert!(!parsed.has_errors());
    // Multiplication binds tighter than addition, so the first
    // BINARY_EXPR found under the dict must contain a second one.
    let root = parsed.syntax();
    let dict = root
        .descendants()
        .find(|node| node.kind() == SyntaxKind::DICT)
        .expect("dict");
    let outer = dict
        .descendants()
        .find(|node| node.kind() == SyntaxKind::BINARY_EXPR)
        .expect("outer binary");
    // Exclude `outer` itself when looking for the nested node.
    let has_nested = outer
        .descendants()
        .any(|node| node.kind() == SyntaxKind::BINARY_EXPR && node != outer);
    assert!(has_nested, "expected nested BINARY_EXPR");
}
2701
#[test]
fn method_shorthand_emits_closure() {
    // `key(params): body` must yield one CLOSURE holding both params.
    let parsed = parse_round_trip("{ add(a, b): a + b }");
    assert!(!parsed.has_errors());
    let closure_nodes: Vec<_> = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::CLOSURE)
        .collect();
    assert_eq!(closure_nodes.len(), 1, "expected exactly one CLOSURE node");
    let param_count = closure_nodes[0]
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::CLOSURE_PARAM)
        .count();
    assert_eq!(param_count, 2, "expected two CLOSURE_PARAMs");
}
2718
#[test]
fn standalone_paren_closure() {
    // `(params) => body` in value position yields one CLOSURE node.
    let parsed = parse_round_trip("{ f: (a, b) => a + b }");
    assert!(!parsed.has_errors());
    let closure_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::CLOSURE)
        .count();
    assert_eq!(closure_count, 1);
}
2730
#[test]
fn list_comprehension_emits_comprehension_node() {
    let parsed = parse_round_trip("{ xs: [x * 2 for x in src if x > 0] }");
    assert!(!parsed.has_errors());
    let root = parsed.syntax();
    let comprehension_count = root
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::COMPREHENSION)
        .count();
    assert_eq!(comprehension_count, 1);
    // The only `[...]` in the source is the comprehension, so no LIST
    // node may appear anywhere in the tree.
    let has_list = root
        .descendants()
        .any(|node| node.kind() == SyntaxKind::LIST);
    assert!(
        !has_list,
        "comprehension `[...]` should not also produce a LIST"
    );
}
2754
#[test]
fn match_expression_emits_match_node() {
    // Postfix `expr match { ... }` with three arms: one MATCH_EXPR node
    // and one MATCH_ARM per arm.
    let source = "{ render(item): item match { Image: \"i\", Text: \"t\", _ : \"u\" } }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let match_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::MATCH_EXPR)
        .count();
    assert_eq!(match_count, 1);

    let arm_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::MATCH_ARM)
        .count();
    assert_eq!(arm_count, 3);
}
2774
#[test]
fn underscore_match_catch_all_parses_clean() {
    // `_` is the Rust-style catch-all: it must parse without errors and
    // produce a WILDCARD pattern node (the node `*` used to produce).
    let source = "{ render(item): item match { Image: \"i\", _: \"u\" } }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let wildcard_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::WILDCARD)
        .count();
    assert_eq!(wildcard_count, 1);
}
2788
#[test]
fn star_in_match_arm_now_errors() {
    // `*` is no longer accepted as the pattern wildcard. Spelling the
    // catch-all that way is a parse error whose diagnostic names `_`.
    let source = "{ render(item): item match { Image: \"i\", *: \"u\" } }";
    let parsed = parse_round_trip(source);
    assert!(
        parsed.has_errors(),
        "`*` in a match arm must error (use `_`): {:?}",
        parsed.errors
    );

    let mentions_underscore = parsed
        .errors
        .iter()
        .any(|err| err.message.contains("`_`"));
    assert!(
        mentions_underscore,
        "diagnostic should point at `_`: {:?}",
        parsed.errors
    );
}
2805
#[test]
fn plus_plus_concat_now_errors() {
    // `++` used to parse but could never execute; string concatenation is
    // spelled `+`. The token is still consumed, so the round-trip stays
    // lossless and recovery stays structured, but the parse must carry a
    // migration diagnostic that points at `+`.
    let parsed = parse_round_trip("{ msg: \"a\" ++ \"b\" }");
    assert!(
        parsed.has_errors(),
        "`++` must error (use `+`): {:?}",
        parsed.errors
    );

    let has_migration_hint = parsed
        .errors
        .iter()
        .any(|err| err.message.contains("use `+` to concatenate strings"));
    assert!(
        has_migration_hint,
        "diagnostic should point at `+`: {:?}",
        parsed.errors
    );
}
2827
#[test]
fn plus_plus_in_main_body_errors() {
    // The `++` migration diagnostic must also fire when the expression
    // is reached through the `#main` body path.
    let parsed = parse_round_trip("#main(String s) -> String\ns ++ \"!\"\n");
    assert!(
        parsed.has_errors(),
        "`++` in a #main body must error: {:?}",
        parsed.errors
    );

    let has_migration_hint = parsed
        .errors
        .iter()
        .any(|err| err.message.contains("use `+` to concatenate strings"));
    assert!(
        has_migration_hint,
        "diagnostic should point at `+`: {:?}",
        parsed.errors
    );
}
2846
#[test]
fn underscore_closure_param_parses_clean() {
    // A lone `_` is a legal closure parameter name (the Rust-style ignore
    // binding). Splitting the wildcard out in the lexer must not break
    // it, so `(acc, _) => acc` has to parse without errors.
    let source = "{ f(n): range(n).reduce(0, (acc, _) => acc) }";
    let parsed = parse_round_trip(source);
    let is_clean = !parsed.has_errors();
    assert!(
        is_clean,
        "`_` closure param must parse clean: {:?}",
        parsed.errors
    );
}
2859
#[test]
fn schema_directive_with_body() {
    // A `#schema` header followed by the document dict: a clean parse
    // with exactly one DIRECTIVE node.
    let source = "#schema User { String name: *, Int age: * }\n{ a: 1 }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let directive_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::DIRECTIVE)
        .count();
    assert_eq!(directive_count, 1);
}
2871
#[test]
fn schema_with_generic_params_and_with_block() {
    // Generic parameters on the schema name plus a trailing `with { ... }`
    // block must parse without errors.
    let source =
        "#schema Result<T, E> { T value: *, E error: * } with { unwrap(): value }\n{ x: 1 }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
2879
#[test]
fn import_directive_round_trip() {
    // Plain `#import name from "path"` ahead of the document dict.
    let source = "#import string from \"std/string\"\n{ x: 1 }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
2885
#[test]
fn import_with_sha256_integrity_round_trip() {
    // An import carrying a `sha256:"..."` integrity pin parses cleanly.
    let source = "#import lib from \"./lib.relon\" sha256:\"deadbeef\"\n{ x: 1 }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
2892
#[test]
fn import_with_missing_hex_string_reports_error() {
    // When `sha256:` is followed by something other than a STRING, the
    // parser must report an error instead of silently consuming tokens.
    let source = "#import lib from \"./lib.relon\" sha256: bad\n{ x: 1 }";
    let parsed = parse_round_trip(source);
    let reported = parsed.has_errors();
    assert!(reported, "expected parse error for malformed integrity pin");
}
2903
#[test]
fn main_directive_round_trip() {
    // `#main` with typed parameters and a generic return type.
    let source = "#main(User u, Cart cart) -> Result<Order>\n{ x: 1 }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
2909
#[test]
fn f_string_emits_f_string_node() {
    let parsed = parse_round_trip(r#"{ msg: f"hello ${name}!" }"#);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let f_string_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::F_STRING)
        .count();
    assert_eq!(f_string_count, 1);

    let interpolations: Vec<_> = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::F_STRING_INTERPOLATION)
        .collect();
    assert_eq!(interpolations.len(), 1);

    // The `${name}` body is an ordinary expression, so it must contain a
    // VARIABLE_EXPR for `name`.
    let has_variable = interpolations[0]
        .descendants()
        .any(|node| node.kind() == SyntaxKind::VARIABLE_EXPR);
    assert!(has_variable, "expected VARIABLE_EXPR inside interp");
}
2934
#[test]
fn raw_f_string_round_trip() {
    // Raw f-string form `f#"..."#`; only the checks inside
    // `parse_round_trip` apply here.
    let source = "{ msg: f#\"raw ${x} text\"# }";
    parse_round_trip(source);
}
2939
#[test]
fn plain_string_still_literal() {
    // A string without the `f` prefix must never be decomposed into an
    // F_STRING node.
    let parsed = parse_round_trip(r#"{ x: "hi" }"#);
    let has_f_string = parsed
        .syntax()
        .descendants()
        .any(|node| node.kind() == SyntaxKind::F_STRING);
    assert!(!has_f_string, "plain string should not be F_STRING");
}
2950
#[test]
fn where_expression_emits_where_node() {
    // Postfix `expr where { bindings }` produces one WHERE_EXPR node.
    let parsed = parse_round_trip("{ x: a + b where { a: 1, b: 2 } }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let where_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::WHERE_EXPR)
        .count();
    assert_eq!(where_count, 1);
}
2962
#[test]
fn list_without_for_stays_list() {
    // A bracketed sequence with no `for` clause yields exactly one LIST.
    let parsed = parse_round_trip("{ xs: [1, 2, 3] }");
    assert!(!parsed.has_errors());

    let list_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::LIST)
        .count();
    assert_eq!(list_count, 1);
}
2974
#[test]
fn generic_type_in_closure_param() {
    let source = "{ extract(List<Int> xs, Option<String> sep): xs }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    // Expected shape: `List<Int>` + nested `Int` + `Option<String>` +
    // nested `String` = 4 TYPE_NODEs. The assertion deliberately checks
    // only a lower bound of 3.
    let type_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::TYPE_NODE)
        .count();
    assert!(
        type_count >= 3,
        "expected at least 3 TYPE_NODE, got {}",
        type_count
    );
}
2992
#[test]
fn type_suffix_question_is_rejected() {
    // Wave A: the type-suffix `?` (`Int?`, `Weather?`, `List<T>?`) is no
    // longer valid; optionality is written `Option<T>`. Every source
    // below must produce a parse error whose message mentions `Option<T>`.
    let rejected_sources = [
        "#main(Int? x) -> Int\n0\n",
        "{ extract(String? sep): sep }",
        "{ Weather? w: { a: 1 } }",
        "{ x: #brand Weather? { a: 1 } }",
        "{ extract(List<Int>? xs): xs }",
    ];
    for source in rejected_sources {
        let parsed = parse_cst(source);
        assert!(
            parsed.has_errors(),
            "expected a parse error for type-suffix `?` in {source:?}"
        );

        let has_option_hint = parsed
            .errors
            .iter()
            .any(|err| err.message.contains("Option<T>"));
        assert!(
            has_option_hint,
            "expected an `Option<T>` hint in errors for {source:?}, got {:?}",
            parsed.errors
        );
    }
}
3020
#[test]
fn option_type_in_main_signature_parses_clean() {
    // `Option<T>`, the migration target for the removed `T?` suffix,
    // parses without errors in a `#main` signature.
    let source = "#main(Option<Int> x) -> Int\n0\n";
    let parsed = parse_cst(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
3028
#[test]
fn optional_chaining_and_ternary_still_parse() {
    // Only the type-suffix `?` was removed. The call-chain `?` (optional
    // chaining) and the ternary `?:` must keep parsing cleanly.
    let accepted_sources = [
        "{ f(a): a?.b }",
        "{ f(a): a?[0] }",
        "{ f(a): a?.b?.c }",
        "{ g(x): x < 0 ? -x : x }",
    ];
    for source in accepted_sources {
        let parsed = parse_round_trip(source);
        let is_clean = !parsed.has_errors();
        assert!(
            is_clean,
            "unexpected errors for {source:?}: {:?}",
            parsed.errors
        );
    }
}
3047
#[test]
fn comparison_lt_not_treated_as_generics() {
    // The closure-param peek must not decide that `a < b` opens a typed
    // parameter: whitespace separates `a` from `<`. The field value
    // should therefore be a single binary expression.
    let parsed = parse_round_trip("{ f: a < b }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let binary_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::BINARY_EXPR)
        .count();
    assert_eq!(binary_count, 1, "expected one BINARY_EXPR");
}
3062
#[test]
fn typed_closure_param_records_type_node() {
    // Two typed parameters should each contribute at least one TYPE_NODE.
    let parsed = parse_round_trip("{ add(Int a, Int b): a + b }");
    assert!(!parsed.has_errors());

    let type_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::TYPE_NODE)
        .count();
    assert!(type_count >= 2, "expected TYPE_NODEs for typed params");
}
3077
#[test]
fn comments_round_trip() {
    // Line and block comments in header, inner, and trailing positions.
    let source = "// header\n{\n // inner\n x: 1, /* trail */ y: 2\n}\n";
    parse_round_trip(source);
}
3082
#[test]
fn error_recovery_preserves_bytes() {
    // Deliberately broken input: the colon after `foo` is missing.
    // Recovery wraps `42` in an ERROR node and resyncs at `,`, and the
    // source bytes stay intact end-to-end.
    let parsed = parse_round_trip("{ foo 42, bar: 1 }");
    let reported = parsed.has_errors();
    assert!(reported, "expected an error report");
}
3091
#[test]
fn unknown_byte_does_not_crash() {
    // A NUL character in value position must not panic the parser.
    let source = "{ x: \u{0000} 1 }";
    parse_round_trip(source);
}
3096
#[test]
fn variant_ctor_emits_variant_node() {
    // `Type.Variant { ... }` is a variant constructor: one VARIANT_CTOR.
    let parsed = parse_round_trip("{ x: Result.Ok { value: 1 } }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let ctor_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::VARIANT_CTOR)
        .count();
    assert_eq!(ctor_count, 1);
}
3108
#[test]
fn variant_ctor_three_segment_path() {
    // A three-segment dotted path before the brace still forms a single
    // VARIANT_CTOR.
    let parsed = parse_round_trip("{ x: Foo.Bar.Baz { field: 1 } }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let ctor_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::VARIANT_CTOR)
        .count();
    assert_eq!(ctor_count, 1);
}
3120
#[test]
fn dotted_access_without_brace_stays_variable() {
    // `foo.bar` with no following brace is plain member access and must
    // not turn into a VARIANT_CTOR; it keeps walking the postfix path.
    let parsed = parse_round_trip("{ x: foo.bar }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let has_ctor = parsed
        .syntax()
        .descendants()
        .any(|node| node.kind() == SyntaxKind::VARIANT_CTOR);
    assert!(!has_ctor, "single dotted access should not be a ctor");
}
3134
#[test]
fn named_call_args_parse_without_errors() {
    let parsed = parse_round_trip("{ y: map(f = g) }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    // A CALL_ARG holds the IDENT, EQ, and value side by side; the
    // lowering pass is what regroups them into a `CallArg`.
    let arg_nodes: Vec<_> = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::CALL_ARG)
        .collect();
    assert_eq!(arg_nodes.len(), 1);

    // Look only at the direct token children of the argument node.
    let carries_eq = arg_nodes[0]
        .children_with_tokens()
        .filter_map(|element| element.into_token())
        .map(|token| token.kind())
        .any(|kind| kind == SyntaxKind::EQ);
    assert!(carries_eq, "named arg should carry an EQ token");
}
3153
#[test]
fn mixed_positional_and_named_args() {
    // One positional argument followed by two named ones.
    let source = "{ z: f(1, name = expr, more = 2) }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
3159
#[test]
fn ternary_expression_emits_ternary_node() {
    // `cond ? then : else` produces exactly one TERNARY_EXPR.
    let parsed = parse_round_trip("{ x: a ? 1 : 2 }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let ternary_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::TERNARY_EXPR)
        .count();
    assert_eq!(ternary_count, 1, "expected one TERNARY_EXPR");
}
3171
#[test]
fn ternary_root_no_whitespace() {
    // The legacy parser accepts `true? 1:2`: every `?` / `:` boundary is
    // wrapped in `soc0`, so adjacent forms parse without spaces.
    let source = "true? 1:2";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
3179
#[test]
fn ternary_nested_in_else() {
    // The parse is right-recursive: in `a ? 1 : b ? 2 : 3` the outer
    // ternary's `els` is itself a ternary, giving two TERNARY_EXPR nodes.
    let parsed = parse_round_trip("{ x: a ? 1 : b ? 2 : 3 }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let ternary_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::TERNARY_EXPR)
        .count();
    assert_eq!(ternary_count, 2);
}
3193
#[test]
fn bare_directive_does_not_consume_next_field() {
    // `#internal` takes no body. The IDENT that follows it belongs to
    // the next dict field, not to the directive.
    let parsed = parse_round_trip("{ #internal\n field(s): s, next: 1 }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
3202
#[test]
fn dict_field_can_be_attribute_only() {
    // `#import x from "p"` swallows its entire body, so the field has
    // nothing but the attribute and the `,` belongs to the enclosing dict.
    let parsed = parse_round_trip("{ #import x from \"p\", next: 1 }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
3211
#[test]
fn schema_with_block_emits_method_nodes() {
    // Slice-opener for the schema with-block grammar: a single method
    // that carries a `#derive` pragma and a `Self`-typed parameter.
    let parsed = parse_round_trip(
        "#schema Money { Int cents: * } with {\n #derive Equatable\n eq(other: Self) -> Bool: self.cents == other.cents\n}\n{ Money p: { cents: 100 } }\n",
    );
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let with_nodes: Vec<_> = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::SCHEMA_WITH)
        .collect();
    assert_eq!(with_nodes.len(), 1);

    let method_nodes: Vec<_> = with_nodes[0]
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::SCHEMA_METHOD)
        .collect();
    assert_eq!(method_nodes.len(), 1);

    // Inside the method: the `#derive` directive and one CLOSURE_PARAM
    // (for `other`).
    let method = &method_nodes[0];
    let directive_count = method
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::DIRECTIVE)
        .count();
    assert_eq!(directive_count, 1);
    let param_count = method
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::CLOSURE_PARAM)
        .count();
    assert_eq!(param_count, 1);
}
3244
#[test]
fn schema_with_block_native_method_skips_body() {
    // A `#native` method is signature-only: there is no `: body` part.
    let parsed = parse_round_trip(
        "#schema Doc { String text: * } with {\n #native\n render() -> String\n}\n{}\n",
    );
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
3253
#[test]
fn tuple_index_access_round_trips() {
    // v1.8 positional access `xs.0`: a number after the dot is a valid
    // path tail, just like the identifier form `xs.field`.
    let source = "{ Int head: xs.0 }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
3261
#[test]
fn type_atom_for_brand_directive_body() {
    // In `#brand Dict<String, Int> { ... }` the directive body is a
    // type-tagged dict. The leading IDENT `Dict` (a known type head) has
    // to lower into a TYPE_NODE, otherwise the generics would be read as
    // binary `<` / `>` operators.
    let parsed = parse_round_trip("{ counters: #brand Dict<String, Int> { hits: 1 } }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let has_type_node = parsed
        .syntax()
        .descendants()
        .any(|node| node.kind() == SyntaxKind::TYPE_NODE);
    assert!(has_type_node, "expected a TYPE_NODE for Dict<...>");
}
3278
#[test]
fn typed_spread_round_trips() {
    // v1.3 typed spread `...<Type> expr`: the `<Type>` annotation sits
    // inside the SPREAD_EXPR, followed by the source expression.
    let parsed = parse_round_trip("{ val: { ...<Extra> base } }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let spread_nodes: Vec<_> = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::SPREAD_EXPR)
        .collect();
    assert_eq!(spread_nodes.len(), 1, "expected one SPREAD_EXPR");

    let carries_type = spread_nodes[0]
        .descendants()
        .any(|node| node.kind() == SyntaxKind::TYPE_NODE);
    assert!(carries_type, "typed spread should carry a TYPE_NODE");
}
3297
#[test]
fn tuple_type_in_dict_field_round_trips() {
    // v1.7: a tuple type may occupy the type-hint slot of a dict field.
    let source = "{ (Int, String) pair: (42, \"hello\") }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let tuple_type_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::TUPLE_TYPE)
        .count();
    assert_eq!(tuple_type_count, 1, "expected one TUPLE_TYPE");
}
3310
#[test]
fn tuple_type_inside_generic() {
    // A tuple type used as a generic argument still yields one TUPLE_TYPE.
    let source = "{ List<(Int, String)> rows: [(1, \"a\")] }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);

    let tuple_type_count = parsed
        .syntax()
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::TUPLE_TYPE)
        .count();
    assert_eq!(tuple_type_count, 1);
}
3322
#[test]
fn tuple_type_zero_and_one() {
    // Both the zero-tuple `()` and the one-tuple `(T,)` round-trip
    // cleanly. The one-tuple's trailing comma matters to the typed-AST
    // layer (it tells `(T,)` apart from parenthesised `(T)`), while the
    // CST just keeps the bytes verbatim.
    let source = "{ () unit: [], (Int,) one: [1] }";
    let parsed = parse_round_trip(source);
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
3332
#[test]
fn decorator_dotted_name_round_trips() {
    // Dotted decorator names such as `@ensure.int` and
    // `@ensure.at_least(1024)` occur in the corpus next to plain
    // `@name(...)`.
    let parsed = parse_round_trip("{ @ensure.int\n @ensure.at_least(1024)\n \"port\": 80 }");
    assert!(!parsed.has_errors(), "errors: {:?}", parsed.errors);
}
3341
/// Ratchet on the number of checked-in `.relon` fixtures that parse
/// with no ERROR nodes at all. Every P2 slice is expected to raise
/// the number; lowering it needs a deliberate, recorded reason.
///
/// The floor began at 30 with the closures slice and is bumped as
/// more P2 grammar lands.
#[test]
fn fixtures_clean_parse_floor() {
    // History of the floor, one bump per slice:
    //   - slice 1 (closures): ~60 of ~210 fixtures clean.
    //   - directive / match / where / type slices: 135.
    //   - P4-prep grammar gaps (ternary / named call args / variant
    //     ctor): 148.
    //   - directive-shape dispatch + attribute-only dict fields: 157
    //     (the following slices targeted tuple types, typed spreads,
    //     and the schema with-block named-param method grammar).
    //   - tuple types `(T1, T2)`: 165.
    //   - typed spreads `...<Type> expr`: 170.
    //   - schema with-block structured method nodes: 198.
    //   - tuple-index `.N` access, type-atom recognition for
    //     `#brand Dict<K, V> { ... }` / `Weather? { ... }`,
    //     Enum-with-struct-variant inside generic args, and
    //     expression-level leading attributes: 208.
    // The two remaining `.relon` files (`with_block_invalid/*.relon`)
    // are intentional parse-error fixtures used by the legacy parser's
    // negative test suite.
    const FLOOR: usize = 208;

    let clean = fixture_clean_parse_count();
    eprintln!("[parser] fixtures clean-parse count: {clean}");
    assert!(
        FLOOR <= clean,
        "regressed clean-parse count: floor={FLOOR}, actual={clean}",
    );
}
3375
3376 fn fixture_clean_parse_count() -> usize {
3377 use std::fs;
3378 use std::path::PathBuf;
3379
3380 let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
3381 let workspace_root = crate_dir
3382 .parent()
3383 .and_then(|p| p.parent())
3384 .expect("workspace root")
3385 .to_path_buf();
3386 let mut files = Vec::new();
3387 walk(&workspace_root, &mut files);
3388 files.retain(|p| !p.to_string_lossy().contains("/target/"));
3389 let mut clean = 0usize;
3390 for path in files {
3391 let source = fs::read_to_string(&path).unwrap_or_default();
3392 if source.is_empty() {
3393 continue;
3394 }
3395 let parsed = parse_cst(&source);
3396 if !parsed.has_errors() {
3397 clean += 1;
3398 }
3399 }
3400 clean
3401 }
3402
/// The strongest invariant of the CST: every checked-in `.relon`
/// file must come back byte-for-byte when its tree is printed.
/// Parse errors are expected and tolerated here (the v2 grammar
/// doesn't cover every construct yet); only the lossless-tree
/// property is asserted.
#[test]
fn every_fixture_round_trips_through_cst() {
    use std::fs;
    use std::path::PathBuf;

    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    // `ancestors()` yields the path itself first, so index 2 is the
    // grandparent directory.
    let workspace_root = manifest_dir
        .ancestors()
        .nth(2)
        .expect("workspace root")
        .to_path_buf();

    let mut discovered = Vec::new();
    walk(&workspace_root, &mut discovered);
    let fixtures: Vec<_> = discovered
        .into_iter()
        .filter(|path| !path.to_string_lossy().contains("/target/"))
        .collect();
    assert!(!fixtures.is_empty());

    for path in &fixtures {
        // Unlike the clean-parse counter, an unreadable fixture is a
        // hard failure in this test.
        let source = match fs::read_to_string(path) {
            Ok(text) => text,
            Err(e) => panic!("read {path:?}: {e}"),
        };
        let reconstructed = parse_cst(&source).syntax().text().to_string();
        assert_eq!(reconstructed, source, "round-trip mismatch on {path:?}");
    }
}
3430
/// Recursively appends every file with a `.relon` extension found under
/// `dir` to `out`.
///
/// Directories named `target`, `node_modules`, or `.git` are not
/// descended into. A directory that cannot be listed is skipped
/// silently, as are individual entries that fail to read.
fn walk(dir: &std::path::Path, out: &mut Vec<std::path::PathBuf>) {
    const SKIPPED_DIRS: [&str; 3] = ["target", "node_modules", ".git"];

    let entries = match std::fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    for path in entries.flatten().map(|entry| entry.path()) {
        if !path.is_dir() {
            // Plain entry: keep it only when the extension is exactly `relon`.
            if path.extension().and_then(|ext| ext.to_str()) == Some("relon") {
                out.push(path);
            }
            continue;
        }
        // A directory name that is not valid UTF-8 is treated as "" and
        // therefore never matches the skip list.
        let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if !SKIPPED_DIRS.contains(&dir_name) {
            walk(&path, out);
        }
    }
}
3448}