1use std::cell::Cell;
21use std::sync::Arc;
22
23use cstree::Syntax;
24use cstree::build::GreenNodeBuilder;
25use cstree::green::GreenNode;
26use cstree::interning::TokenInterner;
27use cstree::syntax::ResolvedNode;
28use text_size::{TextRange, TextSize};
29
30use crate::SyntaxKind;
31use crate::lexer::{RawToken, tokenize};
32use crate::prepass::run as run_prepass;
33
34mod grammar;
35
36#[derive(Debug, Clone)]
39pub struct Parse {
40 green: GreenNode,
41 interner: Arc<TokenInterner>,
42 errors: Vec<SyntaxError>,
43}
44
45impl Parse {
46 #[must_use]
49 pub fn syntax_node(&self) -> ResolvedNode<SyntaxKind> {
50 ResolvedNode::new_root_with_resolver(self.green.clone(), Arc::clone(&self.interner))
51 }
52
53 #[must_use]
55 pub fn errors(&self) -> &[SyntaxError] {
56 &self.errors
57 }
58
59 #[must_use]
61 pub fn green(&self) -> &GreenNode {
62 &self.green
63 }
64
65 #[must_use]
68 pub fn debug_tree(&self) -> String {
69 cstree::syntax::SyntaxNode::<SyntaxKind>::new_root(self.green.clone())
70 .debug(&self.interner, true)
71 }
72}
73
74impl PartialEq for Parse {
81 fn eq(&self, other: &Self) -> bool {
82 self.green == other.green && self.errors == other.errors
83 }
84}
85impl Eq for Parse {}
86
87#[derive(Debug, Clone, PartialEq, Eq)]
89pub struct SyntaxError {
90 pub range: TextRange,
92 pub message: String,
94}
95
96#[must_use]
98pub fn parse(text: &str) -> Parse {
99 let raw = tokenize(text);
100 let (tokens, indent_diags) = run_prepass(&raw, text);
101
102 let mut p = Parser::new(text, &tokens);
103 p.source_file();
104 let Parser {
105 events, mut errors, ..
106 } = p;
107
108 errors.extend(indent_diags.into_iter().map(|d| SyntaxError {
109 range: d.range,
110 message: d.message,
111 }));
112
113 let (green, interner) = build_tree(&events, &tokens, text);
114 Parse {
115 green,
116 interner,
117 errors,
118 }
119}
120
121#[derive(Debug, Clone, Copy)]
124enum Event {
125 Open { kind: SyntaxKind },
126 Close,
127 Advance,
128}
129
130struct Marker {
132 pos: usize,
133}
134
135#[derive(Clone, Copy)]
138struct MarkClosed {
139 pos: usize,
140}
141
142const FUEL: u32 = 256;
146
147struct Parser<'s> {
148 src: &'s str,
149 tokens: &'s [RawToken],
150 nontrivia: Vec<usize>,
152 pos: usize,
154 fuel: Cell<u32>,
155 events: Vec<Event>,
156 errors: Vec<SyntaxError>,
157}
158
159impl<'s> Parser<'s> {
160 fn new(src: &'s str, tokens: &'s [RawToken]) -> Self {
161 let nontrivia = tokens
162 .iter()
163 .enumerate()
164 .filter(|(_, t)| !t.kind.is_trivia())
165 .map(|(i, _)| i)
166 .collect();
167 Self {
168 src,
169 tokens,
170 nontrivia,
171 pos: 0,
172 fuel: Cell::new(FUEL),
173 events: Vec::new(),
174 errors: Vec::new(),
175 }
176 }
177
178 fn nth(&self, n: usize) -> SyntaxKind {
180 assert!(self.fuel.get() > 0, "parser stuck at position {}", self.pos);
181 self.fuel.set(self.fuel.get() - 1);
182 self.nontrivia
183 .get(self.pos + n)
184 .map_or(SyntaxKind::Eof, |&i| self.tokens[i].kind)
185 }
186
187 fn at(&self, kind: SyntaxKind) -> bool {
188 self.nth(0) == kind
189 }
190
191 fn at_any(&self, kinds: &[SyntaxKind]) -> bool {
192 kinds.contains(&self.nth(0))
193 }
194
195 fn eof(&self) -> bool {
196 self.pos >= self.nontrivia.len()
197 }
198
199 fn cur_range(&self) -> TextRange {
201 self.nontrivia.get(self.pos).map_or_else(
202 || TextRange::empty(TextSize::of(self.src)),
203 |&i| self.tokens[i].range,
204 )
205 }
206
207 fn cur_text(&self) -> &str {
210 self.nontrivia
211 .get(self.pos)
212 .map_or("", |&i| &self.src[self.tokens[i].range])
213 }
214
215 fn advance(&mut self) {
216 if self.eof() {
220 return;
221 }
222 self.fuel.set(FUEL);
223 self.events.push(Event::Advance);
224 self.pos += 1;
225 }
226
227 fn open(&mut self) -> Marker {
228 let m = Marker {
229 pos: self.events.len(),
230 };
231 self.events.push(Event::Open {
232 kind: SyntaxKind::Tombstone,
233 });
234 m
235 }
236
237 #[allow(clippy::needless_pass_by_value)]
240 fn close(&mut self, m: Marker, kind: SyntaxKind) -> MarkClosed {
241 self.events[m.pos] = Event::Open { kind };
242 self.events.push(Event::Close);
243 MarkClosed { pos: m.pos }
244 }
245
246 fn open_before(&mut self, m: MarkClosed) -> Marker {
249 self.events.insert(
250 m.pos,
251 Event::Open {
252 kind: SyntaxKind::Tombstone,
253 },
254 );
255 Marker { pos: m.pos }
256 }
257
258 fn eat(&mut self, kind: SyntaxKind) -> bool {
259 if self.at(kind) {
260 self.advance();
261 true
262 } else {
263 false
264 }
265 }
266
267 fn expect(&mut self, kind: SyntaxKind) {
269 if self.eat(kind) {
270 return;
271 }
272 self.error(format!("expected {kind:?}"));
273 }
274
275 fn error(&mut self, message: String) {
277 self.errors.push(SyntaxError {
278 range: self.cur_range(),
279 message,
280 });
281 }
282
283 fn advance_with_error(&mut self, message: &str) -> MarkClosed {
287 let m = self.open();
288 self.error(message.to_owned());
289 if !self.eof() {
290 self.advance();
291 }
292 self.close(m, SyntaxKind::ErrorNode)
293 }
294}
295
296fn build_tree(events: &[Event], tokens: &[RawToken], src: &str) -> (GreenNode, Arc<TokenInterner>) {
300 let mut builder: GreenNodeBuilder<'static, 'static, SyntaxKind> = GreenNodeBuilder::new();
301 let mut tok = 0usize;
302 let mut depth: u32 = 0;
303
304 for event in events {
305 match *event {
306 Event::Open { kind } => {
307 if kind == SyntaxKind::Tombstone {
308 continue; }
310 depth += 1;
311 builder.start_node(kind);
312 }
313 Event::Close => {
314 depth -= 1;
315 if depth == 0 {
316 while tok < tokens.len() {
319 emit(&mut builder, tokens[tok], src);
320 tok += 1;
321 }
322 }
323 builder.finish_node();
324 }
325 Event::Advance => {
326 while tok < tokens.len() && tokens[tok].kind.is_trivia() {
327 emit(&mut builder, tokens[tok], src);
328 tok += 1;
329 }
330 if tok < tokens.len() {
331 emit(&mut builder, tokens[tok], src);
332 tok += 1;
333 }
334 }
335 }
336 }
337
338 let (green, cache) = builder.finish();
339 let interner = cache
340 .expect("a builder created with `new()` owns its cache")
341 .into_interner()
342 .expect("the cache owns its interner");
343 (green, Arc::new(interner))
344}
345
346fn emit(builder: &mut GreenNodeBuilder<'static, 'static, SyntaxKind>, t: RawToken, src: &str) {
350 if <SyntaxKind as Syntax>::static_text(t.kind).is_some() {
351 builder.static_token(t.kind);
352 } else {
353 builder.token(t.kind, &src[t.range]);
354 }
355}
356
357#[cfg(test)]
358mod tests {
359 use super::*;
360
361 fn round_trips(src: &str) {
362 let parse = parse(src);
363 assert_eq!(
364 parse.syntax_node().to_string(),
365 src,
366 "round-trip mismatch for {src:?}",
367 );
368 }
369
370 #[test]
371 fn round_trips_a_function() {
372 round_trips("func f():\n\tpass\n");
373 }
374
375 #[test]
376 fn round_trips_inline_function() {
377 round_trips("func square(a): return a\n");
378 }
379
380 #[test]
381 fn round_trips_with_trivia() {
382 round_trips("## doc\nfunc _ready() -> void:\n\tpass\n\n# trailing comment\n");
383 }
384
385 #[test]
386 fn round_trips_multiple_functions() {
387 round_trips("func a():\n\tpass\nfunc b():\n\tpass\n");
388 }
389
390 #[test]
391 fn round_trips_empty_and_blank() {
392 round_trips("");
393 round_trips("\n\n");
394 round_trips("# only a comment\n");
395 }
396
397 #[test]
398 fn produces_expected_top_level_shape() {
399 let parse = parse("func f():\n\tpass\n");
400 let root = parse.syntax_node();
401 assert_eq!(root.kind(), SyntaxKind::SourceFile);
402 let func = root
403 .children()
404 .find(|n| n.kind() == SyntaxKind::FuncDecl)
405 .expect("a FuncDecl child");
406 assert!(func.children().any(|n| n.kind() == SyntaxKind::Block));
407 }
408
409 fn node_sexpr(node: &ResolvedNode<SyntaxKind>) -> String {
412 let mut s = format!("({:?}", node.kind());
413 for child in node.children() {
414 s.push(' ');
415 s.push_str(&node_sexpr(child));
416 }
417 s.push(')');
418 s
419 }
420
421 fn structure(src: &str) -> String {
422 node_sexpr(&parse(src).syntax_node())
423 }
424
425 #[test]
426 fn precedence_factor_binds_tighter_than_add() {
427 assert_eq!(
429 structure("var x = 1 + 2 * 3\n"),
430 "(SourceFile (VarDecl (Name) (BinExpr (Literal) (BinExpr (Literal) (Literal)))))"
431 );
432 assert_eq!(
434 structure("var x = 1 * 2 + 3\n"),
435 "(SourceFile (VarDecl (Name) (BinExpr (BinExpr (Literal) (Literal)) (Literal))))"
436 );
437 }
438
439 #[test]
440 fn power_is_left_associative() {
441 assert_eq!(
443 structure("var x = 2 ** 3 ** 4\n"),
444 "(SourceFile (VarDecl (Name) (BinExpr (BinExpr (Literal) (Literal)) (Literal))))"
445 );
446 }
447
448 #[test]
449 fn unary_minus_then_power() {
450 assert_eq!(
452 structure("var x = -2 ** 2\n"),
453 "(SourceFile (VarDecl (Name) (UnaryExpr (BinExpr (Literal) (Literal)))))"
454 );
455 }
456
457 #[test]
458 fn ternary_is_right_associative() {
459 assert_eq!(
460 structure("var x = a if c else b\n"),
461 "(SourceFile (VarDecl (Name) (TernaryExpr (NameRef) (NameRef) (NameRef))))"
462 );
463 }
464
465 #[test]
466 fn postfix_chain_call_field_index() {
467 assert_eq!(
469 structure("var x = a.b().c[0]\n"),
470 "(SourceFile (VarDecl (Name) (IndexExpr (FieldExpr (CallExpr (FieldExpr (NameRef) \
471 (NameRef)) (ArgList)) (NameRef)) (Literal))))"
472 );
473 }
474
475 #[test]
476 fn leading_utf8_bom_is_trivia_not_an_error() {
477 let src = "\u{feff}class_name Foo\nextends Node\n";
480 let parse = parse(src);
481 assert_eq!(
482 parse.syntax_node().to_string(),
483 src,
484 "BOM file must round-trip byte-for-byte"
485 );
486 assert!(
487 parse.errors().is_empty(),
488 "BOM-prefixed file should parse clean: {:?}",
489 parse.errors()
490 );
491 assert!(
493 structure(src).starts_with("(SourceFile (ClassNameDecl"),
494 "{}",
495 structure(src)
496 );
497 }
498
499 #[test]
500 fn multiline_lambda_does_not_absorb_following_paren_line() {
501 let src = "func f():\n\tvar cb := func():\n\t\treturn 1\n\t(self).process()\n";
506 let st = structure(src);
507 assert!(
508 st.contains("(VarDecl (Name) (LambdaExpr"),
509 "lambda should be the var initializer, standalone: {st}"
510 );
511 assert!(
512 !st.contains("CallExpr (LambdaExpr"),
513 "the following `(` line must not be absorbed as a call on the lambda: {st}"
514 );
515 assert!(
517 st.contains("(ExprStmt (CallExpr (FieldExpr (ParenExpr"),
518 "the `(self).process()` line should be its own statement: {st}"
519 );
520 round_trips(src);
521 }
522
523 #[test]
524 fn inline_lambda_still_chains_postfix() {
525 let src = "var x = (func(): return 1).call()\n";
528 let st = structure(src);
529 assert!(
530 st.contains("CallExpr (FieldExpr (ParenExpr (LambdaExpr"),
531 "inline lambda should still accept a postfix chain: {st}"
532 );
533 round_trips(src);
534 }
535
536 #[test]
537 fn statement_level_annotation_in_a_body_parses_clean() {
538 let src = "func f():\n\t@warning_ignore(\"integer_division\")\n\tvar x := 1 / 2\n";
542 let parse = parse(src);
543 assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
544 round_trips(src);
545 }
546
547 #[test]
548 fn multiline_lambda_arg_with_dedented_closer_parses_clean() {
549 let src = "func f():\n\tobj.call(func():\n\t\t\tbody()\n\t\t)\n";
553 let parse = parse(src);
554 assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
555 round_trips(src);
556 }
557
558 #[test]
559 fn multiline_lambda_body_ending_at_a_comma_parses_clean() {
560 let src = "func f():\n\tobj.call(\n\t\tfunc(v):\n\t\t\tuse(v), 0.0, 1.0)\n";
564 let parse = parse(src);
565 assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
566 round_trips(src);
567 }
568
569 const CORPUS: &str = r#"@tool
572class_name Player extends CharacterBody2D
573## A documented player controller.
574
575const SPEED := 300.0
576@export var health: int = 100
577@export_range(0, 100) var armor := 0
578static var instances: Array[Player] = []
579
580enum State { IDLE, RUNNING, JUMPING = 10 }
581
582signal died(reason: String)
583
584var _vel: Vector2 = Vector2.ZERO:
585 get:
586 return _vel
587 set(value):
588 _vel = value
589
590class Inner extends RefCounted:
591 var x = 1
592 func helper() -> int:
593 return x * 2
594
595func _ready() -> void:
596 var node := $Sprite2D
597 var unique = %HealthBar
598 add_child(preload("res://thing.tscn").instantiate())
599 for i in range(0, 10):
600 if i % 2 == 0 and i > 0:
601 print(i, " even")
602 elif i == 5:
603 continue
604 else:
605 pass
606 while health > 0:
607 health -= 1
608 match State.IDLE:
609 State.IDLE, State.RUNNING:
610 pass
611 [var first, ..]:
612 print(first)
613 {"key": var v} when v > 0:
614 print(v)
615 _:
616 breakpoint
617 var cb := func(a: int, b := 2) -> int: return a + b
618 var ok = node is Node2D
619 var cast = node as Sprite2D
620 assert(health >= 0, "negative health")
621 died.emit("test")
622"#;
623
624 #[test]
625 fn corpus_round_trips_byte_for_byte() {
626 round_trips(CORPUS);
627 }
628
629 #[test]
630 fn corpus_parses_without_unexpected_errors() {
631 let parse = parse(CORPUS);
633 assert!(
634 parse.errors().is_empty(),
635 "unexpected parse errors:\n{:#?}",
636 parse.errors()
637 );
638 }
639
640 #[test]
641 fn inline_if_elif_else_clauses_attach() {
642 let src = "func f():\n\tif a: x = 1\n\telif b: x = 2\n\telse: x = 3\n";
647 let parse = parse(src);
648 assert_eq!(parse.syntax_node().to_string(), src, "lossless");
649 assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
650 let root = parse.syntax_node();
651 let if_stmt = root
652 .descendants()
653 .find(|n| n.kind() == SyntaxKind::IfStmt)
654 .expect("an IfStmt node");
655 assert!(
656 if_stmt
657 .descendants()
658 .any(|n| n.kind() == SyntaxKind::ElifClause),
659 "elif clause attached to the if"
660 );
661 assert!(
662 if_stmt
663 .descendants()
664 .any(|n| n.kind() == SyntaxKind::ElseClause),
665 "else clause attached to the if"
666 );
667 }
668
669 #[test]
670 fn soft_keyword_names_parse() {
671 let src = "static func match(when: bool) -> int:\n\tvar r = RUIRouteMatcher.match(when)\n\treturn when\n";
675 let parse = parse(src);
676 assert_eq!(parse.syntax_node().to_string(), src, "lossless");
677 assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
678 }
679
680 #[test]
681 fn multiline_lambda_with_trailing_call_paren() {
682 let src = "func f():\n\tt.connect(func():\n\t\tif ok:\n\t\t\tp.free())\n";
685 let parse = parse(src);
686 assert_eq!(parse.syntax_node().to_string(), src, "lossless");
687 assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
688 }
689
690 #[test]
691 fn multiline_lambda_in_call_argument_parses() {
692 let src = "func f():\n\tcb(func(a, b):\n\t\treturn a + b\n\t)\n";
694 let parse = parse(src);
695 assert_eq!(parse.syntax_node().to_string(), src, "lossless");
696 assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
697 let root = parse.syntax_node();
698 let lambda = root
699 .descendants()
700 .find(|n| n.kind() == SyntaxKind::LambdaExpr)
701 .expect("a LambdaExpr node");
702 let block = lambda
703 .children()
704 .find(|n| n.kind() == SyntaxKind::Block)
705 .expect("the lambda body Block");
706 assert!(
707 block
708 .descendants()
709 .any(|n| n.kind() == SyntaxKind::ReturnStmt),
710 "the lambda body contains the return statement"
711 );
712 }
713
714 #[test]
715 fn single_line_lambda_in_call_argument_parses() {
716 let src = "var m = arr.map(func(x): x * 2)\n";
718 let parse = parse(src);
719 assert_eq!(parse.syntax_node().to_string(), src, "lossless");
720 assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
721 assert!(
722 parse
723 .syntax_node()
724 .descendants()
725 .any(|n| n.kind() == SyntaxKind::LambdaExpr),
726 "a LambdaExpr node"
727 );
728 }
729
730 #[test]
731 fn broken_code_recovers_and_round_trips() {
732 let src = "func ok():\n\tpass\nfunc bad(:\n\tpass\nfunc also_ok():\n\tpass\n";
735 let parse = parse(src);
736 assert_eq!(
737 parse.syntax_node().to_string(),
738 src,
739 "recovery must stay lossless"
740 );
741 assert!(!parse.errors().is_empty(), "expected a syntax error");
742 let funcs = parse
744 .syntax_node()
745 .children()
746 .filter(|n| n.kind() == SyntaxKind::FuncDecl)
747 .count();
748 assert!(
749 funcs >= 2,
750 "siblings should survive a broken declaration, got {funcs}"
751 );
752 }
753
754 #[test]
755 fn golden_small_class() {
756 let parse = parse("class_name Foo\nvar x := 1\n");
757 expect_test::expect_file!["../test_data/golden/small_class.cst"]
758 .assert_eq(&parse.debug_tree());
759 }
760}