use std::cell::Cell;
use std::sync::Arc;
use cstree::Syntax;
use cstree::build::GreenNodeBuilder;
use cstree::green::GreenNode;
use cstree::interning::TokenInterner;
use cstree::syntax::ResolvedNode;
use text_size::{TextRange, TextSize};
use crate::SyntaxKind;
use crate::lexer::{RawToken, tokenize};
use crate::prepass::run as run_prepass;
mod grammar;
#[derive(Debug, Clone)]
pub struct Parse {
green: GreenNode,
interner: Arc<TokenInterner>,
errors: Vec<SyntaxError>,
}
impl Parse {
#[must_use]
pub fn syntax_node(&self) -> ResolvedNode<SyntaxKind> {
ResolvedNode::new_root_with_resolver(self.green.clone(), Arc::clone(&self.interner))
}
#[must_use]
pub fn errors(&self) -> &[SyntaxError] {
&self.errors
}
#[must_use]
pub fn green(&self) -> &GreenNode {
&self.green
}
#[must_use]
pub fn debug_tree(&self) -> String {
cstree::syntax::SyntaxNode::<SyntaxKind>::new_root(self.green.clone())
.debug(&self.interner, true)
}
}
impl PartialEq for Parse {
fn eq(&self, other: &Self) -> bool {
self.green == other.green && self.errors == other.errors
}
}
impl Eq for Parse {}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SyntaxError {
pub range: TextRange,
pub message: String,
}
#[must_use]
pub fn parse(text: &str) -> Parse {
let raw = tokenize(text);
let (tokens, indent_diags) = run_prepass(&raw, text);
let mut p = Parser::new(text, &tokens);
p.source_file();
let Parser {
events, mut errors, ..
} = p;
errors.extend(indent_diags.into_iter().map(|d| SyntaxError {
range: d.range,
message: d.message,
}));
let (green, interner) = build_tree(&events, &tokens, text);
Parse {
green,
interner,
errors,
}
}
#[derive(Debug, Clone, Copy)]
enum Event {
Open { kind: SyntaxKind },
Close,
Advance,
}
struct Marker {
pos: usize,
}
#[derive(Clone, Copy)]
struct MarkClosed {
pos: usize,
}
const FUEL: u32 = 256;
struct Parser<'s> {
src: &'s str,
tokens: &'s [RawToken],
nontrivia: Vec<usize>,
pos: usize,
fuel: Cell<u32>,
events: Vec<Event>,
errors: Vec<SyntaxError>,
}
impl<'s> Parser<'s> {
fn new(src: &'s str, tokens: &'s [RawToken]) -> Self {
let nontrivia = tokens
.iter()
.enumerate()
.filter(|(_, t)| !t.kind.is_trivia())
.map(|(i, _)| i)
.collect();
Self {
src,
tokens,
nontrivia,
pos: 0,
fuel: Cell::new(FUEL),
events: Vec::new(),
errors: Vec::new(),
}
}
fn nth(&self, n: usize) -> SyntaxKind {
assert!(self.fuel.get() > 0, "parser stuck at position {}", self.pos);
self.fuel.set(self.fuel.get() - 1);
self.nontrivia
.get(self.pos + n)
.map_or(SyntaxKind::Eof, |&i| self.tokens[i].kind)
}
fn at(&self, kind: SyntaxKind) -> bool {
self.nth(0) == kind
}
fn at_any(&self, kinds: &[SyntaxKind]) -> bool {
kinds.contains(&self.nth(0))
}
fn eof(&self) -> bool {
self.pos >= self.nontrivia.len()
}
fn cur_range(&self) -> TextRange {
self.nontrivia.get(self.pos).map_or_else(
|| TextRange::empty(TextSize::of(self.src)),
|&i| self.tokens[i].range,
)
}
fn cur_text(&self) -> &str {
self.nontrivia
.get(self.pos)
.map_or("", |&i| &self.src[self.tokens[i].range])
}
fn advance(&mut self) {
if self.eof() {
return;
}
self.fuel.set(FUEL);
self.events.push(Event::Advance);
self.pos += 1;
}
fn open(&mut self) -> Marker {
let m = Marker {
pos: self.events.len(),
};
self.events.push(Event::Open {
kind: SyntaxKind::Tombstone,
});
m
}
#[allow(clippy::needless_pass_by_value)]
fn close(&mut self, m: Marker, kind: SyntaxKind) -> MarkClosed {
self.events[m.pos] = Event::Open { kind };
self.events.push(Event::Close);
MarkClosed { pos: m.pos }
}
fn open_before(&mut self, m: MarkClosed) -> Marker {
self.events.insert(
m.pos,
Event::Open {
kind: SyntaxKind::Tombstone,
},
);
Marker { pos: m.pos }
}
fn eat(&mut self, kind: SyntaxKind) -> bool {
if self.at(kind) {
self.advance();
true
} else {
false
}
}
fn expect(&mut self, kind: SyntaxKind) {
if self.eat(kind) {
return;
}
self.error(format!("expected {kind:?}"));
}
fn error(&mut self, message: String) {
self.errors.push(SyntaxError {
range: self.cur_range(),
message,
});
}
fn advance_with_error(&mut self, message: &str) -> MarkClosed {
let m = self.open();
self.error(message.to_owned());
if !self.eof() {
self.advance();
}
self.close(m, SyntaxKind::ErrorNode)
}
}
fn build_tree(events: &[Event], tokens: &[RawToken], src: &str) -> (GreenNode, Arc<TokenInterner>) {
let mut builder: GreenNodeBuilder<'static, 'static, SyntaxKind> = GreenNodeBuilder::new();
let mut tok = 0usize;
let mut depth: u32 = 0;
for event in events {
match *event {
Event::Open { kind } => {
if kind == SyntaxKind::Tombstone {
continue; }
depth += 1;
builder.start_node(kind);
}
Event::Close => {
depth -= 1;
if depth == 0 {
while tok < tokens.len() {
emit(&mut builder, tokens[tok], src);
tok += 1;
}
}
builder.finish_node();
}
Event::Advance => {
while tok < tokens.len() && tokens[tok].kind.is_trivia() {
emit(&mut builder, tokens[tok], src);
tok += 1;
}
if tok < tokens.len() {
emit(&mut builder, tokens[tok], src);
tok += 1;
}
}
}
}
let (green, cache) = builder.finish();
let interner = cache
.expect("a builder created with `new()` owns its cache")
.into_interner()
.expect("the cache owns its interner");
(green, Arc::new(interner))
}
fn emit(builder: &mut GreenNodeBuilder<'static, 'static, SyntaxKind>, t: RawToken, src: &str) {
if <SyntaxKind as Syntax>::static_text(t.kind).is_some() {
builder.static_token(t.kind);
} else {
builder.token(t.kind, &src[t.range]);
}
}
#[cfg(test)]
mod tests {
use super::*;
fn round_trips(src: &str) {
let parse = parse(src);
assert_eq!(
parse.syntax_node().to_string(),
src,
"round-trip mismatch for {src:?}",
);
}
#[test]
fn round_trips_a_function() {
round_trips("func f():\n\tpass\n");
}
#[test]
fn round_trips_inline_function() {
round_trips("func square(a): return a\n");
}
#[test]
fn round_trips_with_trivia() {
round_trips("## doc\nfunc _ready() -> void:\n\tpass\n\n# trailing comment\n");
}
#[test]
fn round_trips_multiple_functions() {
round_trips("func a():\n\tpass\nfunc b():\n\tpass\n");
}
#[test]
fn round_trips_empty_and_blank() {
round_trips("");
round_trips("\n\n");
round_trips("# only a comment\n");
}
#[test]
fn produces_expected_top_level_shape() {
let parse = parse("func f():\n\tpass\n");
let root = parse.syntax_node();
assert_eq!(root.kind(), SyntaxKind::SourceFile);
let func = root
.children()
.find(|n| n.kind() == SyntaxKind::FuncDecl)
.expect("a FuncDecl child");
assert!(func.children().any(|n| n.kind() == SyntaxKind::Block));
}
fn node_sexpr(node: &ResolvedNode<SyntaxKind>) -> String {
let mut s = format!("({:?}", node.kind());
for child in node.children() {
s.push(' ');
s.push_str(&node_sexpr(child));
}
s.push(')');
s
}
fn structure(src: &str) -> String {
node_sexpr(&parse(src).syntax_node())
}
#[test]
fn precedence_factor_binds_tighter_than_add() {
assert_eq!(
structure("var x = 1 + 2 * 3\n"),
"(SourceFile (VarDecl (Name) (BinExpr (Literal) (BinExpr (Literal) (Literal)))))"
);
assert_eq!(
structure("var x = 1 * 2 + 3\n"),
"(SourceFile (VarDecl (Name) (BinExpr (BinExpr (Literal) (Literal)) (Literal))))"
);
}
#[test]
fn power_is_left_associative() {
assert_eq!(
structure("var x = 2 ** 3 ** 4\n"),
"(SourceFile (VarDecl (Name) (BinExpr (BinExpr (Literal) (Literal)) (Literal))))"
);
}
#[test]
fn unary_minus_then_power() {
assert_eq!(
structure("var x = -2 ** 2\n"),
"(SourceFile (VarDecl (Name) (UnaryExpr (BinExpr (Literal) (Literal)))))"
);
}
#[test]
fn ternary_is_right_associative() {
assert_eq!(
structure("var x = a if c else b\n"),
"(SourceFile (VarDecl (Name) (TernaryExpr (NameRef) (NameRef) (NameRef))))"
);
}
#[test]
fn postfix_chain_call_field_index() {
assert_eq!(
structure("var x = a.b().c[0]\n"),
"(SourceFile (VarDecl (Name) (IndexExpr (FieldExpr (CallExpr (FieldExpr (NameRef) \
(NameRef)) (ArgList)) (NameRef)) (Literal))))"
);
}
#[test]
fn leading_utf8_bom_is_trivia_not_an_error() {
let src = "\u{feff}class_name Foo\nextends Node\n";
let parse = parse(src);
assert_eq!(
parse.syntax_node().to_string(),
src,
"BOM file must round-trip byte-for-byte"
);
assert!(
parse.errors().is_empty(),
"BOM-prefixed file should parse clean: {:?}",
parse.errors()
);
assert!(
structure(src).starts_with("(SourceFile (ClassNameDecl"),
"{}",
structure(src)
);
}
#[test]
fn multiline_lambda_does_not_absorb_following_paren_line() {
let src = "func f():\n\tvar cb := func():\n\t\treturn 1\n\t(self).process()\n";
let st = structure(src);
assert!(
st.contains("(VarDecl (Name) (LambdaExpr"),
"lambda should be the var initializer, standalone: {st}"
);
assert!(
!st.contains("CallExpr (LambdaExpr"),
"the following `(` line must not be absorbed as a call on the lambda: {st}"
);
assert!(
st.contains("(ExprStmt (CallExpr (FieldExpr (ParenExpr"),
"the `(self).process()` line should be its own statement: {st}"
);
round_trips(src);
}
#[test]
fn inline_lambda_still_chains_postfix() {
let src = "var x = (func(): return 1).call()\n";
let st = structure(src);
assert!(
st.contains("CallExpr (FieldExpr (ParenExpr (LambdaExpr"),
"inline lambda should still accept a postfix chain: {st}"
);
round_trips(src);
}
#[test]
fn statement_level_annotation_in_a_body_parses_clean() {
let src = "func f():\n\t@warning_ignore(\"integer_division\")\n\tvar x := 1 / 2\n";
let parse = parse(src);
assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
round_trips(src);
}
#[test]
fn multiline_lambda_arg_with_dedented_closer_parses_clean() {
let src = "func f():\n\tobj.call(func():\n\t\t\tbody()\n\t\t)\n";
let parse = parse(src);
assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
round_trips(src);
}
#[test]
fn multiline_lambda_body_ending_at_a_comma_parses_clean() {
let src = "func f():\n\tobj.call(\n\t\tfunc(v):\n\t\t\tuse(v), 0.0, 1.0)\n";
let parse = parse(src);
assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
round_trips(src);
}
const CORPUS: &str = r#"@tool
class_name Player extends CharacterBody2D
## A documented player controller.
const SPEED := 300.0
@export var health: int = 100
@export_range(0, 100) var armor := 0
static var instances: Array[Player] = []
enum State { IDLE, RUNNING, JUMPING = 10 }
signal died(reason: String)
var _vel: Vector2 = Vector2.ZERO:
get:
return _vel
set(value):
_vel = value
class Inner extends RefCounted:
var x = 1
func helper() -> int:
return x * 2
func _ready() -> void:
var node := $Sprite2D
var unique = %HealthBar
add_child(preload("res://thing.tscn").instantiate())
for i in range(0, 10):
if i % 2 == 0 and i > 0:
print(i, " even")
elif i == 5:
continue
else:
pass
while health > 0:
health -= 1
match State.IDLE:
State.IDLE, State.RUNNING:
pass
[var first, ..]:
print(first)
{"key": var v} when v > 0:
print(v)
_:
breakpoint
var cb := func(a: int, b := 2) -> int: return a + b
var ok = node is Node2D
var cast = node as Sprite2D
assert(health >= 0, "negative health")
died.emit("test")
"#;
#[test]
fn corpus_round_trips_byte_for_byte() {
round_trips(CORPUS);
}
#[test]
fn corpus_parses_without_unexpected_errors() {
let parse = parse(CORPUS);
assert!(
parse.errors().is_empty(),
"unexpected parse errors:\n{:#?}",
parse.errors()
);
}
#[test]
fn inline_if_elif_else_clauses_attach() {
let src = "func f():\n\tif a: x = 1\n\telif b: x = 2\n\telse: x = 3\n";
let parse = parse(src);
assert_eq!(parse.syntax_node().to_string(), src, "lossless");
assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
let root = parse.syntax_node();
let if_stmt = root
.descendants()
.find(|n| n.kind() == SyntaxKind::IfStmt)
.expect("an IfStmt node");
assert!(
if_stmt
.descendants()
.any(|n| n.kind() == SyntaxKind::ElifClause),
"elif clause attached to the if"
);
assert!(
if_stmt
.descendants()
.any(|n| n.kind() == SyntaxKind::ElseClause),
"else clause attached to the if"
);
}
#[test]
fn soft_keyword_names_parse() {
let src = "static func match(when: bool) -> int:\n\tvar r = RUIRouteMatcher.match(when)\n\treturn when\n";
let parse = parse(src);
assert_eq!(parse.syntax_node().to_string(), src, "lossless");
assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
}
#[test]
fn multiline_lambda_with_trailing_call_paren() {
let src = "func f():\n\tt.connect(func():\n\t\tif ok:\n\t\t\tp.free())\n";
let parse = parse(src);
assert_eq!(parse.syntax_node().to_string(), src, "lossless");
assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
}
#[test]
fn multiline_lambda_in_call_argument_parses() {
let src = "func f():\n\tcb(func(a, b):\n\t\treturn a + b\n\t)\n";
let parse = parse(src);
assert_eq!(parse.syntax_node().to_string(), src, "lossless");
assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
let root = parse.syntax_node();
let lambda = root
.descendants()
.find(|n| n.kind() == SyntaxKind::LambdaExpr)
.expect("a LambdaExpr node");
let block = lambda
.children()
.find(|n| n.kind() == SyntaxKind::Block)
.expect("the lambda body Block");
assert!(
block
.descendants()
.any(|n| n.kind() == SyntaxKind::ReturnStmt),
"the lambda body contains the return statement"
);
}
#[test]
fn single_line_lambda_in_call_argument_parses() {
let src = "var m = arr.map(func(x): x * 2)\n";
let parse = parse(src);
assert_eq!(parse.syntax_node().to_string(), src, "lossless");
assert!(parse.errors().is_empty(), "no errors: {:?}", parse.errors());
assert!(
parse
.syntax_node()
.descendants()
.any(|n| n.kind() == SyntaxKind::LambdaExpr),
"a LambdaExpr node"
);
}
#[test]
fn broken_code_recovers_and_round_trips() {
let src = "func ok():\n\tpass\nfunc bad(:\n\tpass\nfunc also_ok():\n\tpass\n";
let parse = parse(src);
assert_eq!(
parse.syntax_node().to_string(),
src,
"recovery must stay lossless"
);
assert!(!parse.errors().is_empty(), "expected a syntax error");
let funcs = parse
.syntax_node()
.children()
.filter(|n| n.kind() == SyntaxKind::FuncDecl)
.count();
assert!(
funcs >= 2,
"siblings should survive a broken declaration, got {funcs}"
);
}
#[test]
fn golden_small_class() {
let parse = parse("class_name Foo\nvar x := 1\n");
expect_test::expect_file!["../test_data/golden/small_class.cst"]
.assert_eq(&parse.debug_tree());
}
}