use std::mem;
use std::ops::{Index, IndexMut, Range};
use ecow::{EcoString, eco_format};
use rustc_hash::{FxHashMap, FxHashSet};
use typst_utils::default_math_class;
use unicode_math_class::MathClass;
use crate::set::{SyntaxSet, syntax_set};
use crate::{Lexer, SyntaxError, SyntaxKind, SyntaxMode, SyntaxNode, ast, set};
/// Parses `text` as a whole source file in markup mode.
///
/// Returns a single `Markup` node wrapping everything that was parsed.
pub fn parse(text: &str) -> SyntaxNode {
    let _scope = typst_timing::TimingScope::new("parse");
    let mut parser = Parser::new(text, 0, SyntaxMode::Markup);
    // `at_start = true`: parsing begins at offset zero; only `End` stops it.
    markup_exprs(&mut parser, true, syntax_set!(End));
    parser.finish_into(SyntaxKind::Markup)
}
/// Parses `text` as top-level code.
///
/// Returns a single `Code` node wrapping everything that was parsed.
pub fn parse_code(text: &str) -> SyntaxNode {
    let _scope = typst_timing::TimingScope::new("parse code");
    let mut parser = Parser::new(text, 0, SyntaxMode::Code);
    // Only `End` terminates the expression list.
    code_exprs(&mut parser, syntax_set!(End));
    parser.finish_into(SyntaxKind::Code)
}
/// Parses `text` as top-level math.
///
/// Returns a single `Math` node wrapping everything that was parsed.
pub fn parse_math(text: &str) -> SyntaxNode {
    let _scope = typst_timing::TimingScope::new("parse math");
    let mut parser = Parser::new(text, 0, SyntaxMode::Math);
    // The number of parsed expressions is not needed at the top level.
    math_exprs(&mut parser, syntax_set!(End));
    parser.finish_into(SyntaxKind::Math)
}
/// Parses markup expressions until a token in `stop_set` and wraps them in a
/// `Markup` node.
///
/// When `wrap_trivia` is set, the trivia preceding the first token and the
/// trivia preceding the stopping token are included in the wrapped node.
fn markup(p: &mut Parser, at_start: bool, wrap_trivia: bool, stop_set: SyntaxSet) {
    let start = if wrap_trivia {
        // Begin the node before any trivia that precedes the current token.
        p.before_trivia()
    } else {
        p.marker()
    };

    markup_exprs(p, at_start, stop_set);

    if wrap_trivia {
        // Forget the pending trivia so `wrap` swallows it as well.
        p.flush_trivia();
    }

    p.wrap(start, SyntaxKind::Markup);
}
/// Parses a sequence of markup expressions until a token in `stop_set`.
///
/// `stop_set` must contain `End` (checked in debug builds). A right bracket
/// that closes a bracket opened within this sequence does not stop parsing,
/// even if `RightBracket` is part of `stop_set`.
fn markup_exprs(p: &mut Parser, mut at_start: bool, stop_set: SyntaxSet) {
    debug_assert!(stop_set.contains(SyntaxKind::End));
    at_start |= p.had_newline();

    // Number of currently open, not yet closed `[` seen in this sequence.
    let mut depth: usize = 0;
    loop {
        let closes_nested = depth > 0 && p.at(SyntaxKind::RightBracket);
        if p.at_set(stop_set) && !closes_nested {
            break;
        }

        markup_expr(p, at_start, &mut depth);
        // The next expression starts a line iff a newline preceded it.
        at_start = p.had_newline();
    }
}
/// Reparses the markup in `range` of `text` for incremental parsing.
///
/// `at_start` and `nesting` are read as the state at `range.start` and are
/// updated while parsing. When `top_level` is false, an unmatched right
/// bracket ends the reparse early.
///
/// Returns the parsed nodes only if the parser stayed balanced and the
/// current token starts exactly at `range.end`; otherwise returns `None`.
pub(super) fn reparse_markup(
    text: &str,
    range: Range<usize>,
    at_start: &mut bool,
    nesting: &mut usize,
    top_level: bool,
) -> Option<Vec<SyntaxNode>> {
    let mut p = Parser::new(text, range.start, SyntaxMode::Markup);
    *at_start |= p.had_newline();

    loop {
        if p.end() || p.current_start() >= range.end {
            break;
        }

        // A right bracket without a matching opener belongs to an outer
        // construct, so a nested reparse must not consume it.
        let unmatched_close = *nesting == 0 && p.at(SyntaxKind::RightBracket);
        if !top_level && unmatched_close {
            break;
        }

        markup_expr(&mut p, *at_start, nesting);
        *at_start = p.had_newline();
    }

    if p.balanced && p.current_start() == range.end {
        Some(p.finish())
    } else {
        None
    }
}
/// Parses a single markup expression, dispatching on the current token.
///
/// `at_start` says whether the token is at the start of a line, which is
/// required for heading, list, enum and term markers to be structural.
/// `nesting` counts the plain-text `[` that are currently open.
fn markup_expr(p: &mut Parser, at_start: bool, nesting: &mut usize) {
    match p.current() {
        // Brackets that are not part of a content block become plain text,
        // as long as they are balanced.
        SyntaxKind::LeftBracket => {
            *nesting += 1;
            p.convert_and_eat(SyntaxKind::Text);
        }
        SyntaxKind::RightBracket => {
            if *nesting > 0 {
                *nesting -= 1;
                p.convert_and_eat(SyntaxKind::Text);
            } else {
                p.unexpected();
                p.hint("try using a backslash escape: \\]");
            }
        }

        // Tokens that are consumed as-is.
        SyntaxKind::Shebang
        | SyntaxKind::Text
        | SyntaxKind::Linebreak
        | SyntaxKind::Escape
        | SyntaxKind::Shorthand
        | SyntaxKind::SmartQuote
        | SyntaxKind::Link
        | SyntaxKind::Label
        | SyntaxKind::Raw => p.eat(),

        // Constructs with their own parsing routine.
        SyntaxKind::Hash => embedded_code_expr(p),
        SyntaxKind::Star => strong(p),
        SyntaxKind::Underscore => emph(p),
        SyntaxKind::RefMarker => reference(p),
        SyntaxKind::Dollar => equation(p),

        // Line-start markers are only structural at the start of a line.
        SyntaxKind::HeadingMarker if at_start => heading(p),
        SyntaxKind::ListMarker if at_start => list_item(p),
        SyntaxKind::EnumMarker if at_start => enum_item(p),
        SyntaxKind::TermMarker if at_start => term_item(p),

        // Anywhere else, the markers (and stray colons) are plain text.
        SyntaxKind::HeadingMarker
        | SyntaxKind::ListMarker
        | SyntaxKind::EnumMarker
        | SyntaxKind::TermMarker
        | SyntaxKind::Colon => p.convert_and_eat(SyntaxKind::Text),

        _ => p.unexpected(),
    }
}
/// Parses strong content delimited by stars: `*strong*`.
///
/// The body is parsed with the `StopParBreak` newline mode.
fn strong(p: &mut Parser) {
    p.with_nl_mode(AtNewline::StopParBreak, |p| {
        let open = p.marker();
        p.assert(SyntaxKind::Star);
        // Surrounding trivia is wrapped into the inner `Markup` node.
        markup(p, false, true, syntax_set!(Star, RightBracket, End));
        p.expect_closing_delimiter(open, SyntaxKind::Star);
        p.wrap(open, SyntaxKind::Strong);
    });
}
/// Parses emphasized content delimited by underscores: `_emph_`.
///
/// The body is parsed with the `StopParBreak` newline mode.
fn emph(p: &mut Parser) {
    p.with_nl_mode(AtNewline::StopParBreak, |p| {
        let open = p.marker();
        p.assert(SyntaxKind::Underscore);
        // Surrounding trivia is wrapped into the inner `Markup` node.
        markup(p, false, true, syntax_set!(Underscore, RightBracket, End));
        p.expect_closing_delimiter(open, SyntaxKind::Underscore);
        p.wrap(open, SyntaxKind::Emph);
    });
}
/// Parses a heading that starts at a heading marker.
///
/// The body is parsed with the `Stop` newline mode and also ends at a
/// label or a right bracket.
fn heading(p: &mut Parser) {
    p.with_nl_mode(AtNewline::Stop, |p| {
        let start = p.marker();
        p.assert(SyntaxKind::HeadingMarker);
        markup(p, false, false, syntax_set!(Label, RightBracket, End));
        p.wrap(start, SyntaxKind::Heading);
    });
}
/// Parses a list item that starts at a list marker.
///
/// The body is parsed with `RequireColumn` set to the marker's column.
fn list_item(p: &mut Parser) {
    let column = p.current_column();
    p.with_nl_mode(AtNewline::RequireColumn(column), |p| {
        let start = p.marker();
        p.assert(SyntaxKind::ListMarker);
        markup(p, true, false, syntax_set!(RightBracket, End));
        p.wrap(start, SyntaxKind::ListItem);
    });
}
/// Parses an enumeration item that starts at an enum marker.
///
/// The body is parsed with `RequireColumn` set to the marker's column.
fn enum_item(p: &mut Parser) {
    let column = p.current_column();
    p.with_nl_mode(AtNewline::RequireColumn(column), |p| {
        let start = p.marker();
        p.assert(SyntaxKind::EnumMarker);
        markup(p, true, false, syntax_set!(RightBracket, End));
        p.wrap(start, SyntaxKind::EnumItem);
    });
}
/// Parses a term list item: a term marker, the term, a colon and the
/// description.
///
/// The term is parsed with the `Stop` newline mode; the description is
/// parsed with `RequireColumn` set to the marker's column.
fn term_item(p: &mut Parser) {
    let column = p.current_column();
    p.with_nl_mode(AtNewline::RequireColumn(column), |p| {
        let start = p.marker();

        // The term itself ends at a colon.
        p.with_nl_mode(AtNewline::Stop, |p| {
            p.assert(SyntaxKind::TermMarker);
            markup(p, false, false, syntax_set!(Colon, RightBracket, End));
        });
        p.expect(SyntaxKind::Colon);

        // The description.
        markup(p, true, false, syntax_set!(RightBracket, End));
        p.wrap(start, SyntaxKind::TermItem);
    });
}
/// Parses a reference: a ref marker, optionally followed directly (without
/// intervening trivia) by a content block.
fn reference(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::RefMarker);

    let has_block = p.directly_at(SyntaxKind::LeftBracket);
    if has_block {
        content_block(p);
    }

    p.wrap(start, SyntaxKind::Ref);
}
/// Parses an equation delimited by dollar signs: `$ ... $`.
///
/// The lexer is switched to math mode for the duration of the equation.
fn equation(p: &mut Parser) {
    let open = p.marker();
    p.enter_modes(SyntaxMode::Math, AtNewline::Continue, |p| {
        p.assert(SyntaxKind::Dollar);
        math(p, syntax_set!(Dollar, End));
        p.expect_closing_delimiter(open, SyntaxKind::Dollar);
    });
    p.wrap(open, SyntaxKind::Equation);
}
/// Parses math expressions until a token in `stop_set` and wraps them in a
/// `Math` node.
fn math(p: &mut Parser, stop_set: SyntaxSet) {
    let start = p.marker();
    // The expression count is irrelevant here; the node is always wrapped.
    math_exprs(p, stop_set);
    p.wrap(start, SyntaxKind::Math);
}
/// Parses a sequence of math expressions until a token in `stop_set`.
///
/// `stop_set` must contain `End` (checked in debug builds). Tokens that
/// cannot start a math expression are reported as unexpected and are not
/// counted.
///
/// Returns the number of expressions that were parsed.
fn math_exprs(p: &mut Parser, stop_set: SyntaxSet) -> usize {
    debug_assert!(stop_set.contains(SyntaxKind::End));

    let mut parsed = 0;
    while !p.at_set(stop_set) {
        if !p.at_set(set::MATH_EXPR) {
            p.unexpected();
            continue;
        }

        math_expr(p);
        parsed += 1;
    }

    parsed
}
/// Parses a single math expression with the lowest minimum precedence and
/// no stopping token other than the end.
fn math_expr(p: &mut Parser) {
    math_expr_prec(p, 0, SyntaxKind::End);
}
/// Parses a math expression using precedence climbing.
///
/// First a primary expression is parsed based on the current token, then a
/// loop attaches factorials (`!`), primes, and the binary operators reported
/// by `math_op` (attachments and fractions).
///
/// - `min_prec`: operators with a lower precedence than this end the loop.
/// - `stop`: a token kind at which the operator loop ends.
fn math_expr_prec(p: &mut Parser, min_prec: usize, stop: SyntaxKind) {
let m = p.marker();
// Whether the primary may be joined with a directly following delimited
// group (see the `maybe_delimited` call after the match).
let mut continuable = false;
match p.current() {
SyntaxKind::Hash => embedded_code_expr(p),
SyntaxKind::MathIdent | SyntaxKind::FieldAccess => {
continuable = true;
p.eat();
// An identifier or field access directly followed by `(` is a function
// call, unless we are parsing an operand with precedence 3 or higher.
if min_prec < 3
&& p.directly_at(SyntaxKind::MathText)
&& p.current_text() == "("
{
math_args(p);
p.wrap(m, SyntaxKind::FuncCall);
continuable = false;
}
}
// Punctuation without special meaning at this position becomes math text.
SyntaxKind::Dot
| SyntaxKind::Comma
| SyntaxKind::Semicolon
| SyntaxKind::RightParen => {
p.convert_and_eat(SyntaxKind::MathText);
}
SyntaxKind::Text | SyntaxKind::MathText | SyntaxKind::MathShorthand => {
// Alphabetic text is continuable; the text is inspected before the token
// is consumed.
continuable = math_class(p.current_text()) == Some(MathClass::Alphabetic)
|| p.current_text().chars().all(char::is_alphabetic);
// An opening delimiter starts a delimited group; otherwise just eat.
if !maybe_delimited(p) {
p.eat();
}
}
SyntaxKind::Linebreak | SyntaxKind::MathAlignPoint => p.eat(),
SyntaxKind::Escape | SyntaxKind::Str => {
continuable = true;
p.eat();
}
SyntaxKind::Root => {
// Note: when `min_prec >= 3`, the root token is not consumed here.
if min_prec < 3 {
p.eat();
let m2 = p.marker();
math_expr_prec(p, 2, stop);
// Strip the parentheses of a parenthesized radicand.
math_unparen(p, m2);
p.wrap(m, SyntaxKind::MathRoot);
}
}
SyntaxKind::Prime => {
// Primes in primary position: each run of directly adjacent primes
// becomes its own `MathPrimes` node.
continuable = true;
while p.at(SyntaxKind::Prime) {
let m2 = p.marker();
p.eat();
while p.eat_if_direct(SyntaxKind::Prime) {}
p.wrap(m2, SyntaxKind::MathPrimes);
}
}
_ => p.expected("expression"),
}
// Join a continuable primary with a delimited group that follows it
// without any trivia in between.
if continuable && min_prec < 3 && !p.had_trivia() && maybe_delimited(p) {
p.wrap(m, SyntaxKind::Math);
}
// Whether any primes were consumed in the loop below and may still need
// to be wrapped into an attachment.
let mut primed = false;
while !p.end() && !p.at(stop) {
// A directly following `!` is grouped with the expression.
if p.directly_at(SyntaxKind::MathText) && p.current_text() == "!" {
p.eat();
p.wrap(m, SyntaxKind::Math);
continue;
}
let prime_marker = p.marker();
if p.eat_if_direct(SyntaxKind::Prime) {
// Collect all directly adjacent primes into one `MathPrimes` node.
while p.eat_if_direct(SyntaxKind::Prime) {}
p.wrap(prime_marker, SyntaxKind::MathPrimes);
// The loop ends at `stop`, so wrap the attachment right away.
if p.at(stop) {
p.wrap(m, SyntaxKind::MathAttach);
}
primed = true;
continue;
}
// Note: this `stop` shadows the parameter for the rest of the iteration.
let Some((kind, stop, assoc, mut prec)) = math_op(p.current()) else {
// No operator follows, so pending primes form the attachment.
if primed {
p.wrap(m, SyntaxKind::MathAttach);
}
break;
};
// Primes are attached to the base before it becomes a numerator.
if primed && kind == SyntaxKind::MathFrac {
p.wrap(m, SyntaxKind::MathAttach);
}
if prec < min_prec {
break;
}
// For left-associative operators, the right operand must bind tighter.
match assoc {
ast::Assoc::Left => prec += 1,
ast::Assoc::Right => {}
}
// Strip the parentheses of a parenthesized numerator.
if kind == SyntaxKind::MathFrac {
math_unparen(p, m);
}
// Consume the operator and parse its right operand.
p.eat();
let m2 = p.marker();
math_expr_prec(p, prec, stop);
math_unparen(p, m2);
// Optional second attachment (the other of `_` / `^`).
if p.eat_if(SyntaxKind::Underscore) || p.eat_if(SyntaxKind::Hat) {
let m3 = p.marker();
math_expr_prec(p, prec, SyntaxKind::End);
math_unparen(p, m3);
}
p.wrap(m, kind);
}
}
/// Describes the binary math operator that starts with the token `kind`.
///
/// Returns `(node kind, stop token, associativity, precedence)`, or `None`
/// if `kind` is not a binary math operator.
fn math_op(kind: SyntaxKind) -> Option<(SyntaxKind, SyntaxKind, ast::Assoc, usize)> {
    let op = match kind {
        // Subscript: the operand stops at a following `^`.
        SyntaxKind::Underscore => {
            (SyntaxKind::MathAttach, SyntaxKind::Hat, ast::Assoc::Right, 2)
        }
        // Superscript: the operand stops at a following `_`.
        SyntaxKind::Hat => {
            (SyntaxKind::MathAttach, SyntaxKind::Underscore, ast::Assoc::Right, 2)
        }
        // Fraction.
        SyntaxKind::Slash => {
            (SyntaxKind::MathFrac, SyntaxKind::End, ast::Assoc::Left, 1)
        }
        _ => return None,
    };
    Some(op)
}
/// Parses a delimited math group if the current token's text has the
/// `Opening` math class.
///
/// Returns whether a group was parsed.
fn maybe_delimited(p: &mut Parser) -> bool {
    if math_class(p.current_text()) != Some(MathClass::Opening) {
        return false;
    }

    math_delimited(p);
    true
}
/// Parses a delimited math group, starting at its opening delimiter.
///
/// If a closing delimiter is found, the result is a `MathDelimited` node
/// whose body is wrapped in a `Math` node. If the equation or input ends
/// first, everything parsed so far is wrapped in a plain `Math` node.
fn math_delimited(p: &mut Parser) {
    let open = p.marker();
    p.eat();
    let body = p.marker();

    while !p.at_set(syntax_set!(Dollar, End)) {
        if math_class(p.current_text()) != Some(MathClass::Closing) {
            // Still inside the group.
            if p.at_set(set::MATH_EXPR) {
                math_expr(p);
            } else {
                p.unexpected();
            }
            continue;
        }

        // Found the closing delimiter: finish the body, then eat the closer.
        p.wrap(body, SyntaxKind::Math);
        if p.at(SyntaxKind::RightParen) {
            p.convert_and_eat(SyntaxKind::MathText);
        } else {
            p.eat();
        }
        p.wrap(open, SyntaxKind::MathDelimited);
        return;
    }

    // No closing delimiter was found.
    p.wrap(open, SyntaxKind::Math);
}
/// Turns the `MathDelimited` node at marker `m` into a plain `Math` node if
/// it is delimited by `(` and `)`, converting the two delimiters into
/// `LeftParen` and `RightParen`.
///
/// Does nothing if there is no node at `m`, if the node is not a
/// `MathDelimited`, or if its first and last children are not `(` and `)`.
fn math_unparen(p: &mut Parser, m: Marker) {
    let node = match p.nodes.get_mut(m.0) {
        Some(node) if node.kind() == SyntaxKind::MathDelimited => node,
        _ => return,
    };

    // At least two children are needed: the opening and closing delimiter.
    let [first, .., last] = node.children_mut() else { return };
    if first.text() != "(" || last.text() != ")" {
        return;
    }

    first.convert_to_kind(SyntaxKind::LeftParen);
    last.convert_to_kind(SyntaxKind::RightParen);
    node.convert_to_kind(SyntaxKind::Math);
}
fn math_class(text: &str) -> Option<MathClass> {
match text {
"[|" => return Some(MathClass::Opening),
"|]" => return Some(MathClass::Closing),
"||" => return Some(MathClass::Fence),
_ => {}
}
let mut chars = text.chars();
chars
.next()
.filter(|_| chars.next().is_none())
.and_then(default_math_class)
}
/// Parses the argument list of a math function call, starting at the
/// opening parenthesis.
///
/// Arguments are separated by commas. A semicolon wraps the arguments
/// collected since `row_start` into an `Array` node.
fn math_args(p: &mut Parser) {
    let open = p.marker();
    p.convert_and_eat(SyntaxKind::LeftParen);

    // Whether the most recently parsed argument was positional.
    let mut positional = true;
    // Whether any semicolon has produced an array so far.
    let mut has_arrays = false;
    // Where the array that a following semicolon would create begins.
    let mut row_start = p.marker();
    let mut seen = FxHashSet::default();

    while !p.at_set(syntax_set!(End, Dollar, RightParen)) {
        positional = math_arg(p, &mut seen);

        match p.current() {
            SyntaxKind::Comma => {
                p.eat();
                // Named arguments never become part of an array.
                if !positional {
                    row_start = p.marker();
                }
            }
            SyntaxKind::Semicolon => {
                if !positional {
                    row_start = p.marker();
                }
                p.wrap(row_start, SyntaxKind::Array);
                p.eat();
                row_start = p.marker();
                has_arrays = true;
            }
            SyntaxKind::End | SyntaxKind::Dollar | SyntaxKind::RightParen => {}
            _ => p.expected("comma or semicolon"),
        }
    }

    // Trailing arguments after the last semicolon form a final array.
    let trailing_row = has_arrays && positional && row_start != p.marker();
    if trailing_row {
        p.wrap(row_start, SyntaxKind::Array);
    }

    p.expect_closing_delimiter(open, SyntaxKind::RightParen);
    p.wrap(open, SyntaxKind::Args);
}
/// Parses a single argument of a math function call.
///
/// `seen` collects the names of named arguments to detect duplicates.
///
/// Returns `true` if the argument was positional (including spread
/// arguments) and `false` if it was a named argument.
fn math_arg<'s>(p: &mut Parser<'s>, seen: &mut FxHashSet<&'s str>) -> bool {
let m = p.marker();
let start = p.current_start();
if p.at(SyntaxKind::Dot) {
// Ask the lexer whether a spread argument starts here. If so, it returns
// a node that replaces the current token's node.
if let Some(spread) = p.lexer.maybe_math_spread_arg(start) {
p.token.node = spread;
p.eat();
let m_arg = p.marker();
let count =
math_exprs(p, syntax_set!(End, Dollar, Comma, Semicolon, RightParen));
if count == 0 {
// Nothing follows the dots: replace the node with a `Math` node
// containing two literal dots.
let dots = vec![
SyntaxNode::leaf(SyntaxKind::MathText, "."),
SyntaxNode::leaf(SyntaxKind::MathText, "."),
];
p[m] = SyntaxNode::inner(SyntaxKind::Math, dots);
} else {
// Multiple expressions are joined into one `Math` node first.
if count > 1 {
p.wrap(m_arg, SyntaxKind::Math);
}
p.wrap(m, SyntaxKind::Spread);
}
return true;
}
}
let mut positional = true;
if p.at_set(syntax_set!(MathText, MathIdent, Underscore)) {
// Ask the lexer whether a named argument starts here. If so, it returns
// a node for the name that replaces the current token's node.
if let Some(named) = p.lexer.maybe_math_named_arg(start) {
p.token.node = named;
let text = p.current_text();
p.eat();
p.convert_and_eat(SyntaxKind::Colon);
// Report a name that was already used in this argument list.
if !seen.insert(text) {
p[m].convert_to_error(eco_format!("duplicate argument: {text}"));
}
positional = false;
}
}
// The argument's value (or the whole argument, if positional).
let arg = p.marker();
let count = math_exprs(p, syntax_set!(End, Dollar, Comma, Semicolon, RightParen));
if count == 0 {
// A named argument needs a value.
if !positional {
p.expected("expression");
}
// Flush trivia so that the empty `Math` node wrapped below is placed
// after the pending trivia rather than before it.
p.flush_trivia();
}
// Zero or several expressions are wrapped in a `Math` node; exactly one
// expression is left unwrapped.
if count != 1 {
p.wrap(arg, SyntaxKind::Math);
}
if !positional {
p.wrap(m, SyntaxKind::Named);
}
positional
}
/// Parses code expressions until a token in `stop_set` and wraps them in a
/// `Code` node.
fn code(p: &mut Parser, stop_set: SyntaxSet) {
    let start = p.marker();
    code_exprs(p, stop_set);
    p.wrap(start, SyntaxKind::Code);
}
/// Parses a sequence of code expressions until a token in `stop_set`.
///
/// `stop_set` must contain `End` (checked in debug builds). Expressions
/// must be separated by a semicolon or a line break; each one is parsed
/// with the `ContextualContinue` newline mode.
fn code_exprs(p: &mut Parser, stop_set: SyntaxSet) {
    debug_assert!(stop_set.contains(SyntaxKind::End));

    while !p.at_set(stop_set) {
        p.with_nl_mode(AtNewline::ContextualContinue, |p| {
            if p.at_set(set::CODE_EXPR) {
                code_expr(p);

                // The expression must be followed by a stop token or `;`.
                let terminated =
                    p.at_set(stop_set) || p.eat_if(SyntaxKind::Semicolon);
                if !terminated {
                    p.expected("semicolon or line break");
                    if p.at(SyntaxKind::Label) {
                        p.hint("labels can only be applied in markup mode");
                        p.hint("try wrapping your code in a markup block (`[ ]`)");
                    }
                }
            } else {
                p.unexpected();
            }
        });
    }
}
/// Parses a code expression embedded with a hash: `#expr`.
///
/// The lexer is switched to code mode with the `Stop` newline mode. The
/// expression must directly follow the hash.
fn embedded_code_expr(p: &mut Parser) {
    p.enter_modes(SyntaxMode::Code, AtNewline::Stop, |p| {
        p.assert(SyntaxKind::Hash);

        // Nothing usable follows the hash.
        if p.had_trivia() || p.end() {
            p.expected("expression");
            return;
        }

        // Classify the token before it is consumed.
        let is_stmt = p.at_set(set::STMT);
        let is_atomic = p.at_set(set::ATOMIC_CODE_EXPR);
        code_expr_prec(p, true, 0);

        // The expression parser did not accept the token; report whatever
        // is current now.
        if !is_atomic {
            p.unexpected();
        }

        // A semicolon is eaten after a statement, or when it directly
        // follows the expression.
        let ate_semicolon = (is_stmt || p.directly_at(SyntaxKind::Semicolon))
            && p.eat_if(SyntaxKind::Semicolon);

        let unterminated_stmt = is_stmt
            && !ate_semicolon
            && !p.end()
            && !p.at(SyntaxKind::RightBracket);
        if unterminated_stmt {
            p.expected("semicolon or line break");
        }
    });
}
/// Parses a full (non-atomic) code expression with the lowest minimum
/// precedence.
fn code_expr(p: &mut Parser) {
    code_expr_prec(p, false, 0);
}
/// Parses a code expression using precedence climbing.
///
/// - `atomic`: if true, unary and binary operators are not parsed; only a
///   primary expression followed by function calls and field accesses.
/// - `min_prec`: binary operators with a lower precedence end the expression.
fn code_expr_prec(p: &mut Parser, atomic: bool, min_prec: u8) {
let m = p.marker();
if !atomic && p.at_set(set::UNARY_OP) {
// Prefix operator: parse the operand with the operator's precedence.
let op = ast::UnOp::from_kind(p.current()).unwrap();
p.eat();
code_expr_prec(p, atomic, op.precedence());
p.wrap(m, SyntaxKind::Unary);
} else {
code_primary(p, atomic);
}
// Postfix and binary operators.
loop {
// A directly following `(` or `[` starts an argument list.
if p.directly_at(SyntaxKind::LeftParen) || p.directly_at(SyntaxKind::LeftBracket)
{
args(p);
p.wrap(m, SyntaxKind::FuncCall);
continue;
}
// Peek at the token after a directly following dot using a cloned lexer,
// so that the real lexer state is left untouched.
let at_field_or_method = p.directly_at(SyntaxKind::Dot)
&& p.lexer.clone().next().0 == SyntaxKind::Ident;
// Atomic expressions only continue with `.ident`.
if atomic && !at_field_or_method {
break;
}
if p.eat_if(SyntaxKind::Dot) {
p.expect(SyntaxKind::Ident);
p.wrap(m, SyntaxKind::FieldAccess);
continue;
}
// Determine the binary operator, if any. `not in` spans two tokens: the
// `not` is eaten here and the `in` is eaten below like other operators.
let binop = if p.at_set(set::BINARY_OP) {
ast::BinOp::from_kind(p.current())
} else if min_prec <= ast::BinOp::NotIn.precedence() && p.eat_if(SyntaxKind::Not)
{
if p.at(SyntaxKind::In) {
Some(ast::BinOp::NotIn)
} else {
p.expected("keyword `in`");
break;
}
} else {
None
};
if let Some(op) = binop {
let mut prec = op.precedence();
if prec < min_prec {
break;
}
// For left-associative operators, the right operand must bind tighter.
match op.assoc() {
ast::Assoc::Left => prec += 1,
ast::Assoc::Right => {}
}
// Eat the operator token and parse the right-hand side.
p.eat();
code_expr_prec(p, false, prec);
p.wrap(m, SyntaxKind::Binary);
continue;
}
break;
}
}
fn code_primary(p: &mut Parser, atomic: bool) {
let m = p.marker();
match p.current() {
SyntaxKind::Ident => {
p.eat();
if !atomic && p.at(SyntaxKind::Arrow) {
p.wrap(m, SyntaxKind::Params);
p.assert(SyntaxKind::Arrow);
code_expr(p);
p.wrap(m, SyntaxKind::Closure);
}
}
SyntaxKind::Underscore if !atomic => {
p.eat();
if p.at(SyntaxKind::Arrow) {
p.wrap(m, SyntaxKind::Params);
p.eat();
code_expr(p);
p.wrap(m, SyntaxKind::Closure);
} else if p.eat_if(SyntaxKind::Eq) {
code_expr(p);
p.wrap(m, SyntaxKind::DestructAssignment);
} else {
p[m].expected("expression");
}
}
SyntaxKind::LeftBrace => code_block(p),
SyntaxKind::LeftBracket => content_block(p),
SyntaxKind::LeftParen => expr_with_paren(p, atomic),
SyntaxKind::Dollar => equation(p),
SyntaxKind::Let => let_binding(p),
SyntaxKind::Set => set_rule(p),
SyntaxKind::Show => show_rule(p),
SyntaxKind::Context => contextual(p, atomic),
SyntaxKind::If => conditional(p),
SyntaxKind::While => while_loop(p),
SyntaxKind::For => for_loop(p),
SyntaxKind::Import => module_import(p),
SyntaxKind::Include => module_include(p),
SyntaxKind::Break => break_stmt(p),
SyntaxKind::Continue => continue_stmt(p),
SyntaxKind::Return => return_stmt(p),
SyntaxKind::Raw => p.eat(),
SyntaxKind::None
| SyntaxKind::Auto
| SyntaxKind::Int
| SyntaxKind::Float
| SyntaxKind::Bool
| SyntaxKind::Numeric
| SyntaxKind::Str
| SyntaxKind::Label => p.eat(),
_ => p.expected("expression"),
}
}
/// Reparses the code or content block that occupies `range` of `text`.
///
/// Returns the block's node only if the parser stayed balanced and the
/// block ends exactly at `range.end`; otherwise returns `None`.
///
/// # Panics
///
/// Panics if the token at `range.start` is neither `[` nor `{`.
pub(super) fn reparse_block(text: &str, range: Range<usize>) -> Option<SyntaxNode> {
    let mut p = Parser::new(text, range.start, SyntaxMode::Code);
    assert!(p.at(SyntaxKind::LeftBracket) || p.at(SyntaxKind::LeftBrace));
    block(&mut p);

    if !p.balanced || p.prev_end() != range.end {
        return None;
    }

    // The block is the first node that was produced.
    let mut nodes = p.finish().into_iter();
    Some(nodes.next().unwrap())
}
/// Parses a content block (`[...]`) or a code block (`{...}`), reporting an
/// error if the current token starts neither.
fn block(p: &mut Parser) {
    let kind = p.current();
    if kind == SyntaxKind::LeftBracket {
        content_block(p);
    } else if kind == SyntaxKind::LeftBrace {
        code_block(p);
    } else {
        p.expected("block");
    }
}
/// Parses a code block: `{ ... }`.
///
/// The lexer is switched to code mode with the `Continue` newline mode.
fn code_block(p: &mut Parser) {
    let open = p.marker();
    p.enter_modes(SyntaxMode::Code, AtNewline::Continue, |p| {
        p.assert(SyntaxKind::LeftBrace);
        // Any closing delimiter ends the body; a wrong one is reported by
        // `expect_closing_delimiter`.
        code(p, syntax_set!(RightBrace, RightBracket, RightParen, End));
        p.expect_closing_delimiter(open, SyntaxKind::RightBrace);
    });
    p.wrap(open, SyntaxKind::CodeBlock);
}
/// Parses a content block: `[ ... ]`.
///
/// The lexer is switched to markup mode with the `Continue` newline mode.
fn content_block(p: &mut Parser) {
    let open = p.marker();
    p.enter_modes(SyntaxMode::Markup, AtNewline::Continue, |p| {
        p.assert(SyntaxKind::LeftBracket);
        // The body starts a line and includes its surrounding trivia.
        markup(p, true, true, syntax_set!(RightBracket, End));
        p.expect_closing_delimiter(open, SyntaxKind::RightBracket);
    });
    p.wrap(open, SyntaxKind::ContentBlock);
}
/// Parses a let binding: `let x = 1`, `let f(x) = x` or `let (a, b) = c`.
///
/// The initializer is optional only for a plain identifier binding.
fn let_binding(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::Let);

    let binding = p.marker();
    let plain_ident = p.eat_if(SyntaxKind::Ident);
    let mut closure = false;
    if plain_ident {
        // A directly following parameter list makes this a closure binding.
        if p.directly_at(SyntaxKind::LeftParen) {
            params(p);
            closure = true;
        }
    } else {
        pattern(p, false, &mut FxHashSet::default(), None);
    }

    // Closures and patterns require `=`; otherwise it is optional.
    let init_required = closure || !plain_ident;
    let has_init = if init_required {
        p.expect(SyntaxKind::Eq)
    } else {
        p.eat_if(SyntaxKind::Eq)
    };
    if has_init {
        code_expr(p);
    }

    if closure {
        p.wrap(binding, SyntaxKind::Closure);
    }
    p.wrap(start, SyntaxKind::LetBinding);
}
/// Parses a set rule: `set target(args)` with an optional trailing
/// `if condition`.
///
/// The target is an identifier optionally followed by field accesses.
fn set_rule(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::Set);

    // The target: `ident` or `ident.field.field`.
    let target = p.marker();
    p.expect(SyntaxKind::Ident);
    while p.eat_if(SyntaxKind::Dot) {
        p.expect(SyntaxKind::Ident);
        p.wrap(target, SyntaxKind::FieldAccess);
    }

    args(p);

    let conditional = p.eat_if(SyntaxKind::If);
    if conditional {
        code_expr(p);
    }

    p.wrap(start, SyntaxKind::SetRule);
}
/// Parses a show rule: `show`, an optional expression, a colon and another
/// expression.
///
/// If the colon is missing, the error is placed at the position right after
/// the `show` keyword (before any trivia).
fn show_rule(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::Show);
    let after_keyword = p.before_trivia();

    // The expression before the colon may be omitted.
    if !p.at(SyntaxKind::Colon) {
        code_expr(p);
    }

    if !p.eat_if(SyntaxKind::Colon) {
        p.expected_at(after_keyword, "colon");
    } else {
        code_expr(p);
    }

    p.wrap(start, SyntaxKind::ShowRule);
}
/// Parses a contextual expression: `context expr`.
///
/// `atomic` is forwarded to the parser of the inner expression.
fn contextual(p: &mut Parser, atomic: bool) {
    let start = p.marker();
    p.assert(SyntaxKind::Context);
    code_expr_prec(p, atomic, 0);
    p.wrap(start, SyntaxKind::Contextual);
}
/// Parses an if-else conditional: `if cond { .. } else if cond { .. } else
/// { .. }`.
fn conditional(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::If);
    code_expr(p);
    block(p);

    if p.eat_if(SyntaxKind::Else) {
        match p.current() {
            // `else if` chains are nested conditionals.
            SyntaxKind::If => conditional(p),
            _ => block(p),
        }
    }

    p.wrap(start, SyntaxKind::Conditional);
}
/// Parses a while loop: `while cond { .. }`.
fn while_loop(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::While);
    // Condition, then body.
    code_expr(p);
    block(p);
    p.wrap(start, SyntaxKind::WhileLoop);
}
/// Parses a for loop: `for pattern in iterable { .. }`.
///
/// An unparenthesized `a, b` pattern is reported as an error, but the
/// second pattern is still parsed for recovery.
fn for_loop(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::For);

    let mut bindings = FxHashSet::default();
    pattern(p, false, &mut bindings, None);

    if p.at(SyntaxKind::Comma) {
        let comma = p.eat_and_get();
        comma.unexpected();
        comma.hint("destructuring patterns must be wrapped in parentheses");

        let another_pattern = p.at_set(set::PATTERN);
        if another_pattern {
            pattern(p, false, &mut bindings, None);
        }
    }

    p.expect(SyntaxKind::In);
    code_expr(p);
    block(p);
    p.wrap(start, SyntaxKind::ForLoop);
}
/// Parses a module import: `import source`, optionally followed by
/// `as name` and/or `: items`.
///
/// The items are `*`, a bare item list, or a parenthesized item list.
fn module_import(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::Import);
    code_expr(p);

    // Optional rename of the module itself.
    if p.eat_if(SyntaxKind::As) {
        p.expect(SyntaxKind::Ident);
    }

    if p.eat_if(SyntaxKind::Colon) {
        if p.at(SyntaxKind::LeftParen) {
            // Inside parentheses, the `Continue` newline mode is used.
            p.with_nl_mode(AtNewline::Continue, |p| {
                let open = p.marker();
                p.assert(SyntaxKind::LeftParen);
                import_items(p);
                p.expect_closing_delimiter(open, SyntaxKind::RightParen);
            });
        } else if !p.eat_if(SyntaxKind::Star) {
            import_items(p);
        }
    }

    p.wrap(start, SyntaxKind::ModuleImport);
}
/// Parses a comma-separated list of import items until a terminator token.
///
/// Each item is a dotted path (`a.b.c`), optionally renamed with
/// `as name`.
fn import_items(p: &mut Parser) {
    let list = p.marker();

    while !p.current().is_terminator() {
        let item = p.marker();

        // First path segment.
        if !p.eat_if(SyntaxKind::Ident) {
            p.unexpected();
        }

        // Remaining path segments.
        while p.eat_if(SyntaxKind::Dot) {
            p.expect(SyntaxKind::Ident);
        }
        p.wrap(item, SyntaxKind::ImportItemPath);

        // Optional rename.
        if p.eat_if(SyntaxKind::As) {
            p.expect(SyntaxKind::Ident);
            p.wrap(item, SyntaxKind::RenamedImportItem);
        }

        // Items are separated by commas; a trailing comma is optional.
        if !p.current().is_terminator() {
            p.expect(SyntaxKind::Comma);
        }
    }

    p.wrap(list, SyntaxKind::ImportItems);
}
/// Parses a module include: `include expr`.
fn module_include(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::Include);
    code_expr(p);
    p.wrap(start, SyntaxKind::ModuleInclude);
}
/// Parses a `break` statement and wraps it in a `LoopBreak` node.
fn break_stmt(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::Break);
    p.wrap(start, SyntaxKind::LoopBreak);
}
/// Parses a `continue` statement and wraps it in a `LoopContinue` node.
fn continue_stmt(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::Continue);
    p.wrap(start, SyntaxKind::LoopContinue);
}
/// Parses a `return` statement with an optional value and wraps it in a
/// `FuncReturn` node.
fn return_stmt(p: &mut Parser) {
    let start = p.marker();
    p.assert(SyntaxKind::Return);

    let has_value = p.at_set(set::CODE_EXPR);
    if has_value {
        code_expr(p);
    }

    p.wrap(start, SyntaxKind::FuncReturn);
}
/// Parses an expression that starts with a parenthesis.
///
/// The input is first parsed speculatively as a parenthesized expression,
/// array or dictionary. If `=>` follows, or `=` follows something that is
/// not a plain parenthesized expression, the parser is restored to the
/// checkpoint and the input is reparsed as a closure or a destructuring
/// assignment. The result of such a reparse is memoized.
///
/// In atomic mode, the first parse is final.
fn expr_with_paren(p: &mut Parser, atomic: bool) {
    if atomic {
        parenthesized_or_array_or_dict(p);
        return;
    }

    // `None` means there is nothing left to do here (presumably because a
    // memoized result for this position was restored).
    let Some((memo_key, checkpoint)) = p.restore_memo_or_checkpoint() else { return };
    let prev_len = checkpoint.node_len;

    // Speculative first parse.
    let kind = parenthesized_or_array_or_dict(p);

    let is_closure = p.at(SyntaxKind::Arrow);
    let is_destructuring =
        !is_closure && p.at(SyntaxKind::Eq) && kind != SyntaxKind::Parenthesized;
    if !is_closure && !is_destructuring {
        // The speculative parse was correct.
        return;
    }

    // Backtrack and reparse with the right interpretation.
    p.restore(checkpoint);
    let start = p.marker();
    if is_closure {
        params(p);
        if !p.expect(SyntaxKind::Arrow) {
            return;
        }
        code_expr(p);
        p.wrap(start, SyntaxKind::Closure);
    } else {
        destructuring_or_parenthesized(p, true, &mut FxHashSet::default());
        if !p.expect(SyntaxKind::Eq) {
            return;
        }
        code_expr(p);
        p.wrap(start, SyntaxKind::DestructAssignment);
    }

    // Remember the reparsed nodes for this position.
    p.memoize_parsed_nodes(memo_key, prev_len);
}
/// Parses a parenthesized expression, an array or a dictionary.
///
/// A colon directly after the opening parenthesis forces a dictionary. A
/// single item without a trailing comma (and without spread or pair) is a
/// parenthesized expression. Without any other indication, the result is
/// an array.
///
/// Returns the kind of node that was produced.
fn parenthesized_or_array_or_dict(p: &mut Parser) -> SyntaxKind {
    let open = p.marker();
    let mut state = GroupState {
        count: 0,
        maybe_just_parens: true,
        kind: None,
        seen: FxHashSet::default(),
    };

    p.with_nl_mode(AtNewline::Continue, |p| {
        p.assert(SyntaxKind::LeftParen);
        if p.eat_if(SyntaxKind::Colon) {
            state.kind = Some(SyntaxKind::Dict);
        }

        while !p.current().is_terminator() {
            if p.at_set(set::ARRAY_OR_DICT_ITEM) {
                array_or_dict_item(p, &mut state);
                state.count += 1;

                // A comma after an item rules out plain parentheses.
                if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
                    state.maybe_just_parens = false;
                }
            } else {
                p.unexpected();
            }
        }

        p.expect_closing_delimiter(open, SyntaxKind::RightParen);
    });

    let single_item = state.maybe_just_parens && state.count == 1;
    let kind = if single_item {
        SyntaxKind::Parenthesized
    } else {
        state.kind.unwrap_or(SyntaxKind::Array)
    };

    p.wrap(open, kind);
    kind
}
/// State kept while parsing the items of a parenthesized group in
/// `parenthesized_or_array_or_dict`.
struct GroupState {
// Number of items parsed so far.
count: usize,
// Whether the group can still be a plain parenthesized expression. Cleared
// by a comma, a spread item or a named/keyed pair.
maybe_just_parens: bool,
// The kind the group has been determined to be (`Array` or `Dict`), if
// already known.
kind: Option<SyntaxKind>,
// Keys of the named/keyed pairs seen so far, used to detect duplicates.
seen: FxHashSet<EcoString>,
}
/// Parses one item of an array or dictionary: a spread (`..expr`), a named
/// or keyed pair (`name: expr` / `"key": expr`), or a positional
/// expression.
///
/// Updates `state` with what the item reveals about the group and reports
/// duplicate keys as well as items that do not fit the group's kind.
fn array_or_dict_item(p: &mut Parser, state: &mut GroupState) {
    let m = p.marker();

    // Spread item.
    if p.eat_if(SyntaxKind::Dots) {
        code_expr(p);
        p.wrap(m, SyntaxKind::Spread);
        state.maybe_just_parens = false;
        return;
    }

    code_expr(p);

    if !p.eat_if(SyntaxKind::Colon) {
        // Positional item.
        if state.kind == Some(SyntaxKind::Dict) {
            p[m].expected("named or keyed pair");
        } else {
            state.kind = Some(SyntaxKind::Array)
        }
        return;
    }

    // Pair: the value follows the colon.
    code_expr(p);

    let node = &mut p[m];
    let pair_kind = if node.kind() == SyntaxKind::Ident {
        SyntaxKind::Named
    } else {
        SyntaxKind::Keyed
    };

    // Only identifier and string keys are checked for duplicates.
    let key = match node.cast::<ast::Expr>() {
        Some(ast::Expr::Ident(ident)) => Some(ident.get().clone()),
        Some(ast::Expr::Str(s)) => Some(s.get()),
        _ => None,
    };
    if let Some(key) = key {
        if !state.seen.insert(key.clone()) {
            node.convert_to_error(eco_format!("duplicate key: {key}"));
        }
    }

    p.wrap(m, pair_kind);
    state.maybe_just_parens = false;

    if state.kind == Some(SyntaxKind::Array) {
        p[m].expected("expression");
    } else {
        state.kind = Some(SyntaxKind::Dict);
    }
}
/// Parses function call arguments: an optional parenthesized argument list
/// followed by any number of directly attached content blocks.
///
/// Reports an error if no argument list directly follows, with a hint when
/// one follows after whitespace.
fn args(p: &mut Parser) {
    let directly_at_list = p.directly_at(SyntaxKind::LeftParen)
        || p.directly_at(SyntaxKind::LeftBracket);
    if !directly_at_list {
        p.expected("argument list");
        if p.at(SyntaxKind::LeftParen) || p.at(SyntaxKind::LeftBracket) {
            p.hint("there may not be any spaces before the argument list");
        }
    }

    let start = p.marker();

    if p.at(SyntaxKind::LeftParen) {
        let open = p.marker();
        p.with_nl_mode(AtNewline::Continue, |p| {
            p.assert(SyntaxKind::LeftParen);

            let mut names = FxHashSet::default();
            while !p.current().is_terminator() {
                if p.at_set(set::ARG) {
                    arg(p, &mut names);
                    // Arguments are separated by commas; a trailing comma
                    // is optional.
                    if !p.current().is_terminator() {
                        p.expect(SyntaxKind::Comma);
                    }
                } else {
                    p.unexpected();
                }
            }

            p.expect_closing_delimiter(open, SyntaxKind::RightParen);
        });
    }

    // Trailing content blocks.
    while p.directly_at(SyntaxKind::LeftBracket) {
        content_block(p);
    }

    p.wrap(start, SyntaxKind::Args);
}
/// Parses a single call argument: a spread (`..expr`), a named argument
/// (`name: expr`) or a positional expression.
///
/// `seen` collects the names of named arguments to detect duplicates.
fn arg<'s>(p: &mut Parser<'s>, seen: &mut FxHashSet<&'s str>) {
    let start = p.marker();

    // Spread argument.
    if p.eat_if(SyntaxKind::Dots) {
        code_expr(p);
        p.wrap(start, SyntaxKind::Spread);
        return;
    }

    // A positional argument or the name of a named argument. The text is
    // captured before the token is consumed.
    let was_at_expr = p.at_set(set::CODE_EXPR);
    let text = p.current_text();
    code_expr(p);

    if !p.eat_if(SyntaxKind::Colon) {
        return;
    }

    // Named argument: validate the name, unless an error was already
    // reported because no expression started here.
    if was_at_expr {
        if p[start].kind() != SyntaxKind::Ident {
            p[start].expected("identifier");
        } else if !seen.insert(text) {
            p[start].convert_to_error(eco_format!("duplicate argument: {text}"));
        }
    }

    code_expr(p);
    p.wrap(start, SyntaxKind::Named);
}
/// Parses a parenthesized parameter list and wraps it in a `Params` node.
///
/// Duplicate parameter names and multiple argument sinks are reported by
/// `param`.
fn params(p: &mut Parser) {
    let open = p.marker();
    p.with_nl_mode(AtNewline::Continue, |p| {
        p.assert(SyntaxKind::LeftParen);

        let mut names = FxHashSet::default();
        let mut has_sink = false;
        while !p.current().is_terminator() {
            if p.at_set(set::PARAM) {
                param(p, &mut names, &mut has_sink);
                // Parameters are separated by commas; a trailing comma is
                // optional.
                if !p.current().is_terminator() {
                    p.expect(SyntaxKind::Comma);
                }
            } else {
                p.unexpected();
            }
        }

        p.expect_closing_delimiter(open, SyntaxKind::RightParen);
    });
    p.wrap(open, SyntaxKind::Params);
}
/// Parses a single parameter: an argument sink (`..name` or `..`), a named
/// parameter with default (`name: expr`) or a positional pattern.
///
/// `seen` collects parameter names to detect duplicates; `sink` records
/// whether an argument sink was already encountered.
fn param<'s>(p: &mut Parser<'s>, seen: &mut FxHashSet<&'s str>, sink: &mut bool) {
    let start = p.marker();

    // Argument sink.
    if p.eat_if(SyntaxKind::Dots) {
        if p.at_set(set::PATTERN_LEAF) {
            pattern_leaf(p, false, seen, Some("parameter"));
        }
        p.wrap(start, SyntaxKind::Spread);

        // Mark the sink as seen and complain if there already was one.
        let had_sink = mem::replace(sink, true);
        if had_sink {
            p[start].convert_to_error("only one argument sink is allowed");
        }
        return;
    }

    // A positional parameter or the name of a named parameter.
    let was_at_pat = p.at_set(set::PATTERN);
    pattern(p, false, seen, Some("parameter"));

    if !p.eat_if(SyntaxKind::Colon) {
        return;
    }

    // Named parameter: the name must be a plain identifier.
    if was_at_pat && p[start].kind() != SyntaxKind::Ident {
        p[start].expected("identifier");
    }
    code_expr(p);
    p.wrap(start, SyntaxKind::Named);
}
/// Parses a binding or reassignment pattern: an underscore, a
/// parenthesized destructuring, or a pattern leaf.
///
/// - `reassignment`: forwarded to the destructuring and leaf parsers.
/// - `seen`: names bound so far, used to detect duplicates.
/// - `dupe`: forwarded to `pattern_leaf` for its duplicate error message.
fn pattern<'s>(
    p: &mut Parser<'s>,
    reassignment: bool,
    seen: &mut FxHashSet<&'s str>,
    dupe: Option<&'s str>,
) {
    let kind = p.current();
    if kind == SyntaxKind::Underscore {
        p.eat();
    } else if kind == SyntaxKind::LeftParen {
        destructuring_or_parenthesized(p, reassignment, seen);
    } else {
        pattern_leaf(p, reassignment, seen, dupe);
    }
}
/// Parses a parenthesized pattern, which is either a destructuring or a
/// single parenthesized pattern.
///
/// The result is `Parenthesized` only if there is exactly one item with no
/// comma, no named pair and no sink; otherwise it is `Destructuring`.
fn destructuring_or_parenthesized<'s>(
    p: &mut Parser<'s>,
    reassignment: bool,
    seen: &mut FxHashSet<&'s str>,
) {
    let open = p.marker();
    let mut has_sink = false;
    let mut items = 0;
    let mut maybe_just_parens = true;

    p.with_nl_mode(AtNewline::Continue, |p| {
        p.assert(SyntaxKind::LeftParen);

        while !p.current().is_terminator() {
            if p.at_set(set::DESTRUCTURING_ITEM) {
                destructuring_item(
                    p,
                    reassignment,
                    seen,
                    &mut maybe_just_parens,
                    &mut has_sink,
                );
                items += 1;

                // A comma after an item rules out plain parentheses.
                if !p.current().is_terminator() && p.expect(SyntaxKind::Comma) {
                    maybe_just_parens = false;
                }
            } else {
                p.unexpected();
            }
        }

        p.expect_closing_delimiter(open, SyntaxKind::RightParen);
    });

    let just_parens = maybe_just_parens && items == 1 && !has_sink;
    let kind = if just_parens {
        SyntaxKind::Parenthesized
    } else {
        SyntaxKind::Destructuring
    };
    p.wrap(open, kind);
}
/// Parses one item of a destructuring: a sink (`..name` or `..`), a named
/// item (`key: pattern`) or a positional pattern.
///
/// `maybe_just_parens` is cleared when a named item is found; `sink`
/// records whether a sink was already encountered.
fn destructuring_item<'s>(
    p: &mut Parser<'s>,
    reassignment: bool,
    seen: &mut FxHashSet<&'s str>,
    maybe_just_parens: &mut bool,
    sink: &mut bool,
) {
    let start = p.marker();

    // Destructuring sink.
    if p.eat_if(SyntaxKind::Dots) {
        if p.at_set(set::PATTERN_LEAF) {
            pattern_leaf(p, reassignment, seen, None);
        }
        p.wrap(start, SyntaxKind::Spread);

        // Mark the sink as seen and complain if there already was one.
        let had_sink = mem::replace(sink, true);
        if had_sink {
            p[start].convert_to_error("only one destructuring sink is allowed");
        }
        return;
    }

    let was_at_pat = p.at_set(set::PATTERN);

    // Try `ident :` first. If that is not what follows, rewind and parse a
    // regular pattern instead.
    let checkpoint = p.checkpoint();
    let at_key = p.eat_if(SyntaxKind::Ident) && p.at(SyntaxKind::Colon);
    if !at_key {
        p.restore(checkpoint);
        pattern(p, reassignment, seen, None);
    }

    if !p.eat_if(SyntaxKind::Colon) {
        return;
    }

    // Named item: the key must be a plain identifier.
    if was_at_pat && p[start].kind() != SyntaxKind::Ident {
        p[start].expected("identifier");
    }
    pattern(p, reassignment, seen, None);
    p.wrap(start, SyntaxKind::Named);
    *maybe_just_parens = false;
}
/// Parses the leaf of a pattern, which should be an identifier.
///
/// An atomic expression is parsed so that anything other than an
/// identifier can be reported as a whole. Unless `reassignment` is set,
/// the result must be an identifier that is not yet in `seen`; a duplicate
/// is reported as "duplicate {dupe}", with `dupe` defaulting to "binding".
fn pattern_leaf<'s>(
    p: &mut Parser<'s>,
    reassignment: bool,
    seen: &mut FxHashSet<&'s str>,
    dupe: Option<&'s str>,
) {
    // Keywords are consumed and reported; other invalid tokens are only
    // reported.
    if p.current().is_keyword() {
        p.eat_and_get().expected("pattern");
        return;
    }
    if !p.at_set(set::PATTERN_LEAF) {
        p.expected("pattern");
        return;
    }

    let start = p.marker();
    // The text is captured before the token is consumed.
    let text = p.current_text();
    code_expr_prec(p, true, 0);

    if reassignment {
        return;
    }

    let node = &mut p[start];
    if node.kind() != SyntaxKind::Ident {
        node.expected("pattern");
        return;
    }

    if !seen.insert(text) {
        node.convert_to_error(eco_format!(
            "duplicate {}: {text}",
            dupe.unwrap_or("binding"),
        ));
    }
}
/// The state of the parser: the lexer, the current token, and the nodes
/// produced so far.
struct Parser<'s> {
// The full source text being parsed.
text: &'s str,
// The lexer that produces tokens from `text`.
lexer: Lexer<'s>,
// How newlines before a token are currently treated (see `AtNewline`).
nl_mode: AtNewline,
// The current token, which has been lexed but not yet consumed.
token: Token,
// Initialized to `true`; the reparse functions only accept a result while
// this is still `true`. NOTE(review): where it is cleared is not visible in
// this part of the file.
balanced: bool,
// The nodes produced so far, including the trivia before the current token.
nodes: Vec<SyntaxNode>,
// Memoization storage used by `expr_with_paren` when backtracking.
memo: MemoArena,
}
/// The current token together with information about what preceded it.
#[derive(Debug, Clone)]
struct Token {
// The kind the parser acts on. This may be a substituted `End` when the
// newline mode stops at this token; the real kind is kept in `node`.
kind: SyntaxKind,
// The node produced by the lexer for this token.
node: SyntaxNode,
// Number of trivia nodes pushed to `Parser::nodes` directly before this
// token.
n_trivia: usize,
// Set if the trivia before this token contained a newline.
newline: Option<Newline>,
// Offset in the text at which this token starts (after its trivia).
start: usize,
// Offset in the text at which the previous token ended (before the trivia).
prev_end: usize,
}
/// Information about the newline(s) that preceded a token.
#[derive(Debug, Copy, Clone)]
struct Newline {
// The column at which the token starts. Only computed when the lexer is
// in markup mode; `None` otherwise.
column: Option<usize>,
// Whether any of the trivia before the token was a paragraph break.
parbreak: bool,
}
/// How the parser treats a token that is preceded by a newline. When a mode
/// "stops", the token is presented to the parser as `End`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum AtNewline {
// Never stop at a newline.
Continue,
// Always stop at a newline.
Stop,
// Stop at a newline unless the next token is `else` or a dot.
ContextualContinue,
// Stop only if the newline was part of a paragraph break.
StopParBreak,
// Stop if the token's column is known and is at most the given column.
RequireColumn(usize),
}
impl AtNewline {
    /// Decides whether the parser should stop at a token of the given
    /// `kind` that was preceded by `newline`, according to this mode.
    fn stop_at(self, newline: Newline, kind: SyntaxKind) -> bool {
        let Newline { column, parbreak } = newline;
        match self {
            Self::Continue => false,
            Self::Stop => true,
            // `else` and a leading dot continue the previous expression.
            Self::ContextualContinue => {
                !matches!(kind, SyntaxKind::Else | SyntaxKind::Dot)
            }
            Self::StopParBreak => parbreak,
            // Without a known column, parsing continues.
            Self::RequireColumn(min_col) => match column {
                Some(column) => column <= min_col,
                None => false,
            },
        }
    }
}
/// A position in the parser's node list, stored as an index into
/// `Parser::nodes`. Used to index into the parser and as the start position
/// for `Parser::wrap`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct Marker(usize);
impl Index<Marker> for Parser<'_> {
type Output = SyntaxNode;
fn index(&self, m: Marker) -> &Self::Output {
&self.nodes[m.0]
}
}
impl IndexMut<Marker> for Parser<'_> {
fn index_mut(&mut self, m: Marker) -> &mut Self::Output {
&mut self.nodes[m.0]
}
}
/// Construction, finishing, and read-only accessors for the current token.
impl<'s> Parser<'s> {
/// Creates a parser for `text` that starts lexing at byte `offset` in the
/// given syntax mode. The first token is lexed immediately with the
/// `Continue` newline mode.
fn new(text: &'s str, offset: usize, mode: SyntaxMode) -> Self {
let mut lexer = Lexer::new(text, mode);
lexer.jump(offset);
let nl_mode = AtNewline::Continue;
let mut nodes = vec![];
// Lexing the first token may already push leading trivia into `nodes`.
let token = Self::lex(&mut nodes, &mut lexer, nl_mode);
Self {
text,
lexer,
nl_mode,
token,
balanced: true,
nodes,
memo: Default::default(),
}
}
/// Consumes the parser and returns the nodes produced so far.
fn finish(self) -> Vec<SyntaxNode> {
self.nodes
}
/// Consumes the parser and wraps all produced nodes in one inner node of
/// the given kind. Panics if the parser is not at the end.
fn finish_into(self, kind: SyntaxKind) -> SyntaxNode {
assert!(self.at(SyntaxKind::End));
SyntaxNode::inner(kind, self.finish())
}
/// The kind of the current token.
fn current(&self) -> SyntaxKind {
self.token.kind
}
/// Whether the current token is of the given kind.
fn at(&self, kind: SyntaxKind) -> bool {
self.token.kind == kind
}
/// Whether the current token's kind is contained in `set`.
fn at_set(&self, set: SyntaxSet) -> bool {
set.contains(self.token.kind)
}
/// Whether the current token is `End`.
fn end(&self) -> bool {
self.at(SyntaxKind::End)
}
/// Whether the current token is of the given kind and was not preceded by
/// any trivia.
fn directly_at(&self, kind: SyntaxKind) -> bool {
self.token.kind == kind && !self.had_trivia()
}
/// Whether the current token was preceded by trivia.
fn had_trivia(&self) -> bool {
self.token.n_trivia > 0
}
/// Whether the current token was preceded by a newline.
fn had_newline(&self) -> bool {
self.token.newline.is_some()
}
/// The column of the current token: the column recorded with the preceding
/// newline if there is one, otherwise computed by the lexer from the
/// token's start offset.
fn current_column(&self) -> usize {
self.token
.newline
.and_then(|newline| newline.column)
.unwrap_or_else(|| self.lexer.column(self.token.start))
}
/// The source text of the current token.
fn current_text(&self) -> &'s str {
&self.text[self.token.start..self.current_end()]
}
/// The offset in the text at which the current token starts.
fn current_start(&self) -> usize {
self.token.start
}
/// The offset in the text at which the current token ends, which is the
/// lexer's cursor position.
fn current_end(&self) -> usize {
self.lexer.cursor()
}
/// The offset in the text at which the previous token ended.
fn prev_end(&self) -> usize {
self.token.prev_end
}
}
/// Consuming tokens, building nodes, and switching lexer and newline modes.
impl<'s> Parser<'s> {
/// A marker at the current end of the node list, i.e. the position at
/// which the current token would be inserted when eaten.
fn marker(&self) -> Marker {
Marker(self.nodes.len())
}
/// A marker positioned before the trivia that precedes the current token.
fn before_trivia(&self) -> Marker {
Marker(self.nodes.len() - self.token.n_trivia)
}
/// Eats the current token and returns a mutable reference to its node.
#[track_caller]
fn eat_and_get(&mut self) -> &mut SyntaxNode {
// The eaten token's node lands at the current end of the list.
let offset = self.nodes.len();
self.eat();
&mut self.nodes[offset]
}
/// Eats the current token if it is of the given kind. Returns whether it
/// was eaten.
fn eat_if(&mut self, kind: SyntaxKind) -> bool {
let at = self.at(kind);
if at {
self.eat();
}
at
}
/// Eats the current token if it is of the given kind and was not preceded
/// by trivia. Returns whether it was eaten.
fn eat_if_direct(&mut self, kind: SyntaxKind) -> bool {
let at = self.directly_at(kind);
if at {
self.eat();
}
at
}
/// Eats the current token, panicking if it is not of the given kind.
#[track_caller]
fn assert(&mut self, kind: SyntaxKind) {
assert_eq!(self.token.kind, kind);
self.eat();
}
/// Converts the current token's node to the given kind and eats it.
fn convert_and_eat(&mut self, kind: SyntaxKind) {
self.token.node.convert_to_kind(kind);
self.eat();
}
/// Moves the current token's node into the node list and lexes the next
/// token (pushing any trivia in between into the node list as well).
fn eat(&mut self) {
self.nodes.push(std::mem::take(&mut self.token.node));
self.token = Self::lex(&mut self.nodes, &mut self.lexer, self.nl_mode);
}
/// Forgets the trivia before the current token, so that `before_trivia`
/// (and thus `wrap`) now treats it as regular preceding nodes.
fn flush_trivia(&mut self) {
self.token.n_trivia = 0;
self.token.prev_end = self.token.start;
}
/// Wraps the nodes from `from` up to (but excluding) the trivia before
/// the current token in a new inner node of the given kind.
fn wrap(&mut self, from: Marker, kind: SyntaxKind) {
let to = self.before_trivia().0;
// Clamp `from` so that a marker inside the trailing trivia yields an
// empty node instead of an invalid range.
let from = from.0.min(to);
let children = self.nodes.drain(from..to).collect();
self.nodes.insert(from, SyntaxNode::inner(kind, children));
}
/// Runs `func` with the lexer in the given syntax mode and with the given
/// newline mode, restoring both afterwards.
fn enter_modes(
&mut self,
mode: SyntaxMode,
stop: AtNewline,
func: impl FnOnce(&mut Parser<'s>),
) {
let previous = self.lexer.mode();
self.lexer.set_mode(mode);
self.with_nl_mode(stop, func);
if mode != previous {
// The current token was lexed in the inner mode. Discard it together
// with its trivia and lex again from the end of the previous token in
// the restored mode.
self.lexer.set_mode(previous);
self.lexer.jump(self.token.prev_end);
self.nodes.truncate(self.nodes.len() - self.token.n_trivia);
self.token = Self::lex(&mut self.nodes, &mut self.lexer, self.nl_mode);
}
}
/// Runs `func` with the given newline mode and restores the previous mode
/// afterwards.
fn with_nl_mode(&mut self, mode: AtNewline, func: impl FnOnce(&mut Parser<'s>)) {
let previous = self.nl_mode;
self.nl_mode = mode;
func(self);
self.nl_mode = previous;
// If the current token follows a newline and the mode changed, its kind
// may have been replaced by `End` under the inner mode (or may need to
// be under the restored one), so re-evaluate it with the restored mode.
if let Some(newline) = self.token.newline
&& mode != previous
{
let actual_kind = self.token.node.kind();
if self.nl_mode.stop_at(newline, actual_kind) {
self.token.kind = SyntaxKind::End;
} else {
self.token.kind = actual_kind;
}
}
}
/// Lexes the next non-trivia token.
///
/// Trivia encountered on the way is pushed to `nodes`. If the trivia
/// contained a newline and `nl_mode` says to stop there, the returned
/// token's kind is `End` while its node keeps the real kind.
fn lex(nodes: &mut Vec<SyntaxNode>, lexer: &mut Lexer, nl_mode: AtNewline) -> Token {
let prev_end = lexer.cursor();
let mut start = prev_end;
let (mut kind, mut node) = lexer.next();
let mut n_trivia = 0;
let mut had_newline = false;
let mut parbreak = false;
// Skip over trivia, recording newlines and paragraph breaks.
while kind.is_trivia() {
had_newline |= lexer.newline(); parbreak |= kind == SyntaxKind::Parbreak;
n_trivia += 1;
nodes.push(node);
start = lexer.cursor();
(kind, node) = lexer.next();
}
let newline = if had_newline {
// A column is only reported in markup mode.
let column =
(lexer.mode() == SyntaxMode::Markup).then(|| lexer.column(start));
let newline = Newline { column, parbreak };
// Present the token as `End` to halt the parser; the real kind remains
// available from `node`.
if nl_mode.stop_at(newline, kind) {
kind = SyntaxKind::End;
}
Some(newline)
} else {
None
};
Token { kind, node, n_trivia, newline, start, prev_end }
}
}
/// Storage for memoized parse results, filled by `memoize_parsed_nodes` and
/// read back by `restore_memo_or_checkpoint`.
#[derive(Default)]
struct MemoArena {
/// Backing store for copies of all memoized node runs, appended one after
/// another.
arena: Vec<SyntaxNode>,
/// Maps a memo key to the range of `arena` holding the memoized nodes and
/// the parser state captured when the entry was stored.
memo_map: FxHashMap<MemoKey, (Range<usize>, PartialState)>,
}
/// Key type of `MemoArena::memo_map`. `restore_memo_or_checkpoint` uses the
/// start offset of the current token as the key.
type MemoKey = usize;
/// A snapshot produced by `Parser::checkpoint` that `Parser::restore` can
/// roll the parser back to.
struct Checkpoint {
/// Length of the parser's `nodes` vector at snapshot time; `restore`
/// truncates back to it.
node_len: usize,
/// Lexer position, lexer mode and current token at snapshot time.
state: PartialState,
}
/// The part of the parser state that `restore_partial` reinstates: the lexer
/// position and mode plus the current token. It does not cover the `nodes`
/// vector.
#[derive(Clone)]
struct PartialState {
/// Lexer cursor offset to jump back to.
cursor: usize,
/// Lexer mode to switch back to.
lex_mode: SyntaxMode,
/// The parser's current token at the time the state was captured.
token: Token,
}
/// Memoization and checkpointing: saving parser state so a parse attempt
/// can be rolled back or replayed.
impl Parser<'_> {
    /// Stores the nodes parsed since `prev_len`, together with the current
    /// lexer/token state, in the memo under `key`.
    fn memoize_parsed_nodes(&mut self, key: MemoKey, prev_len: usize) {
        let Checkpoint { state, node_len } = self.checkpoint();
        let parsed = &self.nodes[prev_len..node_len];

        let arena = &mut self.memo.arena;
        let first = arena.len();
        arena.extend_from_slice(parsed);
        let stored = first..arena.len();

        self.memo.memo_map.insert(key, (stored, state));
    }

    /// Looks up the memo for the current token's start offset.
    ///
    /// On a hit, appends the memoized nodes to `nodes`, restores the stored
    /// lexer/token state and returns `None`. On a miss, returns the key
    /// together with a fresh checkpoint.
    fn restore_memo_or_checkpoint(&mut self) -> Option<(MemoKey, Checkpoint)> {
        let key: MemoKey = self.current_start();
        let Some((range, state)) = self.memo.memo_map.get(&key).cloned() else {
            return Some((key, self.checkpoint()));
        };

        // Only append and restore the partial state; `nodes` is not
        // truncated on this path.
        self.nodes.extend_from_slice(&self.memo.arena[range]);
        self.restore_partial(state);
        None
    }

    /// Rolls the parser back to `checkpoint`: drops every node pushed since
    /// and reinstates the lexer position, lexer mode and current token.
    fn restore(&mut self, checkpoint: Checkpoint) {
        let Checkpoint { node_len, state } = checkpoint;
        self.nodes.truncate(node_len);
        self.restore_partial(state);
    }

    /// Reinstates the lexer position, lexer mode and current token from
    /// `state` without touching `nodes`.
    fn restore_partial(&mut self, state: PartialState) {
        let PartialState { cursor, lex_mode, token } = state;
        self.lexer.jump(cursor);
        self.lexer.set_mode(lex_mode);
        self.token = token;
    }

    /// Captures the current length of `nodes` together with the lexer
    /// position, lexer mode and a clone of the current token.
    fn checkpoint(&self) -> Checkpoint {
        Checkpoint {
            node_len: self.nodes.len(),
            state: PartialState {
                cursor: self.lexer.cursor(),
                lex_mode: self.lexer.mode(),
                token: self.token.clone(),
            },
        }
    }
}
/// Helpers for consuming expected or unexpected tokens and recording syntax
/// errors when the input does not match.
impl Parser<'_> {
    /// Eats the current token if it has the given `kind`; otherwise records
    /// an "expected ..." error. Returns whether the token matched.
    fn expect(&mut self, kind: SyntaxKind) -> bool {
        let at = self.at(kind);
        if at {
            self.eat();
        } else if kind == SyntaxKind::Ident && self.token.kind.is_keyword() {
            // A keyword where an identifier was expected: consume the
            // keyword and turn it into the error itself.
            self.trim_errors();
            self.eat_and_get().expected(kind.name());
        } else {
            // A missing grouping token clears the `balanced` flag.
            self.balanced &= !kind.is_grouping();
            self.expected(kind.name());
        }
        at
    }

    /// Eats the closing delimiter `kind` if present; otherwise converts the
    /// node at `open` into an "unclosed delimiter" error.
    ///
    /// # Panics
    ///
    /// Panics if the delimiter is missing and `open` is out of bounds for
    /// `nodes`.
    #[track_caller]
    fn expect_closing_delimiter(&mut self, open: Marker, kind: SyntaxKind) {
        if !self.eat_if(kind) {
            self.nodes[open.0].convert_to_error("unclosed delimiter");
        }
    }

    /// Records an "expected `thing`" error in front of the current token's
    /// trivia, unless the node right before that position is already an
    /// error.
    fn expected(&mut self, thing: &str) {
        if !self.after_error() {
            self.expected_at(self.before_trivia(), thing);
        }
    }

    /// Whether the node directly before the current token's leading trivia
    /// is an error node. Returns `false` when there is no such node.
    fn after_error(&self) -> bool {
        let Marker(end) = self.before_trivia();
        end > 0 && self.nodes[end - 1].kind().is_error()
    }

    /// Inserts an "expected `thing`" error node with empty text at the
    /// position of marker `m`.
    fn expected_at(&mut self, m: Marker, thing: &str) {
        let error =
            SyntaxNode::error(SyntaxError::new(eco_format!("expected {thing}")), "");
        self.nodes.insert(m.0, error);
    }

    /// Attaches `hint` to the node directly before the current token's
    /// leading trivia. Does nothing when there is no such node.
    fn hint(&mut self, hint: &str) {
        let Marker(end) = self.before_trivia();
        // `end` can be zero when nothing has been pushed yet, so the
        // subtraction has to be checked rather than relying on `get_mut`
        // rejecting a wrapped-around index.
        let target = end.checked_sub(1).and_then(|index| self.nodes.get_mut(index));
        if let Some(error) = target {
            error.hint(hint);
        }
    }

    /// Eats the current token and marks its node as unexpected, first
    /// removing empty error nodes directly in front of it.
    fn unexpected(&mut self) {
        self.trim_errors();
        // An unexpected grouping token clears the `balanced` flag.
        self.balanced &= !self.token.kind.is_grouping();
        self.eat_and_get().unexpected();
    }

    /// Removes the run of empty error nodes that sits directly before the
    /// current token's leading trivia.
    fn trim_errors(&mut self) {
        let Marker(end) = self.before_trivia();
        let mut start = end;
        while start > 0
            && self.nodes[start - 1].kind().is_error()
            && self.nodes[start - 1].is_empty()
        {
            start -= 1;
        }
        self.nodes.drain(start..end);
    }
}