mod defs;
mod exprs;
mod span;
mod types;
use chumsky::input::{Stream, ValueInput};
use chumsky::prelude::*;
use crate::ast::{
BlockStatement, Definition, Expr, File, Ident, LetBinding, Literal, Statement, UseItems,
UseStmt, Visibility,
};
use crate::lexer::Token;
use crate::location::Span as CustomSpan;
use defs::{binding_pattern_parser, definition_parser};
use exprs::expr_parser;
use span::fill_file_spans;
use types::type_parser;
/// Parses a slice of lexed tokens into a [`File`] without access to the source text.
///
/// This is a thin wrapper that forwards to `parse_file_internal` with no source.
///
/// # Errors
///
/// Returns a list of `(message, span)` pairs, one per error reported by the parser.
pub fn parse_file(tokens: &[(Token, CustomSpan)]) -> Result<File, Vec<(String, CustomSpan)>> {
    let no_source: Option<&str> = None;
    parse_file_internal(tokens, no_source)
}
/// Parses a slice of lexed tokens into a [`File`], using `source` to build spans.
///
/// This is a thin wrapper that forwards to `parse_file_internal` with
/// `Some(source)`.
///
/// # Errors
///
/// Returns a list of `(message, span)` pairs, one per error reported by the parser.
pub fn parse_file_with_source(
    tokens: &[(Token, CustomSpan)],
    source: &str,
) -> Result<File, Vec<(String, CustomSpan)>> {
    let with_source = Some(source);
    parse_file_internal(tokens, with_source)
}
fn parse_file_internal(
tokens: &[(Token, CustomSpan)],
source: Option<&str>,
) -> Result<File, Vec<(String, CustomSpan)>> {
let token_iter = tokens.iter().map(|(tok, span)| {
(
tok.clone(),
SimpleSpan::new(span.start.offset, span.end.offset),
)
});
let end_offset = tokens.last().map_or(0, |(_, s)| s.end.offset);
let end_span: SimpleSpan = (end_offset..end_offset).into();
let token_stream = Stream::from_iter(token_iter).map(end_span, |(t, s)| (t, s));
let mut file = file_parser()
.parse(token_stream)
.into_result()
.map_err(|errors| {
errors
.into_iter()
.map(|e| {
let simple_span = e.span();
let message = format_parse_error(&e);
let custom_span = source.map_or_else(
|| {
tokens
.iter()
.find(|(_, span)| {
span.start.offset == simple_span.start
&& span.end.offset == simple_span.end
})
.map_or_else(|| span_from_simple(*simple_span), |(_, span)| *span)
},
|src| {
CustomSpan::from_range_with_source(
simple_span.start,
simple_span.end,
src,
)
},
);
(message, custom_span)
})
.collect::<Vec<_>>()
})?;
if let Some(src) = source {
fill_file_spans(&mut file, src);
}
Ok(file)
}
/// Renders a chumsky `Rich` error as a single-line message.
///
/// The message always starts with `found <token> at <start>..<end>`, where
/// `<token>` is `end of input` when the error has no found token. It is followed
/// by `, expected <x>` when exactly one pattern was expected, or by
/// `, expected one of: <a>, <b>, ...` when several were. Expected patterns other
/// than tokens, labels and end-of-input are rendered as `<unknown>`.
#[expect(
    clippy::wildcard_enum_match_arm,
    reason = "RichPattern is defined in the chumsky library and cannot be exhaustively enumerated"
)]
fn format_parse_error(error: &Rich<'_, Token>) -> String {
    use chumsky::error::RichPattern;

    let found = error
        .found()
        .map_or_else(|| "end of input".to_string(), |token| token.to_string());

    let expected: Vec<String> = error
        .expected()
        .map(|pattern| match pattern {
            RichPattern::Token(token) => (**token).to_string(),
            RichPattern::Label(label) => label.to_string(),
            RichPattern::EndOfInput => "end of input".to_string(),
            _ => "<unknown>".to_string(),
        })
        .collect();

    let span = error.span();
    // A slice pattern picks the wording by the number of expected patterns
    // without any indexing.
    match expected.as_slice() {
        [] => format!("found {} at {}..{}", found, span.start, span.end),
        [only] => format!(
            "found {} at {}..{}, expected {}",
            found, span.start, span.end, only
        ),
        several => format!(
            "found {} at {}..{}, expected one of: {}",
            found,
            span.start,
            span.end,
            several.join(", ")
        ),
    }
}
/// Builds the top-level parser: zero or more statements collected into a [`File`].
///
/// Each statement is wrapped in a `skip_then_retry_until` recovery strategy that
/// skips arbitrary tokens and uses either a statement-introducing keyword
/// (checked via `rewind`, so it is not consumed) or end of input as its stopping
/// condition. The resulting `File` carries `crate::ast::FORMAT_VERSION` and the
/// span covered by the whole parse.
fn file_parser<'tokens, I>(
) -> impl Parser<'tokens, I, File, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    // Keywords that can begin a top-level statement.
    let statement_keyword = one_of([
        Token::Use,
        Token::Let,
        Token::Pub,
        Token::Struct,
        Token::Enum,
        Token::Trait,
        Token::Impl,
        Token::Fn,
        Token::Extern,
        Token::Module,
    ])
    .ignored();

    let resync_point = statement_keyword.rewind().ignored().or(end());
    let recovery = skip_then_retry_until(any().ignored(), resync_point);

    statement_parser()
        .recover_with(recovery)
        .repeated()
        .collect::<Vec<_>>()
        .map_with(|statements, extra| File {
            format_version: crate::ast::FORMAT_VERSION,
            statements,
            span: span_from_simple(extra.span()),
        })
}
/// Parses one top-level statement, optionally preceded by doc comments.
///
/// The statement is a `use`, a `let` binding, or a definition (tried in that
/// order). Any leading doc comment text is attached through
/// `attach_doc_to_statement`.
fn statement_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Statement, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    let statement_body = choice((
        use_stmt_parser().map(Statement::Use),
        let_binding_parser().map(|binding| Statement::Let(Box::new(binding))),
        definition_parser().map(|definition| Statement::Definition(Box::new(definition))),
    ));

    doc_comments_parser()
        .then(statement_body)
        .map(|(doc, statement)| attach_doc_to_statement(doc, statement))
        .labelled("statement (use, let, or definition: struct, enum, trait, impl, fn, extern, mod)")
}
/// Parses a run of consecutive `Token::DocComment` tokens.
///
/// Yields `None` when there are no doc comment tokens; otherwise yields the
/// comment texts joined with `"\n"`.
pub(super) fn doc_comments_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Option<String>, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    let doc_line = select! { Token::DocComment(text) => text };

    doc_line
        .repeated()
        .collect::<Vec<_>>()
        .map(|lines| (!lines.is_empty()).then(|| lines.join("\n")))
}
/// Attaches optional doc text to a statement.
///
/// `let` bindings get their `doc` field set directly, and definitions are
/// routed through `attach_doc_to_definition`. The statement is returned
/// unchanged when there is no doc text or when it is a `use` statement (the doc
/// text is discarded in that case).
fn attach_doc_to_statement(doc: Option<String>, stmt: Statement) -> Statement {
    match (doc, stmt) {
        (Some(text), Statement::Let(mut binding)) => {
            binding.doc = Some(text);
            Statement::Let(binding)
        }
        (Some(text), Statement::Definition(definition)) => {
            Statement::Definition(Box::new(attach_doc_to_definition(text, *definition)))
        }
        (None, untouched) | (Some(_), untouched @ Statement::Use(_)) => untouched,
    }
}
/// Stores `doc` in the `doc` field of whichever definition variant is given and
/// returns the updated definition. Any previous `doc` value is overwritten.
fn attach_doc_to_definition(doc: String, mut def: Definition) -> Definition {
    // Every variant carries its own `doc` field; pick the right one, then
    // assign once.
    let doc_slot = match &mut def {
        Definition::Function(function) => &mut function.doc,
        Definition::Struct(structure) => &mut structure.doc,
        Definition::Trait(trait_def) => &mut trait_def.doc,
        Definition::Enum(enumeration) => &mut enumeration.doc,
        Definition::Impl(implementation) => &mut implementation.doc,
        Definition::Module(module) => &mut module.doc,
    };
    *doc_slot = Some(doc);
    def
}
/// Parses a `use` statement: optional visibility, the `use` keyword, a
/// `::`-separated path of at least one identifier, and an optional trailing
/// `::` followed by use items.
///
/// When no explicit items follow the path, the last path segment is removed from
/// the path and becomes a `UseItems::Single`.
fn use_stmt_parser<'tokens, I>(
) -> impl Parser<'tokens, I, UseStmt, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    let path_segments = ident_parser()
        .separated_by(just(Token::DoubleColon))
        .at_least(1)
        .collect::<Vec<_>>();
    let explicit_items = just(Token::DoubleColon)
        .ignore_then(use_items_parser())
        .or_not();

    visibility_parser()
        .then_ignore(just(Token::Use))
        .then(path_segments)
        .then(explicit_items)
        .map_with(|((visibility, mut path), explicit), extra| {
            let items = explicit.unwrap_or_else(|| {
                // The path parser requires at least one segment, so the empty
                // identifier below is only a defensive fallback.
                let last_segment = path.pop().unwrap_or_else(|| Ident {
                    name: String::new(),
                    span: CustomSpan::default(),
                });
                UseItems::Single(last_segment)
            });
            UseStmt {
                visibility,
                path,
                items,
                span: span_from_simple(extra.span()),
            }
        })
}
/// Parses the item part of a `use` statement.
///
/// Alternatives are tried in order: a `*` glob, a brace-delimited,
/// comma-separated list of at least one identifier, or a single identifier.
fn use_items_parser<'tokens, I>(
) -> impl Parser<'tokens, I, UseItems, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    let glob = just(Token::Star).to(UseItems::Glob);

    let braced_list = ident_parser()
        .separated_by(just(Token::Comma))
        .at_least(1)
        .collect::<Vec<_>>()
        .delimited_by(just(Token::LBrace), just(Token::RBrace))
        .map(UseItems::Multiple);

    let single = ident_parser().map(UseItems::Single);

    choice((glob, braced_list, single))
}
/// Parses a `let` binding: optional visibility, `let`, optional `mut`, a binding
/// pattern, an optional `: type` annotation, `=`, and the value expression.
///
/// The produced [`LetBinding`] always has `doc` set to `None`; doc text is
/// attached later by the caller.
fn let_binding_parser<'tokens, I>(
) -> impl Parser<'tokens, I, LetBinding, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    let optional_type = just(Token::Colon).ignore_then(type_parser()).or_not();

    visibility_parser()
        .then_ignore(just(Token::Let))
        .then(mutability_parser())
        .then(binding_pattern_parser())
        .then(optional_type)
        .then_ignore(just(Token::Equals))
        .then(expr_parser())
        .map_with(|parts, extra| {
            let ((((visibility, mutable), pattern), type_annotation), value) = parts;
            LetBinding {
                visibility,
                mutable,
                pattern,
                type_annotation,
                value,
                doc: None,
                span: span_from_simple(extra.span()),
            }
        })
}
/// Parses an optional `pub` keyword, yielding `Visibility::Public` when it is
/// present and `Visibility::Private` otherwise. Never fails on a missing keyword.
fn visibility_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Visibility, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    just(Token::Pub).or_not().map(|pub_keyword| {
        if pub_keyword.is_some() {
            Visibility::Public
        } else {
            Visibility::Private
        }
    })
}
/// Parses an optional `mut` keyword, yielding `true` when it is present and
/// `false` otherwise.
fn mutability_parser<'tokens, I>(
) -> impl Parser<'tokens, I, bool, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    just(Token::Mut)
        .to(true)
        .or_not()
        .map(|is_mutable| is_mutable.unwrap_or(false))
}
/// Parses an identifier token or the `self` keyword into an [`Ident`].
///
/// The `self` keyword produces an identifier named `"self"`. The parser is
/// labelled `identifier` for error messages.
fn ident_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Ident, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    select! {
        Token::SelfKeyword = extra => {
            Ident::new(String::from("self"), span_from_simple(extra.span()))
        },
        Token::Ident(text) = extra => Ident::new(text, span_from_simple(extra.span())),
    }
    .labelled("identifier")
}
/// Parses an identifier token into an [`Ident`]; unlike `ident_parser`, the
/// `self` keyword is not accepted. Labelled `identifier` for error messages.
fn ident_no_self_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Ident, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    select! {
        Token::Ident(text) = extra => {
            let location = span_from_simple(extra.span());
            Ident::new(text, location)
        }
    }
    .labelled("identifier")
}
/// Parses the name of an invocation target into an [`Ident`].
///
/// Accepts an identifier token or the `self` keyword (which yields the name
/// `"self"`), i.e. the same tokens as `ident_parser`. Labelled `identifier` for
/// error messages.
fn invocation_target_parser<'tokens, I>(
) -> impl Parser<'tokens, I, Ident, extra::Err<Rich<'tokens, Token>>> + Clone
where
    I: ValueInput<'tokens, Token = Token, Span = SimpleSpan>,
{
    select! {
        Token::SelfKeyword = extra => {
            let location = span_from_simple(extra.span());
            Ident::new("self".to_string(), location)
        },
        Token::Ident(text) = extra => {
            let location = span_from_simple(extra.span());
            Ident::new(text, location)
        },
    }
    .labelled("identifier")
}
/// Collapses a list of block statements into a single expression.
///
/// * No statements: a `Nil` literal carrying `span`.
/// * The last statement is an expression: it becomes the block's result. If it
///   was the only statement, it is returned as-is, without a wrapping block.
/// * The last statement is a `let` or an assignment: it stays in the statement
///   list and the block's result is a `Nil` literal carrying `span`.
///
/// In every remaining case the outcome is an `Expr::Block` carrying `span`.
fn block_statements_to_expr(mut statements: Vec<BlockStatement>, span: crate::Span) -> Expr {
    let nil_literal = || Expr::Literal {
        value: Literal::Nil,
        span,
    };

    let result = match statements.pop() {
        None => return nil_literal(),
        Some(BlockStatement::Expr(tail)) => tail,
        Some(trailing @ (BlockStatement::Let { .. } | BlockStatement::Assign { .. })) => {
            // Not a value-producing statement: put it back and yield nil.
            statements.push(trailing);
            nil_literal()
        }
    };

    if statements.is_empty() {
        result
    } else {
        Expr::Block {
            statements,
            result: Box::new(result),
            span,
        }
    }
}
/// Converts a chumsky [`SimpleSpan`] into the crate's span type by passing its
/// `start` and `end` to `CustomSpan::from_range`.
const fn span_from_simple(s: SimpleSpan) -> CustomSpan {
    let start = s.start;
    let end = s.end;
    CustomSpan::from_range(start, end)
}
#[cfg(test)]
mod tests;