use alloc::string::{String, ToString};
use alloc::vec;
use alloc::vec::Vec;
use nom::{
IResult, Parser,
branch::alt,
bytes::complete::{tag, take_while},
character::complete::{char, line_ending, satisfy, space0, space1},
combinator::{eof, opt, recognize, value},
multi::many0,
sequence::{delimited, preceded, terminated},
};
use crate::ast::{Atom, Body, Conn, ListOp, Literal, Located, Program, Span, Statement};
use crate::diag::{Diagnostic, Diagnostics};
use crate::keywords::{is_reserved, is_top_level, keyword_in, kw};
/// Error value produced by the parsers in this module.
///
/// It implements nom's `ParseError` (see the impl below) so it can be used as
/// the error type of the nom combinators.
#[derive(Debug, Clone)]
struct Problem<'a> {
/// Location in the source that the problem refers to.
input: Span<'a>,
/// Human-readable description of what went wrong.
message: String,
}
impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
Problem {
input,
message: String::from("unexpected token"),
}
}
fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
other
}
}
/// Result type shared by the parsers in this module: nom's `IResult` over a
/// `Span` input, with `Problem` as the error type.
type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
match r {
Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
input: at,
message: String::from(msg),
})),
other => other,
}
}
fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
Err(nom::Err::Error(Problem {
input,
message: String::from("unexpected token"),
}))
}
/// Returns `true` for characters allowed after the first character of an
/// identifier: an underscore or any Unicode alphanumeric character.
fn is_ident_char(c: char) -> bool {
    matches!(c, '_') || c.is_alphanumeric()
}
/// Recognizes an identifier-shaped word: one alphabetic character followed by
/// any number of identifier characters. Reserved words are NOT filtered here.
fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
    let head = satisfy(char::is_alphabetic);
    let tail = take_while(is_ident_char);
    recognize((head, tail)).parse(input)
}
/// Parses an identifier that `is_reserved` does not reject.
///
/// On success the returned `Located` holds the identifier text and the span
/// at which the identifier starts. A reserved word yields a recoverable
/// error located at that same start position.
fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
    let (remaining, word) = raw_identifier(input)?;
    let text: &'a str = *word.fragment();
    if is_reserved(text) {
        perr(input)
    } else {
        let located = Located {
            data: text,
            span: input,
        };
        Ok((remaining, located))
    }
}
/// Recognizes a `//` comment up to, but not including, the next `\n` or `\r`.
fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
    let not_line_break = |c: char| !matches!(c, '\n' | '\r');
    recognize((tag("//"), take_while(not_line_break))).parse(input)
}
fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
}
fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
value((), (space0, opt(comment), line_ending)).parse(input)
}
/// Skips zero or more consecutive blank/comment-only lines.
fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
    let (rest, _skipped) = many0(noise_line).parse(input)?;
    Ok((rest, ()))
}
/// Parses an atom: `[<domain>.]<subject> <predicate> [<object>]`.
///
/// The domain prefix and the object are optional. The resulting `Located`
/// is anchored at the position where the atom starts.
fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
    let (rest, (domain, subject, _, predicate, object)) = (
        opt(terminated(identifier, char('.'))),
        identifier,
        space1,
        identifier,
        opt(preceded(space1, identifier)),
    )
        .parse(input)?;

    let data = Atom {
        domain: domain.map(|d| d.data),
        subject: subject.data,
        predicate: predicate.data,
        object: object.map(|o| o.data),
    };
    Ok((rest, Located { data, span: input }))
}
fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
let start = input;
let (input, neg) = opt(terminated(tag(kw::NOT), space1)).parse(input)?;
let (input, a) = atom(input)?;
Ok((
input,
Located {
data: Literal {
negated: neg.is_some(),
atom: a.data,
},
span: start,
},
))
}
fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
let (input, _) = space0(input)?;
let (input, a) = atom(input)?;
let (input, _) = eol(input)?;
Ok((input, a))
}
/// Parses one of the list operator keywords and maps it to its `ListOp`.
///
/// Alternatives are tried in the order EXCLUSIVE, FORBIDS, ONEOF, ATLEAST.
fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
    let exclusive = value(ListOp::Exclusive, tag(kw::EXCLUSIVE));
    let forbids = value(ListOp::Forbids, tag(kw::FORBIDS));
    let one_of = value(ListOp::OneOf, tag(kw::ONEOF));
    let at_least = value(ListOp::AtLeast, tag(kw::ATLEAST));
    alt((exclusive, forbids, one_of, at_least)).parse(input)
}
fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
let (input, _) = space0(input)?;
let (input, op) = list_op(input)?;
let (input, _) = promote(
eol(input),
input,
"expected a newline after the list operator",
)?;
let at = input;
let (input, first) = promote(
atom_line(input),
at,
"a list premise needs at least two atoms",
)?;
let at = input;
let (input, second) = promote(
atom_line(input),
at,
"a list premise needs at least two atoms",
)?;
let (input, rest) = many0(atom_line).parse(input)?;
let mut atoms = vec![first, second];
atoms.extend(rest);
Ok((input, Body::List { op, atoms }))
}
fn cont_line<'a>(input: Span<'a>) -> PResult<'a, (Conn, Located<'a, Literal<'a>>)> {
let (input, _) = space0(input)?;
let (input, conn) =
alt((value(Conn::And, tag(kw::AND)), value(Conn::Or, tag(kw::OR)))).parse(input)?;
let (input, _) = space1(input)?;
let at = input;
let (input, lit) = promote(
literal(input),
at,
"AND/OR expects a literal: [NOT] <Subject> <predicate> [<object>]",
)?;
let (input, _) = promote(
eol(input),
input,
"unexpected text after the AND/OR literal",
)?;
Ok((input, (conn, lit)))
}
/// Determines the single connective used by a group of continuation lines.
///
/// Returns `Conn::And` for an empty group. If a later line uses a connective
/// different from the first line's, the span of that line's literal is
/// returned as the error.
fn group_conn<'a>(conts: &[(Conn, Located<'a, Literal<'a>>)]) -> Result<Conn, Span<'a>> {
    match conts.split_first() {
        None => Ok(Conn::And),
        Some(((first, _), others)) => {
            match others.iter().find(|(conn, _)| conn != first) {
                Some((_, offender)) => Err(offender.span),
                None => Ok(*first),
            }
        }
    }
}
/// Returns a hard failure located at `at` and carrying `msg`.
fn fail_at<'a, T>(at: Span<'a>, msg: &str) -> PResult<'a, T> {
    let problem = Problem {
        input: at,
        message: msg.to_string(),
    };
    Err(nom::Err::Failure(problem))
}
fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
let (input, _) = space0(input)?;
let (input, _) = (tag(kw::WHEN), space1).parse(input)?;
let at = input;
let (input, when) = promote(
literal(input),
at,
"WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
)?;
let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
let (input, ante_rest) = many0(cont_line).parse(input)?;
let ante_conn = match group_conn(&ante_rest) {
Ok(c) => c,
Err(span) => {
return fail_at(
span,
"don't mix AND and OR in one WHEN group — split it into separate premises",
);
}
};
let (input, _) = space0(input)?;
let at = input;
let (input, _) = promote(
tag(kw::THEN).parse(input),
at,
"expected THEN to complete the WHEN ... THEN implication",
)?;
let at = input;
let (input, then) = promote(
preceded(space1, literal).parse(input),
at,
"THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
)?;
let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
let (input, cons_rest) = many0(cont_line).parse(input)?;
let cons_conn = match group_conn(&cons_rest) {
Ok(c) => c,
Err(span) => {
return fail_at(
span,
"don't mix AND and OR in one THEN group — split it into separate premises",
);
}
};
let mut antecedent = vec![when];
antecedent.extend(ante_rest.into_iter().map(|(_, l)| l));
let mut consequent = vec![then];
consequent.extend(cons_rest.into_iter().map(|(_, l)| l));
Ok((
input,
Body::Impl {
antecedent,
ante_conn,
consequent,
cons_conn,
},
))
}
/// Parses `IMPORT "<path>" [AS <alias>]`.
///
/// The path may not contain a double quote or a newline. Its `Located` span
/// points at the opening quote, while its data excludes the quotes.
fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
    let (path_at, _) = (tag(kw::IMPORT), space1).parse(input)?;

    let in_path = |c: char| !matches!(c, '"' | '\n');
    let (after_path, raw_path) = promote(
        delimited(char('"'), take_while(in_path), char('"')).parse(path_at),
        path_at,
        "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
    )?;

    let alias_clause = preceded((space1, tag(kw::AS), space1), identifier);
    let (after_alias, alias) = opt(alias_clause).parse(after_path)?;

    let (rest, _) = promote(
        eol(after_alias),
        after_alias,
        "unexpected text after the IMPORT path (did you mean AS <alias>?)",
    )?;

    let path = Located {
        data: *raw_path.fragment(),
        span: path_at,
    };
    Ok((rest, Statement::Import { path, alias }))
}
fn stmt_domain<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
let (input, _) = (tag(kw::DOMAIN), space1).parse(input)?;
let at = input;
let (input, name) = promote(
identifier(input),
at,
"DOMAIN expects a name (a lowercase identifier), e.g. DOMAIN physics",
)?;
let (input, _) = promote(eol(input), input, "unexpected text after the DOMAIN name")?;
Ok((input, Statement::Domain(name)))
}
fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
let (input, _) = (tag(kw::FACT), space1).parse(input)?;
let at = input;
let (input, a) = promote(
atom(input),
at,
"FACT expects an atom: <Subject> <predicate> [<object>]",
)?;
let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
Ok((input, Statement::Fact(a)))
}
fn stmt_assume<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
let (input, _) = (tag(kw::ASSUME), space1).parse(input)?;
let at = input;
let (input, lit) = promote(
literal(input),
at,
"ASSUME expects an atom: [NOT] <Subject> <predicate> [<object>]",
)?;
let (input, _) = promote(eol(input), input, "unexpected text after the ASSUME atom")?;
Ok((input, Statement::Assume(lit)))
}
fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
let (input, _) = (tag(kw::NOT), space1).parse(input)?;
let at = input;
let (input, a) = promote(
atom(input),
at,
"NOT expects an atom: <Subject> <predicate> [<object>]",
)?;
let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
Ok((input, Statement::Negation(a)))
}
fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
let (input, _) = tag(kw::CHECK).parse(input)?;
let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
let (input, bidir) = opt(preceded(space1, tag(kw::BIDIRECTIONAL))).parse(input)?;
let (input, _) = eol(input)?;
Ok((
input,
Statement::Check {
subject,
bidirectional: bidir.is_some(),
},
))
}
fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
let (input, _) = (tag(kw::PREMISE), space1).parse(input)?;
let at = input;
let (input, name) = promote(
identifier(input),
at,
"expected a premise name (a lowercase identifier)",
)?;
let (input, _) = space0(input)?;
let (input, _) = promote(
char(':').parse(input),
input,
"expected ':' after the premise name",
)?;
let (input, _) = promote(eol(input), input, "unexpected text after 'PREMISE <name>:'")?;
let at = input;
let (input, body) = promote(
alt((list_body, impl_body)).parse(input),
at,
"a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
)?;
Ok((input, Statement::Premise { name, body }))
}
fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
let (input, _) = (tag(kw::RULE), space1).parse(input)?;
let at = input;
let (input, name) = promote(
identifier(input),
at,
"expected a rule name (a lowercase identifier)",
)?;
let (input, _) = space0(input)?;
let (input, _) = promote(
char(':').parse(input),
input,
"expected ':' after the rule name",
)?;
let (input, _) = promote(eol(input), input, "unexpected text after 'RULE <name>:'")?;
let at = input;
let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
Ok((input, Statement::Rule { name, body }))
}
fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
let (input, _) = space0(input)?;
alt((
stmt_domain,
stmt_import,
stmt_fact,
stmt_assume,
stmt_premise,
stmt_rule,
stmt_check,
stmt_negation,
))
.parse(input)
}
/// Diagnostic text used by `parse` when `statement` returns a recoverable
/// error, i.e. when no statement form matched.
const NOT_A_STATEMENT: &str = "expected a statement — a line must start with DOMAIN, FACT, NOT, ASSUME, PREMISE, RULE, CHECK, or IMPORT";
pub fn parse(src: &str) -> Result<Program<'_>, Diagnostics> {
let mut input = Span::new(src);
let mut statements = Vec::new();
let mut errors: Vec<Diagnostic> = Vec::new();
loop {
if let Ok((rest, _)) = skip_noise(input) {
input = rest;
}
if at_end(input.fragment()) {
break;
}
match statement(input) {
Ok((rest, stmt)) => {
statements.push(stmt);
input = rest;
}
Err(nom::Err::Failure(p)) => {
errors.push(make_diag(src, p.input, p.message, false));
input = resync(p.input);
}
Err(nom::Err::Error(p)) => {
errors.push(make_diag(src, p.input, String::from(NOT_A_STATEMENT), true));
input = resync(p.input);
}
Err(nom::Err::Incomplete(_)) => break,
}
}
if errors.is_empty() {
Ok(Program { statements })
} else {
Err(Diagnostics { file: None, errors })
}
}
/// Returns `true` when nothing but leading whitespace, optionally followed
/// by a single `//` comment without a newline, remains in `frag`.
fn at_end(frag: &str) -> bool {
    let rest = frag.trim_start();
    match rest.strip_prefix("//") {
        // A trailing comment ends the input only if no further line follows it.
        Some(comment) => !comment.contains('\n'),
        None => rest.is_empty(),
    }
}
/// Builds the [`Diagnostic`] for a problem located at `at`.
///
/// * `src` - the full source text, used to fetch the offending line.
/// * `at` - where the problem was detected.
/// * `message` - the text to show to the user.
/// * `general` - `true` for the generic "not a statement" diagnostic; in that
///   case no keyword is looked up in the message.
///
/// The column is measured in characters, not bytes, so it agrees with
/// `caret_width`, which counts `char`s of the line text.
fn make_diag(src: &str, at: Span<'_>, message: String, general: bool) -> Diagnostic {
    let line = at.location_line() as usize;
    // `get_column()` counts bytes, which would misplace and mis-size the caret
    // on lines with multi-byte characters before the error position.
    let col = at.get_utf8_column();
    let line_text = src.lines().nth(line.saturating_sub(1)).unwrap_or("");
    let keyword = if general { None } else { keyword_in(&message) };
    Diagnostic {
        line,
        col,
        width: caret_width(line_text, col),
        message,
        keyword,
        general,
        line_text: line_text.to_string(),
    }
}
/// Computes how many characters to underline, starting at the 1-based
/// column `col` and running to the end of `line_text` with trailing
/// whitespace ignored. The result is never smaller than 1.
fn caret_width(line_text: &str, col: usize) -> usize {
    let skipped = col.saturating_sub(1);
    let remaining = line_text.trim_end().chars().skip(skipped).count();
    remaining.max(1)
}
/// Finds the position at which parsing resumes after an error at `at`.
///
/// The rest of the line containing `at` is always dropped. Further lines are
/// then dropped until one starts with a top-level keyword (see
/// `starts_top_level`) or the input is exhausted.
fn resync(at: Span<'_>) -> Span<'_> {
    let mut cursor = consume_line(at);
    while !cursor.fragment().is_empty() && !starts_top_level(cursor) {
        cursor = consume_line(cursor);
    }
    cursor
}
fn consume_line(input: Span<'_>) -> Span<'_> {
let parsed: PResult<'_, Span<'_>> =
recognize((take_while(|c| c != '\n' && c != '\r'), opt(line_ending))).parse(input);
parsed.map(|(rest, _)| rest).unwrap_or(input)
}
/// Returns `true` when, after optional leading blanks, the input begins with
/// a run of ASCII uppercase letters that `is_top_level` accepts.
fn starts_top_level(input: Span<'_>) -> bool {
    space0::<_, Problem<'_>>(input)
        .map(|(after, _)| {
            let text: &str = after.fragment();
            // The leading uppercase run is pure ASCII, so the index found
            // here always falls on a character boundary.
            let end = text
                .find(|c: char| !c.is_ascii_uppercase())
                .unwrap_or(text.len());
            is_top_level(&text[..end])
        })
        .unwrap_or(false)
}