Skip to main content

elenchus_parser/
lib.rs

1//! elenchus-parser — parses the English-like elenchus DSL into an AST.
2//!
3//! Style mirrors `vsm-parser`: zero-copy over `&str`, `nom` + `nom_locate`
4//! for line/column tracking, and a human-friendly `^--- here` error display.
5//! Syntax is line/keyword-oriented (not S-expressions) so small models cannot
6//! trip on parentheses or indentation.
7//!
8//! Grammar (see docs/SPEC.md, "Grammar (EBNF)"):
9//! - statements are newline-terminated; indentation is cosmetic, not significant;
10//! - keywords are ALWAYS CAPS (ASCII); identifiers are content (case-sensitive,
11//!   verbatim, any-script letters — e.g. `условие`, `名前`);
12//! - block boundaries (PREMISE/RULE bodies) are found by keywords, never by indent.
13//!
14//! # Example
15//!
16//! ```
17//! use elenchus_parser::{Statement, parse};
18//!
19//! // One statement per line; the result is a flat list of `Statement`s.
20//! let program = parse("FACT socrates is human\nCHECK socrates\n").unwrap();
21//! assert_eq!(program.statements.len(), 2);
22//! assert!(matches!(program.statements[0], Statement::Fact(_)));
23//! ```
24#![no_std]
25// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
26#![warn(missing_docs)]
27
28extern crate alloc;
29
30use alloc::string::String;
31use alloc::vec;
32use alloc::vec::Vec;
33use core::fmt;
34
35use nom::{
36    IResult, Parser,
37    branch::alt,
38    bytes::complete::{tag, take_while},
39    character::complete::{char, line_ending, satisfy, space0, space1},
40    combinator::{eof, opt, recognize, value},
41    multi::many0,
42    sequence::{delimited, preceded, terminated},
43};
44use nom_locate::LocatedSpan;
45
46/// Source code fragment with line and column tracking.
47pub type Span<'a> = LocatedSpan<&'a str>;
48
49/// Container for data associated with its source location.
50#[derive(Debug, Clone, PartialEq)]
51pub struct Located<'a, T> {
52    /// The actual parsed data.
53    pub data: T,
54    /// The location in the source text (start of the construct).
55    pub span: Span<'a>,
56}
57
58// --- AST -------------------------------------------------------------------
59
60/// An atom is the triple `(subject, predicate, object?)` — the unit of identity.
61/// `Creature.A has flying` and `Creature.A has swimming` are DIFFERENT atoms.
62#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct Atom<'a> {
64    /// The entity the claim is about, e.g. `Creature.A` or `Motor`.
65    pub subject: &'a str,
66    /// The relation or property asserted, e.g. `has` or `over_100`.
67    pub predicate: &'a str,
68    /// Optional value the predicate relates the subject to, e.g. `flying`.
69    /// `None` for two-word atoms such as `Motor over_100`. The object is part of
70    /// identity: `has flying` and `has swimming` are different atoms.
71    pub object: Option<&'a str>,
72}
73
74/// A literal is an atom, optionally negated (`NOT ...`).
75#[derive(Debug, Clone, PartialEq, Eq)]
76pub struct Literal<'a> {
77    /// `true` when written with a leading `NOT` (asserts the atom is FALSE).
78    pub negated: bool,
79    /// The underlying atom being asserted true or false.
80    pub atom: Atom<'a>,
81}
82
83/// List constraint operators (body of a list-style `PREMISE`).
84///
85/// These are surface sugar; the compiler desugars each to `Impossible` clauses
86/// (see `elenchus-compiler`). The meanings below are *what the author asserts*.
87#[derive(Debug, Clone, Copy, PartialEq, Eq)]
88pub enum ListOp {
89    /// `EXCLUSIVE` — at most one of the listed atoms may be TRUE (mutual
90    /// exclusion). For n > 2 this is pairwise, not "not all at once".
91    Exclusive,
92    /// `FORBIDS` — at most one may be TRUE; a synonym of [`ListOp::Exclusive`]
93    /// (same pairwise expansion), kept as a separate word for readability.
94    Forbids,
95    /// `ONEOF` — exactly one is TRUE: at-most-one (pairwise) plus at-least-one.
96    OneOf,
97    /// `ATLEAST` — at least one of the listed atoms is TRUE.
98    AtLeast,
99}
100
101/// How the literals in a `WHEN`/`THEN` group combine. A single-literal group is
102/// always [`Conn::And`] (the connective is irrelevant with one literal).
103#[derive(Debug, Clone, Copy, PartialEq, Eq)]
104pub enum Conn {
105    /// Continuation lines used `AND` — all literals must hold.
106    And,
107    /// Continuation lines used `OR` — at least one literal must hold.
108    Or,
109}
110
111/// The body of an `PREMISE` or `RULE`.
112#[derive(Debug, Clone, PartialEq)]
113pub enum Body<'a> {
114    /// `EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST` over >= 2 atoms.
115    List {
116        /// Which list constraint this is.
117        op: ListOp,
118        /// The atoms it ranges over (the parser guarantees at least two).
119        atoms: Vec<Located<'a, Atom<'a>>>,
120    },
121    /// `WHEN ... [AND|OR ...] THEN ... [AND|OR ...]` — antecedent + consequent.
122    /// Within one group the continuation keyword is uniform (no mixing `AND`/`OR`).
123    Impl {
124        /// `WHEN`/`AND`/`OR` conditions.
125        antecedent: Vec<Located<'a, Literal<'a>>>,
126        /// How the antecedent literals combine.
127        ante_conn: Conn,
128        /// `THEN`/`AND`/`OR` results that follow when the antecedent holds.
129        consequent: Vec<Located<'a, Literal<'a>>>,
130        /// How the consequent literals combine.
131        cons_conn: Conn,
132    },
133}
134
135/// A top-level statement.
136#[derive(Debug, Clone, PartialEq)]
137pub enum Statement<'a> {
138    /// `IMPORT "path"` — reuse another source (resolved by the compiler).
139    Import(Located<'a, &'a str>),
140    /// `FACT <atom>` — a TRUE assertion.
141    Fact(Located<'a, Atom<'a>>),
142    /// `NOT <atom>` — a FALSE assertion.
143    Negation(Located<'a, Atom<'a>>),
144    /// `ASSUME [NOT] <atom>` — a *soft* (retractable) assertion. Same shape as a
145    /// `FACT`/`NOT`, but it is a hypothesis, not a commitment: when the
146    /// assumptions cannot all hold the solver names which to drop, and it never
147    /// blames a `FACT`/`PREMISE`. The `Literal` carries the optional `NOT`.
148    Assume(Located<'a, Literal<'a>>),
149    /// `PREMISE <name>: ...` — a checked first principle.
150    Premise {
151        /// The premise's label (a per-source name, not a global identifier).
152        name: Located<'a, &'a str>,
153        /// The constraint itself: a list body or a `WHEN … THEN` implication.
154        body: Body<'a>,
155    },
156    /// `RULE <name>: ...` — a fact-producing inference rule.
157    Rule {
158        /// The rule's label.
159        name: Located<'a, &'a str>,
160        /// Always an implication body (the grammar forbids a list body here).
161        body: Body<'a>,
162    },
163    /// `CHECK [subject] [BIDIRECTIONAL]` — a query.
164    Check {
165        /// Restrict the report to this subject; `None` checks everything.
166        subject: Option<Located<'a, &'a str>>,
167        /// `true` enables the backward (all-SAT) pass for UNDERDETERMINED.
168        bidirectional: bool,
169    },
170}
171
172/// A parsed program: a flat sequence of statements.
173#[derive(Debug, Clone, PartialEq)]
174pub struct Program<'a> {
175    /// Top-level statements in source order. The list is flat: PREMISE/RULE bodies
176    /// live inside their [`Statement`], not as separate entries.
177    pub statements: Vec<Statement<'a>>,
178}
179
180/// Reserved words — always CAPS, in full. An identifier may not equal any of these.
181pub const RESERVED: &[&str] = &[
182    "IMPORT",
183    "FACT",
184    "NOT",
185    "ASSUME",
186    "PREMISE",
187    "RULE",
188    "CHECK",
189    "BIDIRECTIONAL",
190    "WHEN",
191    "AND",
192    "OR",
193    "THEN",
194    "EXCLUSIVE",
195    "FORBIDS",
196    "ONEOF",
197    "ATLEAST",
198];
199
200/// Whether `word` is a reserved keyword.
201pub fn is_reserved(word: &str) -> bool {
202    RESERVED.contains(&word)
203}
204
205// --- Error -----------------------------------------------------------------
206
207/// A friendly error structure that can be displayed to the user.
208#[derive(Debug)]
209pub struct ParseError<'a> {
210    /// The original full input string (needed for context).
211    pub source: &'a str,
212    /// The span where the error occurred.
213    pub span: Span<'a>,
214    /// A description of the error.
215    pub message: String,
216}
217
218impl<'a> fmt::Display for ParseError<'a> {
219    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
220        let line = self.span.location_line() as usize;
221        let column = self.span.get_column();
222        let full_line = self
223            .source
224            .lines()
225            .nth(line.saturating_sub(1))
226            .unwrap_or("");
227        let indent = " ".repeat(if column > 0 { column - 1 } else { 0 });
228
229        write!(
230            f,
231            "Syntax Error at line {}, col {}: {}\n  | {}\n  | {}^--- here",
232            line, column, self.message, full_line, indent
233        )
234    }
235}
236
237// --- Parser primitives -----------------------------------------------------
238
239/// Internal nom error carrying a human message and the precise failure span.
240/// Recoverable failures are `nom::Err::Error`; once a statement has committed
241/// (its leading keyword is recognized), failures are promoted to
242/// `nom::Err::Failure` so the error points at the *real* problem with a specific
243/// message instead of backtracking to a generic "expected a statement".
244#[derive(Debug, Clone)]
245struct Problem<'a> {
246    input: Span<'a>,
247    message: String,
248}
249
250impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
251    fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
252        Problem {
253            input,
254            message: String::from("unexpected token"),
255        }
256    }
257    fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
258        other
259    }
260}
261
262/// nom result specialized to our [`Problem`] error and located `Span` input.
263type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
264
265/// Turn a recoverable `Error` into a committed `Failure` at `at` with `msg`.
266/// Already-`Failure` (a deeper, more specific message) and `Ok` pass through.
267fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
268    match r {
269        Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
270            input: at,
271            message: String::from(msg),
272        })),
273        other => other,
274    }
275}
276
277/// A plain recoverable `Error` at `input` (lets an enclosing `alt` backtrack).
278fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
279    Err(nom::Err::Error(Problem {
280        input,
281        message: String::from("unexpected token"),
282    }))
283}
284
285/// Characters allowed *after* the first in an identifier. Letters and digits of
286/// *any* script are accepted (Unicode `is_alphanumeric`), so `условие` or `名前`
287/// are valid; `_` joins multi-word names and `.` makes dotted subjects like
288/// `Creature.A` a single token. Punctuation and other symbols are rejected.
289fn is_ident_char(c: char) -> bool {
290    c.is_alphanumeric() || c == '_' || c == '.'
291}
292
293/// A bare identifier (does not reject reserved words). The first character must
294/// be a letter of any script (`is_alphabetic`) — never a digit, `_`, `.`, or
295/// punctuation — so identifiers stay distinct from numbers and operators.
296fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
297    recognize((satisfy(|c| c.is_alphabetic()), take_while(is_ident_char))).parse(input)
298}
299
300/// An identifier that is not a reserved keyword.
301fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
302    let start = input;
303    let (rest, sp) = raw_identifier(input)?;
304    if is_reserved(sp.fragment()) {
305        return perr(start);
306    }
307    Ok((
308        rest,
309        Located {
310            data: *sp.fragment(),
311            span: start,
312        },
313    ))
314}
315
316/// A line comment `// ... ` up to (but not including) the line ending.
317fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
318    recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
319}
320
321/// End of a statement line: trailing spaces, an optional trailing comment,
322/// then a line ending or EOF.
323fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
324    value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
325}
326
327/// A blank line or a full-line comment (must consume a line ending to progress).
328fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
329    value((), (space0, opt(comment), line_ending)).parse(input)
330}
331
332/// Skip any number of blank / comment lines between statements.
333fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
334    value((), many0(noise_line)).parse(input)
335}
336
337// --- Atoms & literals ------------------------------------------------------
338
339/// `<subject> <predicate> [<object>]` — two or three space-separated identifiers.
340fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
341    let start = input;
342    let (input, subject) = identifier(input)?;
343    let (input, _) = space1(input)?;
344    let (input, predicate) = identifier(input)?;
345    let (input, object) = opt(preceded(space1, identifier)).parse(input)?;
346    Ok((
347        input,
348        Located {
349            data: Atom {
350                subject: subject.data,
351                predicate: predicate.data,
352                object: object.map(|o| o.data),
353            },
354            span: start,
355        },
356    ))
357}
358
359/// An optionally `NOT`-prefixed [`atom`] — a literal inside a `WHEN`/`THEN` body.
360fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
361    let start = input;
362    let (input, neg) = opt(terminated(tag("NOT"), space1)).parse(input)?;
363    let (input, a) = atom(input)?;
364    Ok((
365        input,
366        Located {
367            data: Literal {
368                negated: neg.is_some(),
369                atom: a.data,
370            },
371            span: start,
372        },
373    ))
374}
375
376/// An atom on its own (possibly indented) line: used inside list bodies.
377fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
378    let (input, _) = space0(input)?;
379    let (input, a) = atom(input)?;
380    let (input, _) = eol(input)?;
381    Ok((input, a))
382}
383
384// --- Bodies ----------------------------------------------------------------
385
386/// One of the list-constraint keywords (`EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST`).
387fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
388    alt((
389        value(ListOp::Exclusive, tag("EXCLUSIVE")),
390        value(ListOp::Forbids, tag("FORBIDS")),
391        value(ListOp::OneOf, tag("ONEOF")),
392        value(ListOp::AtLeast, tag("ATLEAST")),
393    ))
394    .parse(input)
395}
396
397/// A list-constraint body: a list operator on its own line, then one atom per
398/// line (at least two).
399///
400/// Commit strategy: the leading `list_op` failing stays a recoverable `Error`
401/// so the `PREMISE` `alt` can fall through and try [`impl_body`] instead. *Once*
402/// the operator matched we are committed, so every subsequent failure is
403/// [`promote`]d to a `Failure` with a specific message — no backtracking to a
404/// generic "expected a statement".
405fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
406    let (input, _) = space0(input)?;
407    // list_op failing stays Error so the PREMISE alt can try impl_body.
408    let (input, op) = list_op(input)?;
409    // Past here we are committed to a list body.
410    let (input, _) = promote(
411        eol(input),
412        input,
413        "expected a newline after the list operator",
414    )?;
415    let at = input;
416    let (input, first) = promote(
417        atom_line(input),
418        at,
419        "a list premise needs at least two atoms",
420    )?;
421    let at = input;
422    let (input, second) = promote(
423        atom_line(input),
424        at,
425        "a list premise needs at least two atoms",
426    )?;
427    let (input, rest) = many0(atom_line).parse(input)?;
428
429    let mut atoms = vec![first, second];
430    atoms.extend(rest);
431    Ok((input, Body::List { op, atoms }))
432}
433
434/// A continuation `AND <literal>` / `OR <literal>` line inside a `WHEN`/`THEN`
435/// block. Returns `(Conn, literal)`. A line that is neither `AND` nor `OR` yields
436/// a recoverable `Error` so `many0` stops cleanly (e.g. at `THEN`/EOF).
437fn cont_line<'a>(input: Span<'a>) -> PResult<'a, (Conn, Located<'a, Literal<'a>>)> {
438    let (input, _) = space0(input)?;
439    // Not an AND/OR line → Error so many0 stops cleanly.
440    let (input, conn) =
441        alt((value(Conn::And, tag("AND")), value(Conn::Or, tag("OR")))).parse(input)?;
442    let (input, _) = space1(input)?;
443    let at = input;
444    let (input, lit) = promote(
445        literal(input),
446        at,
447        "AND/OR expects a literal: [NOT] <Subject> <predicate> [<object>]",
448    )?;
449    let (input, _) = promote(
450        eol(input),
451        input,
452        "unexpected text after the AND/OR literal",
453    )?;
454    Ok((input, (conn, lit)))
455}
456
457/// Reduce a group's continuation lines to a single [`Conn`], rejecting a mix of
458/// `AND` and `OR` in one group (point the error at the first line that switches).
459fn group_conn<'a>(conts: &[(Conn, Located<'a, Literal<'a>>)]) -> Result<Conn, Span<'a>> {
460    let mut seen: Option<Conn> = None;
461    for (conn, lit) in conts {
462        match seen {
463            None => seen = Some(*conn),
464            Some(s) if s != *conn => return Err(lit.span),
465            _ => {}
466        }
467    }
468    Ok(seen.unwrap_or(Conn::And))
469}
470
471/// Fail with a committed message at `at` (for in-body semantic checks).
472fn fail_at<'a, T>(at: Span<'a>, msg: &str) -> PResult<'a, T> {
473    Err(nom::Err::Failure(Problem {
474        input: at,
475        message: String::from(msg),
476    }))
477}
478
479/// An implication body: `WHEN <lit> [AND <lit>]* THEN <lit> [AND <lit>]*`.
480///
481/// Like [`list_body`], a missing leading `WHEN` stays a recoverable `Error` (so
482/// the `PREMISE` `alt` can try a list body); after `WHEN` matches we are committed
483/// and use [`promote`] for precise errors. Antecedent and consequent each start
484/// with one mandatory literal followed by zero or more `AND` lines.
485fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
486    let (input, _) = space0(input)?;
487    // No WHEN → Error so the PREMISE alt can fall through to a list body.
488    let (input, _) = (tag("WHEN"), space1).parse(input)?;
489    // Committed to an implication body now.
490    let at = input;
491    let (input, when) = promote(
492        literal(input),
493        at,
494        "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
495    )?;
496    let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
497    let (input, ante_rest) = many0(cont_line).parse(input)?;
498    let ante_conn = match group_conn(&ante_rest) {
499        Ok(c) => c,
500        Err(span) => {
501            return fail_at(
502                span,
503                "don't mix AND and OR in one WHEN group — split it into separate premises",
504            );
505        }
506    };
507
508    let (input, _) = space0(input)?;
509    let at = input;
510    let (input, _) = promote(
511        tag("THEN").parse(input),
512        at,
513        "expected THEN to complete the WHEN ... THEN implication",
514    )?;
515    let at = input;
516    let (input, then) = promote(
517        preceded(space1, literal).parse(input),
518        at,
519        "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
520    )?;
521    let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
522    let (input, cons_rest) = many0(cont_line).parse(input)?;
523    let cons_conn = match group_conn(&cons_rest) {
524        Ok(c) => c,
525        Err(span) => {
526            return fail_at(
527                span,
528                "don't mix AND and OR in one THEN group — split it into separate premises",
529            );
530        }
531    };
532
533    let mut antecedent = vec![when];
534    antecedent.extend(ante_rest.into_iter().map(|(_, l)| l));
535    let mut consequent = vec![then];
536    consequent.extend(cons_rest.into_iter().map(|(_, l)| l));
537    Ok((
538        input,
539        Body::Impl {
540            antecedent,
541            ante_conn,
542            consequent,
543            cons_conn,
544        },
545    ))
546}
547
548// --- Statements ------------------------------------------------------------
549
550/// `IMPORT "<path>"` — a quoted path on one line.
551fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
552    let (input, _) = (tag("IMPORT"), space1).parse(input)?;
553    let start = input;
554    let (input, path) = promote(
555        delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
556        start,
557        "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
558    )?;
559    let (input, _) = promote(eol(input), input, "unexpected text after the IMPORT path")?;
560    Ok((
561        input,
562        Statement::Import(Located {
563            data: *path.fragment(),
564            span: start,
565        }),
566    ))
567}
568
569/// `FACT <atom>` — a TRUE assertion.
570fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
571    let (input, _) = (tag("FACT"), space1).parse(input)?;
572    let at = input;
573    let (input, a) = promote(
574        atom(input),
575        at,
576        "FACT expects an atom: <Subject> <predicate> [<object>]",
577    )?;
578    let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
579    Ok((input, Statement::Fact(a)))
580}
581
582/// `ASSUME [NOT] <atom>` — a soft (retractable) assertion. Accepts a leading
583/// `NOT` (like a `WHEN`/`THEN` literal), so `ASSUME NOT x a` is FALSE-by-default.
584fn stmt_assume<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
585    let (input, _) = (tag("ASSUME"), space1).parse(input)?;
586    let at = input;
587    let (input, lit) = promote(
588        literal(input),
589        at,
590        "ASSUME expects an atom: [NOT] <Subject> <predicate> [<object>]",
591    )?;
592    let (input, _) = promote(eol(input), input, "unexpected text after the ASSUME atom")?;
593    Ok((input, Statement::Assume(lit)))
594}
595
596/// `NOT <atom>` — a FALSE assertion. Tried last among statements so a body-level
597/// `NOT` literal is never mistaken for a top-level negation.
598fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
599    let (input, _) = (tag("NOT"), space1).parse(input)?;
600    let at = input;
601    let (input, a) = promote(
602        atom(input),
603        at,
604        "NOT expects an atom: <Subject> <predicate> [<object>]",
605    )?;
606    let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
607    Ok((input, Statement::Negation(a)))
608}
609
610/// `CHECK [<subject>] [BIDIRECTIONAL]` — both modifiers optional.
611fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
612    let (input, _) = tag("CHECK").parse(input)?;
613    let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
614    let (input, bidir) = opt(preceded(space1, tag("BIDIRECTIONAL"))).parse(input)?;
615    let (input, _) = eol(input)?;
616    Ok((
617        input,
618        Statement::Check {
619            subject,
620            bidirectional: bidir.is_some(),
621        },
622    ))
623}
624
625/// `PREMISE <name>: <body>` where the body is a list or an implication.
626fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
627    let (input, _) = (tag("PREMISE"), space1).parse(input)?;
628    // Committed to a premise now.
629    let at = input;
630    let (input, name) = promote(
631        identifier(input),
632        at,
633        "expected a premise name (a lowercase identifier)",
634    )?;
635    let (input, _) = space0(input)?;
636    let (input, _) = promote(
637        char(':').parse(input),
638        input,
639        "expected ':' after the premise name",
640    )?;
641    let (input, _) = promote(eol(input), input, "unexpected text after 'PREMISE <name>:'")?;
642    let at = input;
643    let (input, body) = promote(
644        alt((list_body, impl_body)).parse(input),
645        at,
646        "a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
647    )?;
648    Ok((input, Statement::Premise { name, body }))
649}
650
651/// `RULE <name>: <implication>` — like a premise but the body must be `WHEN … THEN`.
652fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
653    let (input, _) = (tag("RULE"), space1).parse(input)?;
654    let at = input;
655    let (input, name) = promote(
656        identifier(input),
657        at,
658        "expected a rule name (a lowercase identifier)",
659    )?;
660    let (input, _) = space0(input)?;
661    let (input, _) = promote(
662        char(':').parse(input),
663        input,
664        "expected ':' after the rule name",
665    )?;
666    let (input, _) = promote(eol(input), input, "unexpected text after 'RULE <name>:'")?;
667    let at = input;
668    let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
669    Ok((input, Statement::Rule { name, body }))
670}
671
672/// One top-level statement. Order matters: each branch commits on its keyword,
673/// and `stmt_negation` comes last so a leading `NOT` is only read as a top-level
674/// negation when nothing else matched.
675fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
676    // Leading indentation is cosmetic everywhere, including on the top-level
677    // keyword line — so a whole program may be written indented (e.g. inside a
678    // host's here-doc) and parse identically.
679    let (input, _) = space0(input)?;
680    alt((
681        stmt_import,
682        stmt_fact,
683        stmt_assume,
684        stmt_premise,
685        stmt_rule,
686        stmt_check,
687        stmt_negation,
688    ))
689    .parse(input)
690}
691
692/// The whole program: leading noise, then statements each followed by noise
693/// (blank/comment lines). Stops at the first byte that is not a valid statement;
694/// [`parse`] then checks the tail is empty and otherwise reports an error there.
695fn program<'a>(input: Span<'a>) -> PResult<'a, Vec<Statement<'a>>> {
696    let (input, _) = skip_noise(input)?;
697    many0(terminated(statement, skip_noise)).parse(input)
698}
699
700/// Parse a full `.vrf` source into a [`Program`].
701///
702/// On error, returns a [`ParseError`] whose `Display` shows the offending line
703/// with a `^--- here` caret, mirroring vsm-parser.
704pub fn parse(src: &str) -> Result<Program<'_>, ParseError<'_>> {
705    let input = Span::new(src);
706    match program(input) {
707        Ok((rest, statements)) => {
708            if !trailing_is_empty(rest.fragment()) {
709                return Err(ParseError {
710                    source: src,
711                    span: rest,
712                    message: String::from(
713                        "expected a statement (IMPORT/FACT/NOT/ASSUME/PREMISE/RULE/CHECK)",
714                    ),
715                });
716            }
717            Ok(Program { statements })
718        }
719        Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => Err(ParseError {
720            source: src,
721            span: e.input,
722            message: e.message,
723        }),
724        Err(nom::Err::Incomplete(_)) => Err(ParseError {
725            source: src,
726            span: input,
727            message: String::from("incomplete input"),
728        }),
729    }
730}
731
732/// True if the unparsed tail is only whitespace and trailing comments.
733fn trailing_is_empty(tail: &str) -> bool {
734    for raw in tail.lines() {
735        let t = raw.trim();
736        if t.is_empty() || t.starts_with("//") {
737            continue;
738        }
739        return false;
740    }
741    true
742}
743
744#[cfg(test)]
745mod tests {
746    use super::*;
747    use alloc::format;
748
749    fn prog(src: &str) -> Program<'_> {
750        parse(src).expect("should parse")
751    }
752
753    /// `(subject, predicate, object?)` of one atom, borrowed from the source.
754    type AtomShape<'a> = (&'a str, &'a str, Option<&'a str>);
755    /// A list premise flattened to `(operator, its atoms)`.
756    type ListShape<'a> = (ListOp, Vec<AtomShape<'a>>);
757
758    /// Atom data flattened to owned tuples — span-independent, for structural
759    /// comparison (spans differ by offset, which is exactly what "cosmetic" means).
760    fn atom_shapes<'a>(p: &Program<'a>) -> Vec<ListShape<'a>> {
761        p.statements
762            .iter()
763            .filter_map(|s| match s {
764                Statement::Premise {
765                    body: Body::List { op, atoms },
766                    ..
767                } => Some((
768                    *op,
769                    atoms
770                        .iter()
771                        .map(|a| (a.data.subject, a.data.predicate, a.data.object))
772                        .collect(),
773                )),
774                _ => None,
775            })
776            .collect()
777    }
778
779    #[test]
780    fn parses_fact_and_negation() {
781        let p = prog(
782            r#"
783        FACT Creature.A has flying
784        NOT Creature.A has cold_blood
785        "#,
786        );
787        assert_eq!(p.statements.len(), 2);
788        match &p.statements[0] {
789            Statement::Fact(a) => {
790                assert_eq!(a.data.subject, "Creature.A");
791                assert_eq!(a.data.predicate, "has");
792                assert_eq!(a.data.object, Some("flying"));
793            }
794            other => panic!("expected fact, got {:?}", other),
795        }
796        match &p.statements[1] {
797            Statement::Negation(a) => {
798                assert_eq!(a.data.object, Some("cold_blood"));
799            }
800            other => panic!("expected negation, got {:?}", other),
801        }
802    }
803
804    #[test]
805    fn fact_without_object() {
806        let p = prog("FACT Motor over_100\n");
807        match &p.statements[0] {
808            Statement::Fact(a) => {
809                assert_eq!(a.data.subject, "Motor");
810                assert_eq!(a.data.predicate, "over_100");
811                assert_eq!(a.data.object, None);
812            }
813            other => panic!("expected fact, got {:?}", other),
814        }
815    }
816
817    #[test]
818    fn parses_assume_positive_and_negated() {
819        let p = prog(
820            r#"
821        ASSUME rel in_prod
822        ASSUME NOT rel has_rollback
823        "#,
824        );
825        assert_eq!(p.statements.len(), 2);
826        match &p.statements[0] {
827            Statement::Assume(l) => {
828                assert!(!l.data.negated);
829                assert_eq!(l.data.atom.subject, "rel");
830                assert_eq!(l.data.atom.predicate, "in_prod");
831                assert_eq!(l.data.atom.object, None);
832            }
833            other => panic!("expected assume, got {:?}", other),
834        }
835        match &p.statements[1] {
836            Statement::Assume(l) => {
837                assert!(l.data.negated);
838                assert_eq!(l.data.atom.predicate, "has_rollback");
839            }
840            other => panic!("expected negated assume, got {:?}", other),
841        }
842    }
843
844    #[test]
845    fn assume_is_a_reserved_word() {
846        assert!(parse("FACT ASSUME has x\n").is_err());
847    }
848
849    #[test]
850    fn parses_import() {
851        let p = prog("IMPORT \"physics.vrf\"\n");
852        match &p.statements[0] {
853            Statement::Import(path) => assert_eq!(path.data, "physics.vrf"),
854            other => panic!("expected import, got {:?}", other),
855        }
856    }
857
858    #[test]
859    fn parses_exclusive_premise() {
860        let src = r#"
861        PREMISE fly_xor_swim:
862            EXCLUSIVE
863                Creature.A has flying
864                Creature.A has swimming
865        "#;
866        let p = prog(src);
867        match &p.statements[0] {
868            Statement::Premise { name, body } => {
869                assert_eq!(name.data, "fly_xor_swim");
870                match body {
871                    Body::List { op, atoms } => {
872                        assert_eq!(*op, ListOp::Exclusive);
873                        assert_eq!(atoms.len(), 2);
874                        assert_eq!(atoms[1].data.object, Some("swimming"));
875                    }
876                    other => panic!("expected list body, got {:?}", other),
877                }
878            }
879            other => panic!("expected premise, got {:?}", other),
880        }
881    }
882
883    #[test]
884    fn parses_implication_premise_with_and() {
885        let src = r#"
886        PREMISE wings_need_bone:
887            WHEN Creature.A has flying
888            THEN Creature.A has wing
889            AND  Creature.A has bone
890        "#;
891        let p = prog(src);
892        match &p.statements[0] {
893            Statement::Premise {
894                body:
895                    Body::Impl {
896                        antecedent,
897                        consequent,
898                        ..
899                    },
900                ..
901            } => {
902                assert_eq!(antecedent.len(), 1);
903                assert_eq!(antecedent[0].data.atom.object, Some("flying"));
904                assert_eq!(consequent.len(), 2);
905                assert_eq!(consequent[0].data.atom.object, Some("wing"));
906                assert_eq!(consequent[1].data.atom.object, Some("bone"));
907            }
908            other => panic!("expected impl premise, got {:?}", other),
909        }
910    }
911
912    #[test]
913    fn antecedent_and_goes_before_then() {
914        let src = r#"
915        PREMISE deploy:
916            WHEN s tested
917            AND s reviewed
918            THEN s can_deploy
919        "#;
920        let p = prog(src);
921        match &p.statements[0] {
922            Statement::Premise {
923                body:
924                    Body::Impl {
925                        antecedent,
926                        consequent,
927                        ..
928                    },
929                ..
930            } => {
931                assert_eq!(antecedent.len(), 2);
932                assert_eq!(consequent.len(), 1);
933            }
934            other => panic!("unexpected: {:?}", other),
935        }
936    }
937
938    #[test]
939    fn when_or_sets_disjunctive_antecedent() {
940        let src = r#"
941        PREMISE p:
942            WHEN x a
943            OR x b
944            THEN x c
945        "#;
946        match &prog(src).statements[0] {
947            Statement::Premise {
948                body:
949                    Body::Impl {
950                        antecedent,
951                        ante_conn,
952                        consequent,
953                        cons_conn,
954                    },
955                ..
956            } => {
957                assert_eq!(antecedent.len(), 2);
958                assert_eq!(*ante_conn, Conn::Or);
959                assert_eq!(consequent.len(), 1);
960                assert_eq!(*cons_conn, Conn::And); // single consequent → AND
961            }
962            other => panic!("expected impl premise, got {:?}", other),
963        }
964    }
965
966    #[test]
967    fn then_or_sets_disjunctive_consequent() {
968        let src = r#"
969        PREMISE p:
970            WHEN x a
971            THEN x b
972            OR x c
973        "#;
974        match &prog(src).statements[0] {
975            Statement::Premise {
976                body:
977                    Body::Impl {
978                        consequent,
979                        cons_conn,
980                        ..
981                    },
982                ..
983            } => {
984                assert_eq!(consequent.len(), 2);
985                assert_eq!(*cons_conn, Conn::Or);
986            }
987            other => panic!("expected impl premise, got {:?}", other),
988        }
989    }
990
991    #[test]
992    fn mixing_and_or_in_one_group_is_an_error() {
993        let mixed_when = r#"
994        PREMISE p:
995            WHEN x a
996            AND x b
997            OR x c
998            THEN x d
999        "#;
1000        let mixed_then = r#"
1001        PREMISE p:
1002            WHEN x a
1003            THEN x b
1004            AND x c
1005            OR x d
1006        "#;
1007        assert!(parse(mixed_when).is_err());
1008        assert!(parse(mixed_then).is_err());
1009    }
1010
1011    #[test]
1012    fn or_is_a_reserved_word() {
1013        assert!(parse("FACT OR has x\n").is_err());
1014    }
1015
1016    #[test]
1017    fn parses_negated_literal_in_rule() {
1018        let src = r#"
1019        RULE pick_slow:
1020            WHEN NOT Motor over_100
1021            THEN Motor uses slow_path
1022        "#;
1023        let p = prog(src);
1024        match &p.statements[0] {
1025            Statement::Rule {
1026                body: Body::Impl { antecedent, .. },
1027                ..
1028            } => {
1029                assert!(antecedent[0].data.negated);
1030                assert_eq!(antecedent[0].data.atom.predicate, "over_100");
1031            }
1032            other => panic!("expected rule, got {:?}", other),
1033        }
1034    }
1035
1036    #[test]
1037    fn parses_check_variants() {
1038        let p = prog("CHECK Creature.A BIDIRECTIONAL\n");
1039        match &p.statements[0] {
1040            Statement::Check {
1041                subject,
1042                bidirectional,
1043            } => {
1044                assert_eq!(subject.as_ref().unwrap().data, "Creature.A");
1045                assert!(bidirectional);
1046            }
1047            other => panic!("expected check, got {:?}", other),
1048        }
1049
1050        let p = prog("CHECK\n");
1051        match &p.statements[0] {
1052            Statement::Check {
1053                subject,
1054                bidirectional,
1055            } => {
1056                assert!(subject.is_none());
1057                assert!(!bidirectional);
1058            }
1059            other => panic!("expected check, got {:?}", other),
1060        }
1061    }
1062
1063    #[test]
1064    fn comments_and_blanks_are_ignored() {
1065        let src = "// header\n\nFACT a b   // trailing comment\n\n// tail\n";
1066        let p = prog(src);
1067        assert_eq!(p.statements.len(), 1);
1068    }
1069
1070    #[test]
1071    fn indentation_is_cosmetic() {
1072        let flat = r#"
1073        PREMISE x:
1074        EXCLUSIVE
1075        a b
1076        a c
1077        "#;
1078        let indented = r#"
1079        PREMISE x:
1080                EXCLUSIVE
1081          a b
1082                    a c
1083        "#;
1084        // Spans differ by offset (cosmetic); the parsed structure must be identical.
1085        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
1086    }
1087
1088    #[test]
1089    fn top_level_statements_may_be_indented() {
1090        // Leading indentation on the FACT/PREMISE/CHECK lines themselves is also
1091        // cosmetic (so a whole program can be pasted indented inside a here-doc).
1092        let flat = r#"
1093        FACT x a
1094        NOT x b
1095        CHECK x
1096        "#;
1097        let indented = r#"
1098            FACT x a
1099                NOT x b
1100            CHECK x
1101        "#;
1102        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
1103        assert_eq!(prog(indented).statements.len(), 3);
1104    }
1105
1106    #[test]
1107    fn full_creature_example_parses() {
1108        let src = include_str!("../../../docs/examples/creature.vrf");
1109        let p = prog(src);
1110        // 2 FACT + 3 PREMISE + 1 RULE + 1 CHECK = 7
1111        assert_eq!(p.statements.len(), 7);
1112    }
1113
1114    #[test]
1115    fn import_demo_example_parses() {
1116        let src = include_str!("../../../docs/examples/import-demo.vrf");
1117        let p = prog(src);
1118        assert!(matches!(p.statements[0], Statement::Import(_)));
1119    }
1120
1121    #[test]
1122    fn unicode_identifiers_any_script() {
1123        // Cyrillic subject/predicate/object, mixed with `_` and digits (not first).
1124        let p = prog(
1125            r#"
1126        FACT кот пушистый2
1127        NOT собака has крылья
1128        "#,
1129        );
1130        match &p.statements[0] {
1131            Statement::Fact(a) => {
1132                assert_eq!(a.data.subject, "кот");
1133                assert_eq!(a.data.predicate, "пушистый2");
1134                assert_eq!(a.data.object, None);
1135            }
1136            other => panic!("expected fact, got {:?}", other),
1137        }
1138        match &p.statements[1] {
1139            Statement::Negation(a) => {
1140                assert_eq!(a.data.subject, "собака");
1141                assert_eq!(a.data.object, Some("крылья"));
1142            }
1143            other => panic!("expected negation, got {:?}", other),
1144        }
1145    }
1146
1147    #[test]
1148    fn unicode_premise_name_and_body() {
1149        let src = r#"
1150        PREMISE правило_лая:
1151            WHEN собака has хвост
1152            THEN собака умеет_лаять
1153        "#;
1154        match &prog(src).statements[0] {
1155            Statement::Premise { name, body } => {
1156                assert_eq!(name.data, "правило_лая");
1157                match body {
1158                    Body::Impl {
1159                        antecedent,
1160                        consequent,
1161                        ..
1162                    } => {
1163                        assert_eq!(antecedent[0].data.atom.subject, "собака");
1164                        assert_eq!(consequent[0].data.atom.subject, "собака");
1165                        assert_eq!(consequent[0].data.atom.predicate, "умеет_лаять");
1166                    }
1167                    other => panic!("expected impl body, got {:?}", other),
1168                }
1169            }
1170            other => panic!("expected premise, got {:?}", other),
1171        }
1172    }
1173
1174    #[test]
1175    fn identifier_cannot_start_with_digit() {
1176        // `2cats` is not a valid subject — first char must be a letter.
1177        assert!(parse("FACT 2cats has fur\n").is_err());
1178    }
1179
1180    #[test]
1181    fn punctuation_is_rejected_in_identifier() {
1182        // `!` and other symbols are not identifier characters.
1183        assert!(parse("FACT cat! has fur\n").is_err());
1184    }
1185
1186    #[test]
1187    fn reserved_word_cannot_be_identifier() {
1188        // `WHEN` as a subject is illegal.
1189        assert!(parse("FACT WHEN has x\n").is_err());
1190    }
1191
1192    #[test]
1193    fn pretty_error_points_at_offending_line() {
1194        let src = r#"FACT a b
1195!garbage here
1196FACT c d
1197"#;
1198        let err = parse(src).expect_err("should fail");
1199        let shown = format!("{}", err);
1200        assert!(shown.contains("Syntax Error"));
1201        assert!(shown.contains("line 2"));
1202        assert!(shown.contains("!garbage here"));
1203        assert!(shown.contains("^--- here"));
1204    }
1205
1206    #[test]
1207    fn crlf_line_endings() {
1208        let p = prog(
1209            r#"
1210        FACT a b
1211        CHECK a
1212        "#,
1213        );
1214        assert_eq!(p.statements.len(), 2);
1215    }
1216
1217    #[test]
1218    fn tabs_as_indentation() {
1219        let p = prog(
1220            r#"
1221        PREMISE e:
1222	EXCLUSIVE
1223		x a
1224		x b
1225        "#,
1226        );
1227        assert!(matches!(
1228            p.statements[0],
1229            Statement::Premise {
1230                body: Body::List {
1231                    op: ListOp::Exclusive,
1232                    ..
1233                },
1234                ..
1235            }
1236        ));
1237    }
1238
1239    #[test]
1240    fn parses_all_list_ops() {
1241        for (kw, want) in [
1242            ("EXCLUSIVE", ListOp::Exclusive),
1243            ("FORBIDS", ListOp::Forbids),
1244            ("ONEOF", ListOp::OneOf),
1245            ("ATLEAST", ListOp::AtLeast),
1246        ] {
1247            let src = alloc::format!("PREMISE a:\n    {kw}\n        x a\n        x b\n");
1248            match &prog(&src).statements[0] {
1249                Statement::Premise {
1250                    body: Body::List { op, .. },
1251                    ..
1252                } => assert_eq!(*op, want),
1253                other => panic!("{kw}: unexpected {other:?}"),
1254            }
1255        }
1256    }
1257
1258    #[test]
1259    fn check_bidirectional_without_subject() {
1260        match &prog("CHECK BIDIRECTIONAL\n").statements[0] {
1261            Statement::Check {
1262                subject,
1263                bidirectional,
1264            } => {
1265                assert!(subject.is_none());
1266                assert!(bidirectional);
1267            }
1268            other => panic!("unexpected {other:?}"),
1269        }
1270    }
1271
1272    #[test]
1273    fn empty_and_comment_only_input_yield_no_statements() {
1274        assert_eq!(prog("").statements.len(), 0);
1275        assert_eq!(prog("// just a comment\n\n   \n").statements.len(), 0);
1276    }
1277
1278    #[test]
1279    fn negation_with_object() {
1280        match &prog("NOT Creature.A has wing\n").statements[0] {
1281            Statement::Negation(a) => {
1282                assert_eq!(a.data.subject, "Creature.A");
1283                assert_eq!(a.data.object, Some("wing"));
1284            }
1285            other => panic!("unexpected {other:?}"),
1286        }
1287    }
1288
1289    #[test]
1290    fn negated_consequent_then_not() {
1291        let src = r#"
1292        PREMISE a:
1293            WHEN x on
1294            THEN NOT x off
1295        "#;
1296        match &prog(src).statements[0] {
1297            Statement::Premise {
1298                body: Body::Impl { consequent, .. },
1299                ..
1300            } => {
1301                assert!(consequent[0].data.negated);
1302                assert_eq!(consequent[0].data.atom.predicate, "off");
1303            }
1304            other => panic!("unexpected {other:?}"),
1305        }
1306    }
1307
1308    #[test]
1309    fn multiple_imports_then_facts() {
1310        let p = prog(
1311            r#"
1312        IMPORT "a.vrf"
1313        IMPORT "b.vrf"
1314        FACT x y
1315        "#,
1316        );
1317        assert!(matches!(p.statements[0], Statement::Import(_)));
1318        assert!(matches!(p.statements[1], Statement::Import(_)));
1319        assert!(matches!(p.statements[2], Statement::Fact(_)));
1320    }
1321
1322    #[test]
1323    fn trailing_comment_without_final_newline() {
1324        let p = prog("FACT a b\n// trailing, no newline");
1325        assert_eq!(p.statements.len(), 1);
1326    }
1327}