Skip to main content

elenchus_parser/
lib.rs

1//! elenchus-parser — parses the English-like elenchus DSL into an AST.
2//!
3//! Style mirrors `vsm-parser`: zero-copy over `&str`, `nom` + `nom_locate`
4//! for line/column tracking, and a human-friendly `^--- here` error display.
5//! Syntax is line/keyword-oriented (not S-expressions) so small models cannot
6//! trip on parentheses or indentation.
7//!
8//! Grammar (see docs/SPEC.md, "Grammar (EBNF)"):
9//! - statements are newline-terminated; indentation is cosmetic, not significant;
10//! - keywords are ALWAYS CAPS (ASCII); identifiers are content (case-sensitive,
11//!   verbatim, any-script letters — e.g. `условие`, `名前`);
12//! - block boundaries (PREMISE/RULE bodies) are found by keywords, never by indent.
13#![no_std]
14// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
15#![warn(missing_docs)]
16
17extern crate alloc;
18
19use alloc::string::String;
20use alloc::vec;
21use alloc::vec::Vec;
22use core::fmt;
23
24use nom::{
25    IResult, Parser,
26    branch::alt,
27    bytes::complete::{tag, take_while},
28    character::complete::{char, line_ending, satisfy, space0, space1},
29    combinator::{eof, opt, recognize, value},
30    multi::many0,
31    sequence::{delimited, preceded, terminated},
32};
33use nom_locate::LocatedSpan;
34
35/// Source code fragment with line and column tracking.
36pub type Span<'a> = LocatedSpan<&'a str>;
37
38/// Container for data associated with its source location.
39#[derive(Debug, Clone, PartialEq)]
40pub struct Located<'a, T> {
41    /// The actual parsed data.
42    pub data: T,
43    /// The location in the source text (start of the construct).
44    pub span: Span<'a>,
45}
46
47// --- AST -------------------------------------------------------------------
48
49/// An atom is the triple `(subject, predicate, object?)` — the unit of identity.
50/// `Creature.A has flying` and `Creature.A has swimming` are DIFFERENT atoms.
51#[derive(Debug, Clone, PartialEq, Eq)]
52pub struct Atom<'a> {
53    /// The entity the claim is about, e.g. `Creature.A` or `Motor`.
54    pub subject: &'a str,
55    /// The relation or property asserted, e.g. `has` or `over_100`.
56    pub predicate: &'a str,
57    /// Optional value the predicate relates the subject to, e.g. `flying`.
58    /// `None` for two-word atoms such as `Motor over_100`. The object is part of
59    /// identity: `has flying` and `has swimming` are different atoms.
60    pub object: Option<&'a str>,
61}
62
63/// A literal is an atom, optionally negated (`NOT ...`).
64#[derive(Debug, Clone, PartialEq, Eq)]
65pub struct Literal<'a> {
66    /// `true` when written with a leading `NOT` (asserts the atom is FALSE).
67    pub negated: bool,
68    /// The underlying atom being asserted true or false.
69    pub atom: Atom<'a>,
70}
71
72/// List constraint operators (body of a list-style `PREMISE`).
73///
74/// These are surface sugar; the compiler desugars each to `Impossible` clauses
75/// (see `elenchus-compiler`). The meanings below are *what the author asserts*.
76#[derive(Debug, Clone, Copy, PartialEq, Eq)]
77pub enum ListOp {
78    /// `EXCLUSIVE` — at most one of the listed atoms may be TRUE (mutual
79    /// exclusion). For n > 2 this is pairwise, not "not all at once".
80    Exclusive,
81    /// `FORBIDS` — at most one may be TRUE; a synonym of [`ListOp::Exclusive`]
82    /// (same pairwise expansion), kept as a separate word for readability.
83    Forbids,
84    /// `ONEOF` — exactly one is TRUE: at-most-one (pairwise) plus at-least-one.
85    OneOf,
86    /// `ATLEAST` — at least one of the listed atoms is TRUE.
87    AtLeast,
88}
89
90/// How the literals in a `WHEN`/`THEN` group combine. A single-literal group is
91/// always [`Conn::And`] (the connective is irrelevant with one literal).
92#[derive(Debug, Clone, Copy, PartialEq, Eq)]
93pub enum Conn {
94    /// Continuation lines used `AND` — all literals must hold.
95    And,
96    /// Continuation lines used `OR` — at least one literal must hold.
97    Or,
98}
99
100/// The body of an `PREMISE` or `RULE`.
101#[derive(Debug, Clone, PartialEq)]
102pub enum Body<'a> {
103    /// `EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST` over >= 2 atoms.
104    List {
105        /// Which list constraint this is.
106        op: ListOp,
107        /// The atoms it ranges over (the parser guarantees at least two).
108        atoms: Vec<Located<'a, Atom<'a>>>,
109    },
110    /// `WHEN ... [AND|OR ...] THEN ... [AND|OR ...]` — antecedent + consequent.
111    /// Within one group the continuation keyword is uniform (no mixing `AND`/`OR`).
112    Impl {
113        /// `WHEN`/`AND`/`OR` conditions.
114        antecedent: Vec<Located<'a, Literal<'a>>>,
115        /// How the antecedent literals combine.
116        ante_conn: Conn,
117        /// `THEN`/`AND`/`OR` results that follow when the antecedent holds.
118        consequent: Vec<Located<'a, Literal<'a>>>,
119        /// How the consequent literals combine.
120        cons_conn: Conn,
121    },
122}
123
124/// A top-level statement.
125#[derive(Debug, Clone, PartialEq)]
126pub enum Statement<'a> {
127    /// `IMPORT "path"` — reuse another source (resolved by the compiler).
128    Import(Located<'a, &'a str>),
129    /// `FACT <atom>` — a TRUE assertion.
130    Fact(Located<'a, Atom<'a>>),
131    /// `NOT <atom>` — a FALSE assertion.
132    Negation(Located<'a, Atom<'a>>),
133    /// `PREMISE <name>: ...` — a checked first principle.
134    Premise {
135        /// The premise's label (a per-source name, not a global identifier).
136        name: Located<'a, &'a str>,
137        /// The constraint itself: a list body or a `WHEN … THEN` implication.
138        body: Body<'a>,
139    },
140    /// `RULE <name>: ...` — a fact-producing inference rule.
141    Rule {
142        /// The rule's label.
143        name: Located<'a, &'a str>,
144        /// Always an implication body (the grammar forbids a list body here).
145        body: Body<'a>,
146    },
147    /// `CHECK [subject] [BIDIRECTIONAL]` — a query.
148    Check {
149        /// Restrict the report to this subject; `None` checks everything.
150        subject: Option<Located<'a, &'a str>>,
151        /// `true` enables the backward (all-SAT) pass for UNDERDETERMINED.
152        bidirectional: bool,
153    },
154}
155
156/// A parsed program: a flat sequence of statements.
157#[derive(Debug, Clone, PartialEq)]
158pub struct Program<'a> {
159    /// Top-level statements in source order. The list is flat: PREMISE/RULE bodies
160    /// live inside their [`Statement`], not as separate entries.
161    pub statements: Vec<Statement<'a>>,
162}
163
164/// Reserved words — always CAPS, in full. An identifier may not equal any of these.
165pub const RESERVED: &[&str] = &[
166    "IMPORT",
167    "FACT",
168    "NOT",
169    "PREMISE",
170    "RULE",
171    "CHECK",
172    "BIDIRECTIONAL",
173    "WHEN",
174    "AND",
175    "OR",
176    "THEN",
177    "EXCLUSIVE",
178    "FORBIDS",
179    "ONEOF",
180    "ATLEAST",
181];
182
183/// Whether `word` is a reserved keyword.
184pub fn is_reserved(word: &str) -> bool {
185    RESERVED.contains(&word)
186}
187
188// --- Error -----------------------------------------------------------------
189
190/// A friendly error structure that can be displayed to the user.
191#[derive(Debug)]
192pub struct ParseError<'a> {
193    /// The original full input string (needed for context).
194    pub source: &'a str,
195    /// The span where the error occurred.
196    pub span: Span<'a>,
197    /// A description of the error.
198    pub message: String,
199}
200
201impl<'a> fmt::Display for ParseError<'a> {
202    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
203        let line = self.span.location_line() as usize;
204        let column = self.span.get_column();
205        let full_line = self
206            .source
207            .lines()
208            .nth(line.saturating_sub(1))
209            .unwrap_or("");
210        let indent = " ".repeat(if column > 0 { column - 1 } else { 0 });
211
212        write!(
213            f,
214            "Syntax Error at line {}, col {}: {}\n  | {}\n  | {}^--- here",
215            line, column, self.message, full_line, indent
216        )
217    }
218}
219
220// --- Parser primitives -----------------------------------------------------
221
222/// Internal nom error carrying a human message and the precise failure span.
223/// Recoverable failures are `nom::Err::Error`; once a statement has committed
224/// (its leading keyword is recognized), failures are promoted to
225/// `nom::Err::Failure` so the error points at the *real* problem with a specific
226/// message instead of backtracking to a generic "expected a statement".
227#[derive(Debug, Clone)]
228struct Problem<'a> {
229    input: Span<'a>,
230    message: String,
231}
232
233impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
234    fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
235        Problem {
236            input,
237            message: String::from("unexpected token"),
238        }
239    }
240    fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
241        other
242    }
243}
244
245/// nom result specialized to our [`Problem`] error and located `Span` input.
246type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
247
248/// Turn a recoverable `Error` into a committed `Failure` at `at` with `msg`.
249/// Already-`Failure` (a deeper, more specific message) and `Ok` pass through.
250fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
251    match r {
252        Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
253            input: at,
254            message: String::from(msg),
255        })),
256        other => other,
257    }
258}
259
260/// A plain recoverable `Error` at `input` (lets an enclosing `alt` backtrack).
261fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
262    Err(nom::Err::Error(Problem {
263        input,
264        message: String::from("unexpected token"),
265    }))
266}
267
268/// Characters allowed *after* the first in an identifier. Letters and digits of
269/// *any* script are accepted (Unicode `is_alphanumeric`), so `условие` or `名前`
270/// are valid; `_` joins multi-word names and `.` makes dotted subjects like
271/// `Creature.A` a single token. Punctuation and other symbols are rejected.
272fn is_ident_char(c: char) -> bool {
273    c.is_alphanumeric() || c == '_' || c == '.'
274}
275
276/// A bare identifier (does not reject reserved words). The first character must
277/// be a letter of any script (`is_alphabetic`) — never a digit, `_`, `.`, or
278/// punctuation — so identifiers stay distinct from numbers and operators.
279fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
280    recognize((satisfy(|c| c.is_alphabetic()), take_while(is_ident_char))).parse(input)
281}
282
283/// An identifier that is not a reserved keyword.
284fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
285    let start = input;
286    let (rest, sp) = raw_identifier(input)?;
287    if is_reserved(sp.fragment()) {
288        return perr(start);
289    }
290    Ok((
291        rest,
292        Located {
293            data: *sp.fragment(),
294            span: start,
295        },
296    ))
297}
298
299/// A line comment `// ... ` up to (but not including) the line ending.
300fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
301    recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
302}
303
304/// End of a statement line: trailing spaces, an optional trailing comment,
305/// then a line ending or EOF.
306fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
307    value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
308}
309
310/// A blank line or a full-line comment (must consume a line ending to progress).
311fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
312    value((), (space0, opt(comment), line_ending)).parse(input)
313}
314
315/// Skip any number of blank / comment lines between statements.
316fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
317    value((), many0(noise_line)).parse(input)
318}
319
320// --- Atoms & literals ------------------------------------------------------
321
322/// `<subject> <predicate> [<object>]` — two or three space-separated identifiers.
323fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
324    let start = input;
325    let (input, subject) = identifier(input)?;
326    let (input, _) = space1(input)?;
327    let (input, predicate) = identifier(input)?;
328    let (input, object) = opt(preceded(space1, identifier)).parse(input)?;
329    Ok((
330        input,
331        Located {
332            data: Atom {
333                subject: subject.data,
334                predicate: predicate.data,
335                object: object.map(|o| o.data),
336            },
337            span: start,
338        },
339    ))
340}
341
342/// An optionally `NOT`-prefixed [`atom`] — a literal inside a `WHEN`/`THEN` body.
343fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
344    let start = input;
345    let (input, neg) = opt(terminated(tag("NOT"), space1)).parse(input)?;
346    let (input, a) = atom(input)?;
347    Ok((
348        input,
349        Located {
350            data: Literal {
351                negated: neg.is_some(),
352                atom: a.data,
353            },
354            span: start,
355        },
356    ))
357}
358
359/// An atom on its own (possibly indented) line: used inside list bodies.
360fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
361    let (input, _) = space0(input)?;
362    let (input, a) = atom(input)?;
363    let (input, _) = eol(input)?;
364    Ok((input, a))
365}
366
367// --- Bodies ----------------------------------------------------------------
368
369/// One of the list-constraint keywords (`EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST`).
370fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
371    alt((
372        value(ListOp::Exclusive, tag("EXCLUSIVE")),
373        value(ListOp::Forbids, tag("FORBIDS")),
374        value(ListOp::OneOf, tag("ONEOF")),
375        value(ListOp::AtLeast, tag("ATLEAST")),
376    ))
377    .parse(input)
378}
379
380/// A list-constraint body: a list operator on its own line, then one atom per
381/// line (at least two).
382///
383/// Commit strategy: the leading `list_op` failing stays a recoverable `Error`
384/// so the `PREMISE` `alt` can fall through and try [`impl_body`] instead. *Once*
385/// the operator matched we are committed, so every subsequent failure is
386/// [`promote`]d to a `Failure` with a specific message — no backtracking to a
387/// generic "expected a statement".
388fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
389    let (input, _) = space0(input)?;
390    // list_op failing stays Error so the PREMISE alt can try impl_body.
391    let (input, op) = list_op(input)?;
392    // Past here we are committed to a list body.
393    let (input, _) = promote(
394        eol(input),
395        input,
396        "expected a newline after the list operator",
397    )?;
398    let at = input;
399    let (input, first) = promote(
400        atom_line(input),
401        at,
402        "a list premise needs at least two atoms",
403    )?;
404    let at = input;
405    let (input, second) = promote(
406        atom_line(input),
407        at,
408        "a list premise needs at least two atoms",
409    )?;
410    let (input, rest) = many0(atom_line).parse(input)?;
411
412    let mut atoms = vec![first, second];
413    atoms.extend(rest);
414    Ok((input, Body::List { op, atoms }))
415}
416
417/// A continuation `AND <literal>` / `OR <literal>` line inside a `WHEN`/`THEN`
418/// block. Returns `(Conn, literal)`. A line that is neither `AND` nor `OR` yields
419/// a recoverable `Error` so `many0` stops cleanly (e.g. at `THEN`/EOF).
420fn cont_line<'a>(input: Span<'a>) -> PResult<'a, (Conn, Located<'a, Literal<'a>>)> {
421    let (input, _) = space0(input)?;
422    // Not an AND/OR line → Error so many0 stops cleanly.
423    let (input, conn) =
424        alt((value(Conn::And, tag("AND")), value(Conn::Or, tag("OR")))).parse(input)?;
425    let (input, _) = space1(input)?;
426    let at = input;
427    let (input, lit) = promote(
428        literal(input),
429        at,
430        "AND/OR expects a literal: [NOT] <Subject> <predicate> [<object>]",
431    )?;
432    let (input, _) = promote(
433        eol(input),
434        input,
435        "unexpected text after the AND/OR literal",
436    )?;
437    Ok((input, (conn, lit)))
438}
439
440/// Reduce a group's continuation lines to a single [`Conn`], rejecting a mix of
441/// `AND` and `OR` in one group (point the error at the first line that switches).
442fn group_conn<'a>(conts: &[(Conn, Located<'a, Literal<'a>>)]) -> Result<Conn, Span<'a>> {
443    let mut seen: Option<Conn> = None;
444    for (conn, lit) in conts {
445        match seen {
446            None => seen = Some(*conn),
447            Some(s) if s != *conn => return Err(lit.span),
448            _ => {}
449        }
450    }
451    Ok(seen.unwrap_or(Conn::And))
452}
453
454/// Fail with a committed message at `at` (for in-body semantic checks).
455fn fail_at<'a, T>(at: Span<'a>, msg: &str) -> PResult<'a, T> {
456    Err(nom::Err::Failure(Problem {
457        input: at,
458        message: String::from(msg),
459    }))
460}
461
462/// An implication body: `WHEN <lit> [AND <lit>]* THEN <lit> [AND <lit>]*`.
463///
464/// Like [`list_body`], a missing leading `WHEN` stays a recoverable `Error` (so
465/// the `PREMISE` `alt` can try a list body); after `WHEN` matches we are committed
466/// and use [`promote`] for precise errors. Antecedent and consequent each start
467/// with one mandatory literal followed by zero or more `AND` lines.
468fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
469    let (input, _) = space0(input)?;
470    // No WHEN → Error so the PREMISE alt can fall through to a list body.
471    let (input, _) = (tag("WHEN"), space1).parse(input)?;
472    // Committed to an implication body now.
473    let at = input;
474    let (input, when) = promote(
475        literal(input),
476        at,
477        "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
478    )?;
479    let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
480    let (input, ante_rest) = many0(cont_line).parse(input)?;
481    let ante_conn = match group_conn(&ante_rest) {
482        Ok(c) => c,
483        Err(span) => {
484            return fail_at(
485                span,
486                "don't mix AND and OR in one WHEN group — split it into separate premises",
487            );
488        }
489    };
490
491    let (input, _) = space0(input)?;
492    let at = input;
493    let (input, _) = promote(
494        tag("THEN").parse(input),
495        at,
496        "expected THEN to complete the WHEN ... THEN implication",
497    )?;
498    let at = input;
499    let (input, then) = promote(
500        preceded(space1, literal).parse(input),
501        at,
502        "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
503    )?;
504    let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
505    let (input, cons_rest) = many0(cont_line).parse(input)?;
506    let cons_conn = match group_conn(&cons_rest) {
507        Ok(c) => c,
508        Err(span) => {
509            return fail_at(
510                span,
511                "don't mix AND and OR in one THEN group — split it into separate premises",
512            );
513        }
514    };
515
516    let mut antecedent = vec![when];
517    antecedent.extend(ante_rest.into_iter().map(|(_, l)| l));
518    let mut consequent = vec![then];
519    consequent.extend(cons_rest.into_iter().map(|(_, l)| l));
520    Ok((
521        input,
522        Body::Impl {
523            antecedent,
524            ante_conn,
525            consequent,
526            cons_conn,
527        },
528    ))
529}
530
531// --- Statements ------------------------------------------------------------
532
533/// `IMPORT "<path>"` — a quoted path on one line.
534fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
535    let (input, _) = (tag("IMPORT"), space1).parse(input)?;
536    let start = input;
537    let (input, path) = promote(
538        delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
539        start,
540        "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
541    )?;
542    let (input, _) = promote(eol(input), input, "unexpected text after the IMPORT path")?;
543    Ok((
544        input,
545        Statement::Import(Located {
546            data: *path.fragment(),
547            span: start,
548        }),
549    ))
550}
551
552/// `FACT <atom>` — a TRUE assertion.
553fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
554    let (input, _) = (tag("FACT"), space1).parse(input)?;
555    let at = input;
556    let (input, a) = promote(
557        atom(input),
558        at,
559        "FACT expects an atom: <Subject> <predicate> [<object>]",
560    )?;
561    let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
562    Ok((input, Statement::Fact(a)))
563}
564
565/// `NOT <atom>` — a FALSE assertion. Tried last among statements so a body-level
566/// `NOT` literal is never mistaken for a top-level negation.
567fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
568    let (input, _) = (tag("NOT"), space1).parse(input)?;
569    let at = input;
570    let (input, a) = promote(
571        atom(input),
572        at,
573        "NOT expects an atom: <Subject> <predicate> [<object>]",
574    )?;
575    let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
576    Ok((input, Statement::Negation(a)))
577}
578
579/// `CHECK [<subject>] [BIDIRECTIONAL]` — both modifiers optional.
580fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
581    let (input, _) = tag("CHECK").parse(input)?;
582    let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
583    let (input, bidir) = opt(preceded(space1, tag("BIDIRECTIONAL"))).parse(input)?;
584    let (input, _) = eol(input)?;
585    Ok((
586        input,
587        Statement::Check {
588            subject,
589            bidirectional: bidir.is_some(),
590        },
591    ))
592}
593
594/// `PREMISE <name>: <body>` where the body is a list or an implication.
595fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
596    let (input, _) = (tag("PREMISE"), space1).parse(input)?;
597    // Committed to a premise now.
598    let at = input;
599    let (input, name) = promote(
600        identifier(input),
601        at,
602        "expected a premise name (a lowercase identifier)",
603    )?;
604    let (input, _) = space0(input)?;
605    let (input, _) = promote(
606        char(':').parse(input),
607        input,
608        "expected ':' after the premise name",
609    )?;
610    let (input, _) = promote(eol(input), input, "unexpected text after 'PREMISE <name>:'")?;
611    let at = input;
612    let (input, body) = promote(
613        alt((list_body, impl_body)).parse(input),
614        at,
615        "a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
616    )?;
617    Ok((input, Statement::Premise { name, body }))
618}
619
620/// `RULE <name>: <implication>` — like a premise but the body must be `WHEN … THEN`.
621fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
622    let (input, _) = (tag("RULE"), space1).parse(input)?;
623    let at = input;
624    let (input, name) = promote(
625        identifier(input),
626        at,
627        "expected a rule name (a lowercase identifier)",
628    )?;
629    let (input, _) = space0(input)?;
630    let (input, _) = promote(
631        char(':').parse(input),
632        input,
633        "expected ':' after the rule name",
634    )?;
635    let (input, _) = promote(eol(input), input, "unexpected text after 'RULE <name>:'")?;
636    let at = input;
637    let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
638    Ok((input, Statement::Rule { name, body }))
639}
640
641/// One top-level statement. Order matters: each branch commits on its keyword,
642/// and `stmt_negation` comes last so a leading `NOT` is only read as a top-level
643/// negation when nothing else matched.
644fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
645    // Leading indentation is cosmetic everywhere, including on the top-level
646    // keyword line — so a whole program may be written indented (e.g. inside a
647    // host's here-doc) and parse identically.
648    let (input, _) = space0(input)?;
649    alt((
650        stmt_import,
651        stmt_fact,
652        stmt_premise,
653        stmt_rule,
654        stmt_check,
655        stmt_negation,
656    ))
657    .parse(input)
658}
659
660/// The whole program: leading noise, then statements each followed by noise
661/// (blank/comment lines). Stops at the first byte that is not a valid statement;
662/// [`parse`] then checks the tail is empty and otherwise reports an error there.
663fn program<'a>(input: Span<'a>) -> PResult<'a, Vec<Statement<'a>>> {
664    let (input, _) = skip_noise(input)?;
665    many0(terminated(statement, skip_noise)).parse(input)
666}
667
668/// Parse a full `.vrf` source into a [`Program`].
669///
670/// On error, returns a [`ParseError`] whose `Display` shows the offending line
671/// with a `^--- here` caret, mirroring vsm-parser.
672pub fn parse(src: &str) -> Result<Program<'_>, ParseError<'_>> {
673    let input = Span::new(src);
674    match program(input) {
675        Ok((rest, statements)) => {
676            if !trailing_is_empty(rest.fragment()) {
677                return Err(ParseError {
678                    source: src,
679                    span: rest,
680                    message: String::from(
681                        "expected a statement (IMPORT/FACT/NOT/PREMISE/RULE/CHECK)",
682                    ),
683                });
684            }
685            Ok(Program { statements })
686        }
687        Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => Err(ParseError {
688            source: src,
689            span: e.input,
690            message: e.message,
691        }),
692        Err(nom::Err::Incomplete(_)) => Err(ParseError {
693            source: src,
694            span: input,
695            message: String::from("incomplete input"),
696        }),
697    }
698}
699
700/// True if the unparsed tail is only whitespace and trailing comments.
701fn trailing_is_empty(tail: &str) -> bool {
702    for raw in tail.lines() {
703        let t = raw.trim();
704        if t.is_empty() || t.starts_with("//") {
705            continue;
706        }
707        return false;
708    }
709    true
710}
711
712#[cfg(test)]
713mod tests {
714    use super::*;
715    use alloc::format;
716
717    fn prog(src: &str) -> Program<'_> {
718        parse(src).expect("should parse")
719    }
720
721    /// `(subject, predicate, object?)` of one atom, borrowed from the source.
722    type AtomShape<'a> = (&'a str, &'a str, Option<&'a str>);
723    /// A list premise flattened to `(operator, its atoms)`.
724    type ListShape<'a> = (ListOp, Vec<AtomShape<'a>>);
725
726    /// Atom data flattened to owned tuples — span-independent, for structural
727    /// comparison (spans differ by offset, which is exactly what "cosmetic" means).
728    fn atom_shapes<'a>(p: &Program<'a>) -> Vec<ListShape<'a>> {
729        p.statements
730            .iter()
731            .filter_map(|s| match s {
732                Statement::Premise {
733                    body: Body::List { op, atoms },
734                    ..
735                } => Some((
736                    *op,
737                    atoms
738                        .iter()
739                        .map(|a| (a.data.subject, a.data.predicate, a.data.object))
740                        .collect(),
741                )),
742                _ => None,
743            })
744            .collect()
745    }
746
747    #[test]
748    fn parses_fact_and_negation() {
749        let p = prog(
750            r#"
751        FACT Creature.A has flying
752        NOT Creature.A has cold_blood
753        "#,
754        );
755        assert_eq!(p.statements.len(), 2);
756        match &p.statements[0] {
757            Statement::Fact(a) => {
758                assert_eq!(a.data.subject, "Creature.A");
759                assert_eq!(a.data.predicate, "has");
760                assert_eq!(a.data.object, Some("flying"));
761            }
762            other => panic!("expected fact, got {:?}", other),
763        }
764        match &p.statements[1] {
765            Statement::Negation(a) => {
766                assert_eq!(a.data.object, Some("cold_blood"));
767            }
768            other => panic!("expected negation, got {:?}", other),
769        }
770    }
771
772    #[test]
773    fn fact_without_object() {
774        let p = prog("FACT Motor over_100\n");
775        match &p.statements[0] {
776            Statement::Fact(a) => {
777                assert_eq!(a.data.subject, "Motor");
778                assert_eq!(a.data.predicate, "over_100");
779                assert_eq!(a.data.object, None);
780            }
781            other => panic!("expected fact, got {:?}", other),
782        }
783    }
784
785    #[test]
786    fn parses_import() {
787        let p = prog("IMPORT \"physics.vrf\"\n");
788        match &p.statements[0] {
789            Statement::Import(path) => assert_eq!(path.data, "physics.vrf"),
790            other => panic!("expected import, got {:?}", other),
791        }
792    }
793
794    #[test]
795    fn parses_exclusive_premise() {
796        let src = r#"
797        PREMISE fly_xor_swim:
798            EXCLUSIVE
799                Creature.A has flying
800                Creature.A has swimming
801        "#;
802        let p = prog(src);
803        match &p.statements[0] {
804            Statement::Premise { name, body } => {
805                assert_eq!(name.data, "fly_xor_swim");
806                match body {
807                    Body::List { op, atoms } => {
808                        assert_eq!(*op, ListOp::Exclusive);
809                        assert_eq!(atoms.len(), 2);
810                        assert_eq!(atoms[1].data.object, Some("swimming"));
811                    }
812                    other => panic!("expected list body, got {:?}", other),
813                }
814            }
815            other => panic!("expected premise, got {:?}", other),
816        }
817    }
818
819    #[test]
820    fn parses_implication_premise_with_and() {
821        let src = r#"
822        PREMISE wings_need_bone:
823            WHEN Creature.A has flying
824            THEN Creature.A has wing
825            AND  Creature.A has bone
826        "#;
827        let p = prog(src);
828        match &p.statements[0] {
829            Statement::Premise {
830                body:
831                    Body::Impl {
832                        antecedent,
833                        consequent,
834                        ..
835                    },
836                ..
837            } => {
838                assert_eq!(antecedent.len(), 1);
839                assert_eq!(antecedent[0].data.atom.object, Some("flying"));
840                assert_eq!(consequent.len(), 2);
841                assert_eq!(consequent[0].data.atom.object, Some("wing"));
842                assert_eq!(consequent[1].data.atom.object, Some("bone"));
843            }
844            other => panic!("expected impl premise, got {:?}", other),
845        }
846    }
847
848    #[test]
849    fn antecedent_and_goes_before_then() {
850        let src = r#"
851        PREMISE deploy:
852            WHEN s tested
853            AND s reviewed
854            THEN s can_deploy
855        "#;
856        let p = prog(src);
857        match &p.statements[0] {
858            Statement::Premise {
859                body:
860                    Body::Impl {
861                        antecedent,
862                        consequent,
863                        ..
864                    },
865                ..
866            } => {
867                assert_eq!(antecedent.len(), 2);
868                assert_eq!(consequent.len(), 1);
869            }
870            other => panic!("unexpected: {:?}", other),
871        }
872    }
873
874    #[test]
875    fn when_or_sets_disjunctive_antecedent() {
876        let src = r#"
877        PREMISE p:
878            WHEN x a
879            OR x b
880            THEN x c
881        "#;
882        match &prog(src).statements[0] {
883            Statement::Premise {
884                body:
885                    Body::Impl {
886                        antecedent,
887                        ante_conn,
888                        consequent,
889                        cons_conn,
890                    },
891                ..
892            } => {
893                assert_eq!(antecedent.len(), 2);
894                assert_eq!(*ante_conn, Conn::Or);
895                assert_eq!(consequent.len(), 1);
896                assert_eq!(*cons_conn, Conn::And); // single consequent → AND
897            }
898            other => panic!("expected impl premise, got {:?}", other),
899        }
900    }
901
902    #[test]
903    fn then_or_sets_disjunctive_consequent() {
904        let src = r#"
905        PREMISE p:
906            WHEN x a
907            THEN x b
908            OR x c
909        "#;
910        match &prog(src).statements[0] {
911            Statement::Premise {
912                body:
913                    Body::Impl {
914                        consequent,
915                        cons_conn,
916                        ..
917                    },
918                ..
919            } => {
920                assert_eq!(consequent.len(), 2);
921                assert_eq!(*cons_conn, Conn::Or);
922            }
923            other => panic!("expected impl premise, got {:?}", other),
924        }
925    }
926
927    #[test]
928    fn mixing_and_or_in_one_group_is_an_error() {
929        let mixed_when = r#"
930        PREMISE p:
931            WHEN x a
932            AND x b
933            OR x c
934            THEN x d
935        "#;
936        let mixed_then = r#"
937        PREMISE p:
938            WHEN x a
939            THEN x b
940            AND x c
941            OR x d
942        "#;
943        assert!(parse(mixed_when).is_err());
944        assert!(parse(mixed_then).is_err());
945    }
946
947    #[test]
948    fn or_is_a_reserved_word() {
949        assert!(parse("FACT OR has x\n").is_err());
950    }
951
952    #[test]
953    fn parses_negated_literal_in_rule() {
954        let src = r#"
955        RULE pick_slow:
956            WHEN NOT Motor over_100
957            THEN Motor uses slow_path
958        "#;
959        let p = prog(src);
960        match &p.statements[0] {
961            Statement::Rule {
962                body: Body::Impl { antecedent, .. },
963                ..
964            } => {
965                assert!(antecedent[0].data.negated);
966                assert_eq!(antecedent[0].data.atom.predicate, "over_100");
967            }
968            other => panic!("expected rule, got {:?}", other),
969        }
970    }
971
972    #[test]
973    fn parses_check_variants() {
974        let p = prog("CHECK Creature.A BIDIRECTIONAL\n");
975        match &p.statements[0] {
976            Statement::Check {
977                subject,
978                bidirectional,
979            } => {
980                assert_eq!(subject.as_ref().unwrap().data, "Creature.A");
981                assert!(bidirectional);
982            }
983            other => panic!("expected check, got {:?}", other),
984        }
985
986        let p = prog("CHECK\n");
987        match &p.statements[0] {
988            Statement::Check {
989                subject,
990                bidirectional,
991            } => {
992                assert!(subject.is_none());
993                assert!(!bidirectional);
994            }
995            other => panic!("expected check, got {:?}", other),
996        }
997    }
998
999    #[test]
1000    fn comments_and_blanks_are_ignored() {
1001        let src = "// header\n\nFACT a b   // trailing comment\n\n// tail\n";
1002        let p = prog(src);
1003        assert_eq!(p.statements.len(), 1);
1004    }
1005
1006    #[test]
1007    fn indentation_is_cosmetic() {
1008        let flat = r#"
1009        PREMISE x:
1010        EXCLUSIVE
1011        a b
1012        a c
1013        "#;
1014        let indented = r#"
1015        PREMISE x:
1016                EXCLUSIVE
1017          a b
1018                    a c
1019        "#;
1020        // Spans differ by offset (cosmetic); the parsed structure must be identical.
1021        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
1022    }
1023
1024    #[test]
1025    fn top_level_statements_may_be_indented() {
1026        // Leading indentation on the FACT/PREMISE/CHECK lines themselves is also
1027        // cosmetic (so a whole program can be pasted indented inside a here-doc).
1028        let flat = r#"
1029        FACT x a
1030        NOT x b
1031        CHECK x
1032        "#;
1033        let indented = r#"
1034            FACT x a
1035                NOT x b
1036            CHECK x
1037        "#;
1038        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
1039        assert_eq!(prog(indented).statements.len(), 3);
1040    }
1041
1042    #[test]
1043    fn full_creature_example_parses() {
1044        let src = include_str!("../../../docs/examples/creature.vrf");
1045        let p = prog(src);
1046        // 2 FACT + 3 PREMISE + 1 RULE + 1 CHECK = 7
1047        assert_eq!(p.statements.len(), 7);
1048    }
1049
1050    #[test]
1051    fn import_demo_example_parses() {
1052        let src = include_str!("../../../docs/examples/import-demo.vrf");
1053        let p = prog(src);
1054        assert!(matches!(p.statements[0], Statement::Import(_)));
1055    }
1056
1057    #[test]
1058    fn unicode_identifiers_any_script() {
1059        // Cyrillic subject/predicate/object, mixed with `_` and digits (not first).
1060        let p = prog(
1061            r#"
1062        FACT кот пушистый2
1063        NOT собака has крылья
1064        "#,
1065        );
1066        match &p.statements[0] {
1067            Statement::Fact(a) => {
1068                assert_eq!(a.data.subject, "кот");
1069                assert_eq!(a.data.predicate, "пушистый2");
1070                assert_eq!(a.data.object, None);
1071            }
1072            other => panic!("expected fact, got {:?}", other),
1073        }
1074        match &p.statements[1] {
1075            Statement::Negation(a) => {
1076                assert_eq!(a.data.subject, "собака");
1077                assert_eq!(a.data.object, Some("крылья"));
1078            }
1079            other => panic!("expected negation, got {:?}", other),
1080        }
1081    }
1082
1083    #[test]
1084    fn unicode_premise_name_and_body() {
1085        let src = r#"
1086        PREMISE правило_лая:
1087            WHEN собака has хвост
1088            THEN собака умеет_лаять
1089        "#;
1090        match &prog(src).statements[0] {
1091            Statement::Premise { name, body } => {
1092                assert_eq!(name.data, "правило_лая");
1093                match body {
1094                    Body::Impl {
1095                        antecedent,
1096                        consequent,
1097                        ..
1098                    } => {
1099                        assert_eq!(antecedent[0].data.atom.subject, "собака");
1100                        assert_eq!(consequent[0].data.atom.subject, "собака");
1101                        assert_eq!(consequent[0].data.atom.predicate, "умеет_лаять");
1102                    }
1103                    other => panic!("expected impl body, got {:?}", other),
1104                }
1105            }
1106            other => panic!("expected premise, got {:?}", other),
1107        }
1108    }
1109
1110    #[test]
1111    fn identifier_cannot_start_with_digit() {
1112        // `2cats` is not a valid subject — first char must be a letter.
1113        assert!(parse("FACT 2cats has fur\n").is_err());
1114    }
1115
1116    #[test]
1117    fn punctuation_is_rejected_in_identifier() {
1118        // `!` and other symbols are not identifier characters.
1119        assert!(parse("FACT cat! has fur\n").is_err());
1120    }
1121
1122    #[test]
1123    fn reserved_word_cannot_be_identifier() {
1124        // `WHEN` as a subject is illegal.
1125        assert!(parse("FACT WHEN has x\n").is_err());
1126    }
1127
1128    #[test]
1129    fn pretty_error_points_at_offending_line() {
1130        let src = r#"FACT a b
1131!garbage here
1132FACT c d
1133"#;
1134        let err = parse(src).expect_err("should fail");
1135        let shown = format!("{}", err);
1136        assert!(shown.contains("Syntax Error"));
1137        assert!(shown.contains("line 2"));
1138        assert!(shown.contains("!garbage here"));
1139        assert!(shown.contains("^--- here"));
1140    }
1141
1142    #[test]
1143    fn crlf_line_endings() {
1144        let p = prog(
1145            r#"
1146        FACT a b
1147        CHECK a
1148        "#,
1149        );
1150        assert_eq!(p.statements.len(), 2);
1151    }
1152
1153    #[test]
1154    fn tabs_as_indentation() {
1155        let p = prog(
1156            r#"
1157        PREMISE e:
1158	EXCLUSIVE
1159		x a
1160		x b
1161        "#,
1162        );
1163        assert!(matches!(
1164            p.statements[0],
1165            Statement::Premise {
1166                body: Body::List {
1167                    op: ListOp::Exclusive,
1168                    ..
1169                },
1170                ..
1171            }
1172        ));
1173    }
1174
1175    #[test]
1176    fn parses_all_list_ops() {
1177        for (kw, want) in [
1178            ("EXCLUSIVE", ListOp::Exclusive),
1179            ("FORBIDS", ListOp::Forbids),
1180            ("ONEOF", ListOp::OneOf),
1181            ("ATLEAST", ListOp::AtLeast),
1182        ] {
1183            let src = alloc::format!("PREMISE a:\n    {kw}\n        x a\n        x b\n");
1184            match &prog(&src).statements[0] {
1185                Statement::Premise {
1186                    body: Body::List { op, .. },
1187                    ..
1188                } => assert_eq!(*op, want),
1189                other => panic!("{kw}: unexpected {other:?}"),
1190            }
1191        }
1192    }
1193
1194    #[test]
1195    fn check_bidirectional_without_subject() {
1196        match &prog("CHECK BIDIRECTIONAL\n").statements[0] {
1197            Statement::Check {
1198                subject,
1199                bidirectional,
1200            } => {
1201                assert!(subject.is_none());
1202                assert!(bidirectional);
1203            }
1204            other => panic!("unexpected {other:?}"),
1205        }
1206    }
1207
1208    #[test]
1209    fn empty_and_comment_only_input_yield_no_statements() {
1210        assert_eq!(prog("").statements.len(), 0);
1211        assert_eq!(prog("// just a comment\n\n   \n").statements.len(), 0);
1212    }
1213
1214    #[test]
1215    fn negation_with_object() {
1216        match &prog("NOT Creature.A has wing\n").statements[0] {
1217            Statement::Negation(a) => {
1218                assert_eq!(a.data.subject, "Creature.A");
1219                assert_eq!(a.data.object, Some("wing"));
1220            }
1221            other => panic!("unexpected {other:?}"),
1222        }
1223    }
1224
1225    #[test]
1226    fn negated_consequent_then_not() {
1227        let src = r#"
1228        PREMISE a:
1229            WHEN x on
1230            THEN NOT x off
1231        "#;
1232        match &prog(src).statements[0] {
1233            Statement::Premise {
1234                body: Body::Impl { consequent, .. },
1235                ..
1236            } => {
1237                assert!(consequent[0].data.negated);
1238                assert_eq!(consequent[0].data.atom.predicate, "off");
1239            }
1240            other => panic!("unexpected {other:?}"),
1241        }
1242    }
1243
1244    #[test]
1245    fn multiple_imports_then_facts() {
1246        let p = prog(
1247            r#"
1248        IMPORT "a.vrf"
1249        IMPORT "b.vrf"
1250        FACT x y
1251        "#,
1252        );
1253        assert!(matches!(p.statements[0], Statement::Import(_)));
1254        assert!(matches!(p.statements[1], Statement::Import(_)));
1255        assert!(matches!(p.statements[2], Statement::Fact(_)));
1256    }
1257
1258    #[test]
1259    fn trailing_comment_without_final_newline() {
1260        let p = prog("FACT a b\n// trailing, no newline");
1261        assert_eq!(p.statements.len(), 1);
1262    }
1263}