Skip to main content

elenchus_parser/
lib.rs

1//! elenchus-parser — parses the English-like elenchus DSL into an AST.
2//!
3//! Style mirrors `vsm-parser`: zero-copy over `&str`, `nom` + `nom_locate`
4//! for line/column tracking, and a human-friendly `^--- here` error display.
5//! Syntax is line/keyword-oriented (not S-expressions) so small models cannot
6//! trip on parentheses or indentation.
7//!
8//! Grammar (see docs/SPEC.md, "Grammar (EBNF)"):
9//! - statements are newline-terminated; indentation is cosmetic, not significant;
10//! - keywords are ALWAYS CAPS, identifiers are content (case-sensitive, verbatim);
11//! - block boundaries (AXIOM/RULE bodies) are found by keywords, never by indent.
12#![no_std]
13// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
14#![warn(missing_docs)]
15
16extern crate alloc;
17
18use alloc::string::String;
19use alloc::vec;
20use alloc::vec::Vec;
21use core::fmt;
22
23use nom::{
24    IResult, Parser,
25    branch::alt,
26    bytes::complete::{tag, take_while},
27    character::complete::{char, line_ending, satisfy, space0, space1},
28    combinator::{eof, opt, recognize, value},
29    multi::many0,
30    sequence::{delimited, preceded, terminated},
31};
32use nom_locate::LocatedSpan;
33
34/// Source code fragment with line and column tracking.
35pub type Span<'a> = LocatedSpan<&'a str>;
36
37/// Container for data associated with its source location.
38#[derive(Debug, Clone, PartialEq)]
39pub struct Located<'a, T> {
40    /// The actual parsed data.
41    pub data: T,
42    /// The location in the source text (start of the construct).
43    pub span: Span<'a>,
44}
45
46// --- AST -------------------------------------------------------------------
47
48/// An atom is the triple `(subject, predicate, object?)` — the unit of identity.
49/// `Creature.A has flying` and `Creature.A has swimming` are DIFFERENT atoms.
50#[derive(Debug, Clone, PartialEq, Eq)]
51pub struct Atom<'a> {
52    /// The entity the claim is about, e.g. `Creature.A` or `Motor`.
53    pub subject: &'a str,
54    /// The relation or property asserted, e.g. `has` or `over_100`.
55    pub predicate: &'a str,
56    /// Optional value the predicate relates the subject to, e.g. `flying`.
57    /// `None` for two-word atoms such as `Motor over_100`. The object is part of
58    /// identity: `has flying` and `has swimming` are different atoms.
59    pub object: Option<&'a str>,
60}
61
62/// A literal is an atom, optionally negated (`NOT ...`).
63#[derive(Debug, Clone, PartialEq, Eq)]
64pub struct Literal<'a> {
65    /// `true` when written with a leading `NOT` (asserts the atom is FALSE).
66    pub negated: bool,
67    /// The underlying atom being asserted true or false.
68    pub atom: Atom<'a>,
69}
70
71/// List constraint operators (body of a list-style `AXIOM`).
72///
73/// These are surface sugar; the compiler desugars each to `Impossible` clauses
74/// (see `elenchus-compiler`). The meanings below are *what the author asserts*.
75#[derive(Debug, Clone, Copy, PartialEq, Eq)]
76pub enum ListOp {
77    /// `EXCLUSIVE` — at most one of the listed atoms may be TRUE (mutual
78    /// exclusion). For n > 2 this is pairwise, not "not all at once".
79    Exclusive,
80    /// `FORBIDS` — at most one may be TRUE; a synonym of [`ListOp::Exclusive`]
81    /// (same pairwise expansion), kept as a separate word for readability.
82    Forbids,
83    /// `ONEOF` — exactly one is TRUE: at-most-one (pairwise) plus at-least-one.
84    OneOf,
85    /// `ATLEAST` — at least one of the listed atoms is TRUE.
86    AtLeast,
87}
88
89/// The body of an `AXIOM` or `RULE`.
90#[derive(Debug, Clone, PartialEq)]
91pub enum Body<'a> {
92    /// `EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST` over >= 2 atoms.
93    List {
94        /// Which list constraint this is.
95        op: ListOp,
96        /// The atoms it ranges over (the parser guarantees at least two).
97        atoms: Vec<Located<'a, Atom<'a>>>,
98    },
99    /// `WHEN ... [AND ...] THEN ... [AND ...]` — antecedent + consequent literals.
100    Impl {
101        /// `WHEN`/`AND` conditions; all must hold (logical AND).
102        antecedent: Vec<Located<'a, Literal<'a>>>,
103        /// `THEN`/`AND` results that follow when the antecedent holds.
104        consequent: Vec<Located<'a, Literal<'a>>>,
105    },
106}
107
108/// A top-level statement.
109#[derive(Debug, Clone, PartialEq)]
110pub enum Statement<'a> {
111    /// `IMPORT "path"` — reuse another source (resolved by the compiler).
112    Import(Located<'a, &'a str>),
113    /// `FACT <atom>` — a TRUE assertion.
114    Fact(Located<'a, Atom<'a>>),
115    /// `NOT <atom>` — a FALSE assertion.
116    Negation(Located<'a, Atom<'a>>),
117    /// `AXIOM <name>: ...` — a checked first principle.
118    Axiom {
119        /// The axiom's label (a per-source name, not a global identifier).
120        name: Located<'a, &'a str>,
121        /// The constraint itself: a list body or a `WHEN … THEN` implication.
122        body: Body<'a>,
123    },
124    /// `RULE <name>: ...` — a fact-producing inference rule.
125    Rule {
126        /// The rule's label.
127        name: Located<'a, &'a str>,
128        /// Always an implication body (the grammar forbids a list body here).
129        body: Body<'a>,
130    },
131    /// `CHECK [subject] [BIDIRECTIONAL]` — a query.
132    Check {
133        /// Restrict the report to this subject; `None` checks everything.
134        subject: Option<Located<'a, &'a str>>,
135        /// `true` enables the backward (all-SAT) pass for UNDERDETERMINED.
136        bidirectional: bool,
137    },
138}
139
140/// A parsed program: a flat sequence of statements.
141#[derive(Debug, Clone, PartialEq)]
142pub struct Program<'a> {
143    /// Top-level statements in source order. The list is flat: AXIOM/RULE bodies
144    /// live inside their [`Statement`], not as separate entries.
145    pub statements: Vec<Statement<'a>>,
146}
147
148/// Reserved words — always CAPS, in full. An identifier may not equal any of these.
149pub const RESERVED: &[&str] = &[
150    "IMPORT",
151    "FACT",
152    "NOT",
153    "AXIOM",
154    "RULE",
155    "CHECK",
156    "BIDIRECTIONAL",
157    "WHEN",
158    "AND",
159    "THEN",
160    "EXCLUSIVE",
161    "FORBIDS",
162    "ONEOF",
163    "ATLEAST",
164];
165
166/// Whether `word` is a reserved keyword.
167pub fn is_reserved(word: &str) -> bool {
168    RESERVED.contains(&word)
169}
170
171// --- Error -----------------------------------------------------------------
172
173/// A friendly error structure that can be displayed to the user.
174#[derive(Debug)]
175pub struct ParseError<'a> {
176    /// The original full input string (needed for context).
177    pub source: &'a str,
178    /// The span where the error occurred.
179    pub span: Span<'a>,
180    /// A description of the error.
181    pub message: String,
182}
183
184impl<'a> fmt::Display for ParseError<'a> {
185    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
186        let line = self.span.location_line() as usize;
187        let column = self.span.get_column();
188        let full_line = self
189            .source
190            .lines()
191            .nth(line.saturating_sub(1))
192            .unwrap_or("");
193        let indent = " ".repeat(if column > 0 { column - 1 } else { 0 });
194
195        write!(
196            f,
197            "Syntax Error at line {}, col {}: {}\n  | {}\n  | {}^--- here",
198            line, column, self.message, full_line, indent
199        )
200    }
201}
202
203// --- Parser primitives -----------------------------------------------------
204
205/// Internal nom error carrying a human message and the precise failure span.
206/// Recoverable failures are `nom::Err::Error`; once a statement has committed
207/// (its leading keyword is recognized), failures are promoted to
208/// `nom::Err::Failure` so the error points at the *real* problem with a specific
209/// message instead of backtracking to a generic "expected a statement".
210#[derive(Debug, Clone)]
211struct Problem<'a> {
212    input: Span<'a>,
213    message: String,
214}
215
216impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
217    fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
218        Problem {
219            input,
220            message: String::from("unexpected token"),
221        }
222    }
223    fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
224        other
225    }
226}
227
228/// nom result specialized to our [`Problem`] error and located `Span` input.
229type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
230
231/// Turn a recoverable `Error` into a committed `Failure` at `at` with `msg`.
232/// Already-`Failure` (a deeper, more specific message) and `Ok` pass through.
233fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
234    match r {
235        Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
236            input: at,
237            message: String::from(msg),
238        })),
239        other => other,
240    }
241}
242
243/// A plain recoverable `Error` at `input` (lets an enclosing `alt` backtrack).
244fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
245    Err(nom::Err::Error(Problem {
246        input,
247        message: String::from("unexpected token"),
248    }))
249}
250
251/// Characters allowed *after* the first in an identifier. `.` is allowed so
252/// dotted subjects like `Creature.A` are a single token.
253fn is_ident_char(c: char) -> bool {
254    c.is_ascii_alphanumeric() || c == '_' || c == '.'
255}
256
257/// A bare identifier (does not reject reserved words).
258fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
259    recognize((
260        satisfy(|c| c.is_ascii_alphabetic()),
261        take_while(is_ident_char),
262    ))
263    .parse(input)
264}
265
266/// An identifier that is not a reserved keyword.
267fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
268    let start = input;
269    let (rest, sp) = raw_identifier(input)?;
270    if is_reserved(sp.fragment()) {
271        return perr(start);
272    }
273    Ok((
274        rest,
275        Located {
276            data: *sp.fragment(),
277            span: start,
278        },
279    ))
280}
281
282/// A line comment `// ... ` up to (but not including) the line ending.
283fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
284    recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
285}
286
287/// End of a statement line: trailing spaces, an optional trailing comment,
288/// then a line ending or EOF.
289fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
290    value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
291}
292
293/// A blank line or a full-line comment (must consume a line ending to progress).
294fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
295    value((), (space0, opt(comment), line_ending)).parse(input)
296}
297
298/// Skip any number of blank / comment lines between statements.
299fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
300    value((), many0(noise_line)).parse(input)
301}
302
303// --- Atoms & literals ------------------------------------------------------
304
305/// `<subject> <predicate> [<object>]` — two or three space-separated identifiers.
306fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
307    let start = input;
308    let (input, subject) = identifier(input)?;
309    let (input, _) = space1(input)?;
310    let (input, predicate) = identifier(input)?;
311    let (input, object) = opt(preceded(space1, identifier)).parse(input)?;
312    Ok((
313        input,
314        Located {
315            data: Atom {
316                subject: subject.data,
317                predicate: predicate.data,
318                object: object.map(|o| o.data),
319            },
320            span: start,
321        },
322    ))
323}
324
325/// An optionally `NOT`-prefixed [`atom`] — a literal inside a `WHEN`/`THEN` body.
326fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
327    let start = input;
328    let (input, neg) = opt(terminated(tag("NOT"), space1)).parse(input)?;
329    let (input, a) = atom(input)?;
330    Ok((
331        input,
332        Located {
333            data: Literal {
334                negated: neg.is_some(),
335                atom: a.data,
336            },
337            span: start,
338        },
339    ))
340}
341
342/// An atom on its own (possibly indented) line: used inside list bodies.
343fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
344    let (input, _) = space0(input)?;
345    let (input, a) = atom(input)?;
346    let (input, _) = eol(input)?;
347    Ok((input, a))
348}
349
350// --- Bodies ----------------------------------------------------------------
351
352/// One of the list-constraint keywords (`EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST`).
353fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
354    alt((
355        value(ListOp::Exclusive, tag("EXCLUSIVE")),
356        value(ListOp::Forbids, tag("FORBIDS")),
357        value(ListOp::OneOf, tag("ONEOF")),
358        value(ListOp::AtLeast, tag("ATLEAST")),
359    ))
360    .parse(input)
361}
362
363/// A list-constraint body: a list operator on its own line, then one atom per
364/// line (at least two).
365///
366/// Commit strategy: the leading `list_op` failing stays a recoverable `Error`
367/// so the `AXIOM` `alt` can fall through and try [`impl_body`] instead. *Once*
368/// the operator matched we are committed, so every subsequent failure is
369/// [`promote`]d to a `Failure` with a specific message — no backtracking to a
370/// generic "expected a statement".
371fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
372    let (input, _) = space0(input)?;
373    // list_op failing stays Error so the AXIOM alt can try impl_body.
374    let (input, op) = list_op(input)?;
375    // Past here we are committed to a list body.
376    let (input, _) = promote(
377        eol(input),
378        input,
379        "expected a newline after the list operator",
380    )?;
381    let at = input;
382    let (input, first) = promote(
383        atom_line(input),
384        at,
385        "a list axiom needs at least two atoms",
386    )?;
387    let at = input;
388    let (input, second) = promote(
389        atom_line(input),
390        at,
391        "a list axiom needs at least two atoms",
392    )?;
393    let (input, rest) = many0(atom_line).parse(input)?;
394
395    let mut atoms = vec![first, second];
396    atoms.extend(rest);
397    Ok((input, Body::List { op, atoms }))
398}
399
400/// A continuation `AND <literal>` line inside a `WHEN`/`THEN` block. Returns a
401/// recoverable `Error` when the line is not an `AND` so `many0` stops cleanly.
402fn and_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
403    let (input, _) = space0(input)?;
404    // Not an AND line → Error so many0 stops cleanly.
405    let (input, _) = (tag("AND"), space1).parse(input)?;
406    let at = input;
407    let (input, lit) = promote(
408        literal(input),
409        at,
410        "AND expects a literal: [NOT] <Subject> <predicate> [<object>]",
411    )?;
412    let (input, _) = promote(eol(input), input, "unexpected text after the AND literal")?;
413    Ok((input, lit))
414}
415
416/// An implication body: `WHEN <lit> [AND <lit>]* THEN <lit> [AND <lit>]*`.
417///
418/// Like [`list_body`], a missing leading `WHEN` stays a recoverable `Error` (so
419/// the `AXIOM` `alt` can try a list body); after `WHEN` matches we are committed
420/// and use [`promote`] for precise errors. Antecedent and consequent each start
421/// with one mandatory literal followed by zero or more `AND` lines.
422fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
423    let (input, _) = space0(input)?;
424    // No WHEN → Error so the AXIOM alt can fall through to a list body.
425    let (input, _) = (tag("WHEN"), space1).parse(input)?;
426    // Committed to an implication body now.
427    let at = input;
428    let (input, when) = promote(
429        literal(input),
430        at,
431        "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
432    )?;
433    let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
434    let (input, ante_rest) = many0(and_line).parse(input)?;
435
436    let (input, _) = space0(input)?;
437    let at = input;
438    let (input, _) = promote(
439        tag("THEN").parse(input),
440        at,
441        "expected THEN to complete the WHEN ... THEN implication",
442    )?;
443    let at = input;
444    let (input, then) = promote(
445        preceded(space1, literal).parse(input),
446        at,
447        "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
448    )?;
449    let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
450    let (input, cons_rest) = many0(and_line).parse(input)?;
451
452    let mut antecedent = vec![when];
453    antecedent.extend(ante_rest);
454    let mut consequent = vec![then];
455    consequent.extend(cons_rest);
456    Ok((
457        input,
458        Body::Impl {
459            antecedent,
460            consequent,
461        },
462    ))
463}
464
465// --- Statements ------------------------------------------------------------
466
467/// `IMPORT "<path>"` — a quoted path on one line.
468fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
469    let (input, _) = (tag("IMPORT"), space1).parse(input)?;
470    let start = input;
471    let (input, path) = promote(
472        delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
473        start,
474        "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
475    )?;
476    let (input, _) = promote(eol(input), input, "unexpected text after the IMPORT path")?;
477    Ok((
478        input,
479        Statement::Import(Located {
480            data: *path.fragment(),
481            span: start,
482        }),
483    ))
484}
485
486/// `FACT <atom>` — a TRUE assertion.
487fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
488    let (input, _) = (tag("FACT"), space1).parse(input)?;
489    let at = input;
490    let (input, a) = promote(
491        atom(input),
492        at,
493        "FACT expects an atom: <Subject> <predicate> [<object>]",
494    )?;
495    let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
496    Ok((input, Statement::Fact(a)))
497}
498
499/// `NOT <atom>` — a FALSE assertion. Tried last among statements so a body-level
500/// `NOT` literal is never mistaken for a top-level negation.
501fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
502    let (input, _) = (tag("NOT"), space1).parse(input)?;
503    let at = input;
504    let (input, a) = promote(
505        atom(input),
506        at,
507        "NOT expects an atom: <Subject> <predicate> [<object>]",
508    )?;
509    let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
510    Ok((input, Statement::Negation(a)))
511}
512
513/// `CHECK [<subject>] [BIDIRECTIONAL]` — both modifiers optional.
514fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
515    let (input, _) = tag("CHECK").parse(input)?;
516    let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
517    let (input, bidir) = opt(preceded(space1, tag("BIDIRECTIONAL"))).parse(input)?;
518    let (input, _) = eol(input)?;
519    Ok((
520        input,
521        Statement::Check {
522            subject,
523            bidirectional: bidir.is_some(),
524        },
525    ))
526}
527
528/// `AXIOM <name>: <body>` where the body is a list or an implication.
529fn stmt_axiom<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
530    let (input, _) = (tag("AXIOM"), space1).parse(input)?;
531    // Committed to an axiom now.
532    let at = input;
533    let (input, name) = promote(
534        identifier(input),
535        at,
536        "expected an axiom name (a lowercase identifier)",
537    )?;
538    let (input, _) = space0(input)?;
539    let (input, _) = promote(
540        char(':').parse(input),
541        input,
542        "expected ':' after the axiom name",
543    )?;
544    let (input, _) = promote(eol(input), input, "unexpected text after 'AXIOM <name>:'")?;
545    let at = input;
546    let (input, body) = promote(
547        alt((list_body, impl_body)).parse(input),
548        at,
549        "an axiom body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
550    )?;
551    Ok((input, Statement::Axiom { name, body }))
552}
553
554/// `RULE <name>: <implication>` — like an axiom but the body must be `WHEN … THEN`.
555fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
556    let (input, _) = (tag("RULE"), space1).parse(input)?;
557    let at = input;
558    let (input, name) = promote(
559        identifier(input),
560        at,
561        "expected a rule name (a lowercase identifier)",
562    )?;
563    let (input, _) = space0(input)?;
564    let (input, _) = promote(
565        char(':').parse(input),
566        input,
567        "expected ':' after the rule name",
568    )?;
569    let (input, _) = promote(eol(input), input, "unexpected text after 'RULE <name>:'")?;
570    let at = input;
571    let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
572    Ok((input, Statement::Rule { name, body }))
573}
574
575/// One top-level statement. Order matters: each branch commits on its keyword,
576/// and `stmt_negation` comes last so a leading `NOT` is only read as a top-level
577/// negation when nothing else matched.
578fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
579    alt((
580        stmt_import,
581        stmt_fact,
582        stmt_axiom,
583        stmt_rule,
584        stmt_check,
585        stmt_negation,
586    ))
587    .parse(input)
588}
589
590/// The whole program: leading noise, then statements each followed by noise
591/// (blank/comment lines). Stops at the first byte that is not a valid statement;
592/// [`parse`] then checks the tail is empty and otherwise reports an error there.
593fn program<'a>(input: Span<'a>) -> PResult<'a, Vec<Statement<'a>>> {
594    let (input, _) = skip_noise(input)?;
595    many0(terminated(statement, skip_noise)).parse(input)
596}
597
598/// Parse a full `.vrf` source into a [`Program`].
599///
600/// On error, returns a [`ParseError`] whose `Display` shows the offending line
601/// with a `^--- here` caret, mirroring vsm-parser.
602pub fn parse(src: &str) -> Result<Program<'_>, ParseError<'_>> {
603    let input = Span::new(src);
604    match program(input) {
605        Ok((rest, statements)) => {
606            if !trailing_is_empty(rest.fragment()) {
607                return Err(ParseError {
608                    source: src,
609                    span: rest,
610                    message: String::from(
611                        "expected a statement (IMPORT/FACT/NOT/AXIOM/RULE/CHECK)",
612                    ),
613                });
614            }
615            Ok(Program { statements })
616        }
617        Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => Err(ParseError {
618            source: src,
619            span: e.input,
620            message: e.message,
621        }),
622        Err(nom::Err::Incomplete(_)) => Err(ParseError {
623            source: src,
624            span: input,
625            message: String::from("incomplete input"),
626        }),
627    }
628}
629
630/// True if the unparsed tail is only whitespace and trailing comments.
631fn trailing_is_empty(tail: &str) -> bool {
632    for raw in tail.lines() {
633        let t = raw.trim();
634        if t.is_empty() || t.starts_with("//") {
635            continue;
636        }
637        return false;
638    }
639    true
640}
641
642#[cfg(test)]
643mod tests {
644    use super::*;
645    use alloc::format;
646
647    fn prog(src: &str) -> Program<'_> {
648        parse(src).expect("should parse")
649    }
650
651    /// `(subject, predicate, object?)` of one atom, borrowed from the source.
652    type AtomShape<'a> = (&'a str, &'a str, Option<&'a str>);
653    /// A list axiom flattened to `(operator, its atoms)`.
654    type ListShape<'a> = (ListOp, Vec<AtomShape<'a>>);
655
656    /// Atom data flattened to owned tuples — span-independent, for structural
657    /// comparison (spans differ by offset, which is exactly what "cosmetic" means).
658    fn atom_shapes<'a>(p: &Program<'a>) -> Vec<ListShape<'a>> {
659        p.statements
660            .iter()
661            .filter_map(|s| match s {
662                Statement::Axiom {
663                    body: Body::List { op, atoms },
664                    ..
665                } => Some((
666                    *op,
667                    atoms
668                        .iter()
669                        .map(|a| (a.data.subject, a.data.predicate, a.data.object))
670                        .collect(),
671                )),
672                _ => None,
673            })
674            .collect()
675    }
676
677    #[test]
678    fn parses_fact_and_negation() {
679        let p = prog("FACT Creature.A has flying\nNOT Creature.A has cold_blood\n");
680        assert_eq!(p.statements.len(), 2);
681        match &p.statements[0] {
682            Statement::Fact(a) => {
683                assert_eq!(a.data.subject, "Creature.A");
684                assert_eq!(a.data.predicate, "has");
685                assert_eq!(a.data.object, Some("flying"));
686            }
687            other => panic!("expected fact, got {:?}", other),
688        }
689        match &p.statements[1] {
690            Statement::Negation(a) => {
691                assert_eq!(a.data.object, Some("cold_blood"));
692            }
693            other => panic!("expected negation, got {:?}", other),
694        }
695    }
696
697    #[test]
698    fn fact_without_object() {
699        let p = prog("FACT Motor over_100\n");
700        match &p.statements[0] {
701            Statement::Fact(a) => {
702                assert_eq!(a.data.subject, "Motor");
703                assert_eq!(a.data.predicate, "over_100");
704                assert_eq!(a.data.object, None);
705            }
706            other => panic!("expected fact, got {:?}", other),
707        }
708    }
709
710    #[test]
711    fn parses_import() {
712        let p = prog("IMPORT \"physics.vrf\"\n");
713        match &p.statements[0] {
714            Statement::Import(path) => assert_eq!(path.data, "physics.vrf"),
715            other => panic!("expected import, got {:?}", other),
716        }
717    }
718
719    #[test]
720    fn parses_exclusive_axiom() {
721        let src = "AXIOM fly_xor_swim:\n    EXCLUSIVE\n        Creature.A has flying\n        Creature.A has swimming\n";
722        let p = prog(src);
723        match &p.statements[0] {
724            Statement::Axiom { name, body } => {
725                assert_eq!(name.data, "fly_xor_swim");
726                match body {
727                    Body::List { op, atoms } => {
728                        assert_eq!(*op, ListOp::Exclusive);
729                        assert_eq!(atoms.len(), 2);
730                        assert_eq!(atoms[1].data.object, Some("swimming"));
731                    }
732                    other => panic!("expected list body, got {:?}", other),
733                }
734            }
735            other => panic!("expected axiom, got {:?}", other),
736        }
737    }
738
739    #[test]
740    fn parses_implication_axiom_with_and() {
741        let src = "AXIOM wings_need_bone:\n    WHEN Creature.A has flying\n    THEN Creature.A has wing\n    AND  Creature.A has bone\n";
742        let p = prog(src);
743        match &p.statements[0] {
744            Statement::Axiom {
745                body:
746                    Body::Impl {
747                        antecedent,
748                        consequent,
749                    },
750                ..
751            } => {
752                assert_eq!(antecedent.len(), 1);
753                assert_eq!(antecedent[0].data.atom.object, Some("flying"));
754                assert_eq!(consequent.len(), 2);
755                assert_eq!(consequent[0].data.atom.object, Some("wing"));
756                assert_eq!(consequent[1].data.atom.object, Some("bone"));
757            }
758            other => panic!("expected impl axiom, got {:?}", other),
759        }
760    }
761
762    #[test]
763    fn antecedent_and_goes_before_then() {
764        let src = "AXIOM deploy:\n    WHEN s tested\n    AND s reviewed\n    THEN s can_deploy\n";
765        let p = prog(src);
766        match &p.statements[0] {
767            Statement::Axiom {
768                body:
769                    Body::Impl {
770                        antecedent,
771                        consequent,
772                    },
773                ..
774            } => {
775                assert_eq!(antecedent.len(), 2);
776                assert_eq!(consequent.len(), 1);
777            }
778            other => panic!("unexpected: {:?}", other),
779        }
780    }
781
782    #[test]
783    fn parses_negated_literal_in_rule() {
784        let src = "RULE pick_slow:\n    WHEN NOT Motor over_100\n    THEN Motor uses slow_path\n";
785        let p = prog(src);
786        match &p.statements[0] {
787            Statement::Rule {
788                body: Body::Impl { antecedent, .. },
789                ..
790            } => {
791                assert!(antecedent[0].data.negated);
792                assert_eq!(antecedent[0].data.atom.predicate, "over_100");
793            }
794            other => panic!("expected rule, got {:?}", other),
795        }
796    }
797
798    #[test]
799    fn parses_check_variants() {
800        let p = prog("CHECK Creature.A BIDIRECTIONAL\n");
801        match &p.statements[0] {
802            Statement::Check {
803                subject,
804                bidirectional,
805            } => {
806                assert_eq!(subject.as_ref().unwrap().data, "Creature.A");
807                assert!(bidirectional);
808            }
809            other => panic!("expected check, got {:?}", other),
810        }
811
812        let p = prog("CHECK\n");
813        match &p.statements[0] {
814            Statement::Check {
815                subject,
816                bidirectional,
817            } => {
818                assert!(subject.is_none());
819                assert!(!bidirectional);
820            }
821            other => panic!("expected check, got {:?}", other),
822        }
823    }
824
825    #[test]
826    fn comments_and_blanks_are_ignored() {
827        let src = "// header\n\nFACT a b   // trailing comment\n\n// tail\n";
828        let p = prog(src);
829        assert_eq!(p.statements.len(), 1);
830    }
831
832    #[test]
833    fn indentation_is_cosmetic() {
834        let flat = "AXIOM x:\nEXCLUSIVE\na b\na c\n";
835        let indented = "AXIOM x:\n        EXCLUSIVE\n  a b\n            a c\n";
836        // Spans differ by offset (cosmetic); the parsed structure must be identical.
837        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
838    }
839
840    #[test]
841    fn full_creature_example_parses() {
842        let src = include_str!("../../../docs/examples/creature.vrf");
843        let p = prog(src);
844        // 2 FACT + 3 AXIOM + 1 RULE + 1 CHECK = 7
845        assert_eq!(p.statements.len(), 7);
846    }
847
848    #[test]
849    fn import_demo_example_parses() {
850        let src = include_str!("../../../docs/examples/import-demo.vrf");
851        let p = prog(src);
852        assert!(matches!(p.statements[0], Statement::Import(_)));
853    }
854
855    #[test]
856    fn reserved_word_cannot_be_identifier() {
857        // `WHEN` as a subject is illegal.
858        assert!(parse("FACT WHEN has x\n").is_err());
859    }
860
861    #[test]
862    fn pretty_error_points_at_offending_line() {
863        let src = "FACT a b\n!garbage here\nFACT c d\n";
864        let err = parse(src).expect_err("should fail");
865        let shown = format!("{}", err);
866        assert!(shown.contains("Syntax Error"));
867        assert!(shown.contains("line 2"));
868        assert!(shown.contains("!garbage here"));
869        assert!(shown.contains("^--- here"));
870    }
871
872    #[test]
873    fn crlf_line_endings() {
874        let p = prog("FACT a b\r\nCHECK a\r\n");
875        assert_eq!(p.statements.len(), 2);
876    }
877
878    #[test]
879    fn tabs_as_indentation() {
880        let p = prog("AXIOM e:\n\tEXCLUSIVE\n\t\tx a\n\t\tx b\n");
881        assert!(matches!(
882            p.statements[0],
883            Statement::Axiom {
884                body: Body::List {
885                    op: ListOp::Exclusive,
886                    ..
887                },
888                ..
889            }
890        ));
891    }
892
893    #[test]
894    fn parses_all_list_ops() {
895        for (kw, want) in [
896            ("EXCLUSIVE", ListOp::Exclusive),
897            ("FORBIDS", ListOp::Forbids),
898            ("ONEOF", ListOp::OneOf),
899            ("ATLEAST", ListOp::AtLeast),
900        ] {
901            let src = alloc::format!("AXIOM a:\n    {kw}\n        x a\n        x b\n");
902            match &prog(&src).statements[0] {
903                Statement::Axiom {
904                    body: Body::List { op, .. },
905                    ..
906                } => assert_eq!(*op, want),
907                other => panic!("{kw}: unexpected {other:?}"),
908            }
909        }
910    }
911
912    #[test]
913    fn check_bidirectional_without_subject() {
914        match &prog("CHECK BIDIRECTIONAL\n").statements[0] {
915            Statement::Check {
916                subject,
917                bidirectional,
918            } => {
919                assert!(subject.is_none());
920                assert!(bidirectional);
921            }
922            other => panic!("unexpected {other:?}"),
923        }
924    }
925
926    #[test]
927    fn empty_and_comment_only_input_yield_no_statements() {
928        assert_eq!(prog("").statements.len(), 0);
929        assert_eq!(prog("// just a comment\n\n   \n").statements.len(), 0);
930    }
931
932    #[test]
933    fn negation_with_object() {
934        match &prog("NOT Creature.A has wing\n").statements[0] {
935            Statement::Negation(a) => {
936                assert_eq!(a.data.subject, "Creature.A");
937                assert_eq!(a.data.object, Some("wing"));
938            }
939            other => panic!("unexpected {other:?}"),
940        }
941    }
942
943    #[test]
944    fn negated_consequent_then_not() {
945        let src = "AXIOM a:\n    WHEN x on\n    THEN NOT x off\n";
946        match &prog(src).statements[0] {
947            Statement::Axiom {
948                body: Body::Impl { consequent, .. },
949                ..
950            } => {
951                assert!(consequent[0].data.negated);
952                assert_eq!(consequent[0].data.atom.predicate, "off");
953            }
954            other => panic!("unexpected {other:?}"),
955        }
956    }
957
958    #[test]
959    fn multiple_imports_then_facts() {
960        let p = prog("IMPORT \"a.vrf\"\nIMPORT \"b.vrf\"\nFACT x y\n");
961        assert!(matches!(p.statements[0], Statement::Import(_)));
962        assert!(matches!(p.statements[1], Statement::Import(_)));
963        assert!(matches!(p.statements[2], Statement::Fact(_)));
964    }
965
966    #[test]
967    fn trailing_comment_without_final_newline() {
968        let p = prog("FACT a b\n// trailing, no newline");
969        assert_eq!(p.statements.len(), 1);
970    }
971}