Skip to main content

elenchus_parser/
lib.rs

1//! elenchus-parser — parses the English-like elenchus DSL into an AST.
2//!
3//! Style mirrors `vsm-parser`: zero-copy over `&str`, `nom` + `nom_locate`
4//! for line/column tracking, and a human-friendly `^--- here` error display.
5//! Syntax is line/keyword-oriented (not S-expressions) so small models cannot
6//! trip on parentheses or indentation.
7//!
8//! Grammar (see docs/SPEC.md, "Grammar (EBNF)"):
9//! - statements are newline-terminated; indentation is cosmetic, not significant;
10//! - keywords are ALWAYS CAPS (ASCII); identifiers are content (case-sensitive,
11//!   verbatim, any-script letters — e.g. `условие`, `名前`);
12//! - block boundaries (PREMISE/RULE bodies) are found by keywords, never by indent.
13#![no_std]
14// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
15#![warn(missing_docs)]
16
17extern crate alloc;
18
19use alloc::string::String;
20use alloc::vec;
21use alloc::vec::Vec;
22use core::fmt;
23
24use nom::{
25    IResult, Parser,
26    branch::alt,
27    bytes::complete::{tag, take_while},
28    character::complete::{char, line_ending, satisfy, space0, space1},
29    combinator::{eof, opt, recognize, value},
30    multi::many0,
31    sequence::{delimited, preceded, terminated},
32};
33use nom_locate::LocatedSpan;
34
35/// Source code fragment with line and column tracking.
36pub type Span<'a> = LocatedSpan<&'a str>;
37
38/// Container for data associated with its source location.
39#[derive(Debug, Clone, PartialEq)]
40pub struct Located<'a, T> {
41    /// The actual parsed data.
42    pub data: T,
43    /// The location in the source text (start of the construct).
44    pub span: Span<'a>,
45}
46
47// --- AST -------------------------------------------------------------------
48
49/// An atom is the triple `(subject, predicate, object?)` — the unit of identity.
50/// `Creature.A has flying` and `Creature.A has swimming` are DIFFERENT atoms.
51#[derive(Debug, Clone, PartialEq, Eq)]
52pub struct Atom<'a> {
53    /// The entity the claim is about, e.g. `Creature.A` or `Motor`.
54    pub subject: &'a str,
55    /// The relation or property asserted, e.g. `has` or `over_100`.
56    pub predicate: &'a str,
57    /// Optional value the predicate relates the subject to, e.g. `flying`.
58    /// `None` for two-word atoms such as `Motor over_100`. The object is part of
59    /// identity: `has flying` and `has swimming` are different atoms.
60    pub object: Option<&'a str>,
61}
62
63/// A literal is an atom, optionally negated (`NOT ...`).
64#[derive(Debug, Clone, PartialEq, Eq)]
65pub struct Literal<'a> {
66    /// `true` when written with a leading `NOT` (asserts the atom is FALSE).
67    pub negated: bool,
68    /// The underlying atom being asserted true or false.
69    pub atom: Atom<'a>,
70}
71
72/// List constraint operators (body of a list-style `PREMISE`).
73///
74/// These are surface sugar; the compiler desugars each to `Impossible` clauses
75/// (see `elenchus-compiler`). The meanings below are *what the author asserts*.
76#[derive(Debug, Clone, Copy, PartialEq, Eq)]
77pub enum ListOp {
78    /// `EXCLUSIVE` — at most one of the listed atoms may be TRUE (mutual
79    /// exclusion). For n > 2 this is pairwise, not "not all at once".
80    Exclusive,
81    /// `FORBIDS` — at most one may be TRUE; a synonym of [`ListOp::Exclusive`]
82    /// (same pairwise expansion), kept as a separate word for readability.
83    Forbids,
84    /// `ONEOF` — exactly one is TRUE: at-most-one (pairwise) plus at-least-one.
85    OneOf,
86    /// `ATLEAST` — at least one of the listed atoms is TRUE.
87    AtLeast,
88}
89
90/// The body of an `PREMISE` or `RULE`.
91#[derive(Debug, Clone, PartialEq)]
92pub enum Body<'a> {
93    /// `EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST` over >= 2 atoms.
94    List {
95        /// Which list constraint this is.
96        op: ListOp,
97        /// The atoms it ranges over (the parser guarantees at least two).
98        atoms: Vec<Located<'a, Atom<'a>>>,
99    },
100    /// `WHEN ... [AND ...] THEN ... [AND ...]` — antecedent + consequent literals.
101    Impl {
102        /// `WHEN`/`AND` conditions; all must hold (logical AND).
103        antecedent: Vec<Located<'a, Literal<'a>>>,
104        /// `THEN`/`AND` results that follow when the antecedent holds.
105        consequent: Vec<Located<'a, Literal<'a>>>,
106    },
107}
108
109/// A top-level statement.
110#[derive(Debug, Clone, PartialEq)]
111pub enum Statement<'a> {
112    /// `IMPORT "path"` — reuse another source (resolved by the compiler).
113    Import(Located<'a, &'a str>),
114    /// `FACT <atom>` — a TRUE assertion.
115    Fact(Located<'a, Atom<'a>>),
116    /// `NOT <atom>` — a FALSE assertion.
117    Negation(Located<'a, Atom<'a>>),
118    /// `PREMISE <name>: ...` — a checked first principle.
119    Premise {
120        /// The premise's label (a per-source name, not a global identifier).
121        name: Located<'a, &'a str>,
122        /// The constraint itself: a list body or a `WHEN … THEN` implication.
123        body: Body<'a>,
124    },
125    /// `RULE <name>: ...` — a fact-producing inference rule.
126    Rule {
127        /// The rule's label.
128        name: Located<'a, &'a str>,
129        /// Always an implication body (the grammar forbids a list body here).
130        body: Body<'a>,
131    },
132    /// `CHECK [subject] [BIDIRECTIONAL]` — a query.
133    Check {
134        /// Restrict the report to this subject; `None` checks everything.
135        subject: Option<Located<'a, &'a str>>,
136        /// `true` enables the backward (all-SAT) pass for UNDERDETERMINED.
137        bidirectional: bool,
138    },
139}
140
141/// A parsed program: a flat sequence of statements.
142#[derive(Debug, Clone, PartialEq)]
143pub struct Program<'a> {
144    /// Top-level statements in source order. The list is flat: PREMISE/RULE bodies
145    /// live inside their [`Statement`], not as separate entries.
146    pub statements: Vec<Statement<'a>>,
147}
148
149/// Reserved words — always CAPS, in full. An identifier may not equal any of these.
150pub const RESERVED: &[&str] = &[
151    "IMPORT",
152    "FACT",
153    "NOT",
154    "PREMISE",
155    "RULE",
156    "CHECK",
157    "BIDIRECTIONAL",
158    "WHEN",
159    "AND",
160    "THEN",
161    "EXCLUSIVE",
162    "FORBIDS",
163    "ONEOF",
164    "ATLEAST",
165];
166
167/// Whether `word` is a reserved keyword.
168pub fn is_reserved(word: &str) -> bool {
169    RESERVED.contains(&word)
170}
171
172// --- Error -----------------------------------------------------------------
173
174/// A friendly error structure that can be displayed to the user.
175#[derive(Debug)]
176pub struct ParseError<'a> {
177    /// The original full input string (needed for context).
178    pub source: &'a str,
179    /// The span where the error occurred.
180    pub span: Span<'a>,
181    /// A description of the error.
182    pub message: String,
183}
184
185impl<'a> fmt::Display for ParseError<'a> {
186    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
187        let line = self.span.location_line() as usize;
188        let column = self.span.get_column();
189        let full_line = self
190            .source
191            .lines()
192            .nth(line.saturating_sub(1))
193            .unwrap_or("");
194        let indent = " ".repeat(if column > 0 { column - 1 } else { 0 });
195
196        write!(
197            f,
198            "Syntax Error at line {}, col {}: {}\n  | {}\n  | {}^--- here",
199            line, column, self.message, full_line, indent
200        )
201    }
202}
203
204// --- Parser primitives -----------------------------------------------------
205
206/// Internal nom error carrying a human message and the precise failure span.
207/// Recoverable failures are `nom::Err::Error`; once a statement has committed
208/// (its leading keyword is recognized), failures are promoted to
209/// `nom::Err::Failure` so the error points at the *real* problem with a specific
210/// message instead of backtracking to a generic "expected a statement".
211#[derive(Debug, Clone)]
212struct Problem<'a> {
213    input: Span<'a>,
214    message: String,
215}
216
217impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
218    fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
219        Problem {
220            input,
221            message: String::from("unexpected token"),
222        }
223    }
224    fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
225        other
226    }
227}
228
229/// nom result specialized to our [`Problem`] error and located `Span` input.
230type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
231
232/// Turn a recoverable `Error` into a committed `Failure` at `at` with `msg`.
233/// Already-`Failure` (a deeper, more specific message) and `Ok` pass through.
234fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
235    match r {
236        Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
237            input: at,
238            message: String::from(msg),
239        })),
240        other => other,
241    }
242}
243
244/// A plain recoverable `Error` at `input` (lets an enclosing `alt` backtrack).
245fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
246    Err(nom::Err::Error(Problem {
247        input,
248        message: String::from("unexpected token"),
249    }))
250}
251
252/// Characters allowed *after* the first in an identifier. Letters and digits of
253/// *any* script are accepted (Unicode `is_alphanumeric`), so `условие` or `名前`
254/// are valid; `_` joins multi-word names and `.` makes dotted subjects like
255/// `Creature.A` a single token. Punctuation and other symbols are rejected.
256fn is_ident_char(c: char) -> bool {
257    c.is_alphanumeric() || c == '_' || c == '.'
258}
259
260/// A bare identifier (does not reject reserved words). The first character must
261/// be a letter of any script (`is_alphabetic`) — never a digit, `_`, `.`, or
262/// punctuation — so identifiers stay distinct from numbers and operators.
263fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
264    recognize((satisfy(|c| c.is_alphabetic()), take_while(is_ident_char))).parse(input)
265}
266
267/// An identifier that is not a reserved keyword.
268fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
269    let start = input;
270    let (rest, sp) = raw_identifier(input)?;
271    if is_reserved(sp.fragment()) {
272        return perr(start);
273    }
274    Ok((
275        rest,
276        Located {
277            data: *sp.fragment(),
278            span: start,
279        },
280    ))
281}
282
283/// A line comment `// ... ` up to (but not including) the line ending.
284fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
285    recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
286}
287
288/// End of a statement line: trailing spaces, an optional trailing comment,
289/// then a line ending or EOF.
290fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
291    value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
292}
293
294/// A blank line or a full-line comment (must consume a line ending to progress).
295fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
296    value((), (space0, opt(comment), line_ending)).parse(input)
297}
298
299/// Skip any number of blank / comment lines between statements.
300fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
301    value((), many0(noise_line)).parse(input)
302}
303
304// --- Atoms & literals ------------------------------------------------------
305
306/// `<subject> <predicate> [<object>]` — two or three space-separated identifiers.
307fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
308    let start = input;
309    let (input, subject) = identifier(input)?;
310    let (input, _) = space1(input)?;
311    let (input, predicate) = identifier(input)?;
312    let (input, object) = opt(preceded(space1, identifier)).parse(input)?;
313    Ok((
314        input,
315        Located {
316            data: Atom {
317                subject: subject.data,
318                predicate: predicate.data,
319                object: object.map(|o| o.data),
320            },
321            span: start,
322        },
323    ))
324}
325
326/// An optionally `NOT`-prefixed [`atom`] — a literal inside a `WHEN`/`THEN` body.
327fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
328    let start = input;
329    let (input, neg) = opt(terminated(tag("NOT"), space1)).parse(input)?;
330    let (input, a) = atom(input)?;
331    Ok((
332        input,
333        Located {
334            data: Literal {
335                negated: neg.is_some(),
336                atom: a.data,
337            },
338            span: start,
339        },
340    ))
341}
342
343/// An atom on its own (possibly indented) line: used inside list bodies.
344fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
345    let (input, _) = space0(input)?;
346    let (input, a) = atom(input)?;
347    let (input, _) = eol(input)?;
348    Ok((input, a))
349}
350
351// --- Bodies ----------------------------------------------------------------
352
353/// One of the list-constraint keywords (`EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST`).
354fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
355    alt((
356        value(ListOp::Exclusive, tag("EXCLUSIVE")),
357        value(ListOp::Forbids, tag("FORBIDS")),
358        value(ListOp::OneOf, tag("ONEOF")),
359        value(ListOp::AtLeast, tag("ATLEAST")),
360    ))
361    .parse(input)
362}
363
364/// A list-constraint body: a list operator on its own line, then one atom per
365/// line (at least two).
366///
367/// Commit strategy: the leading `list_op` failing stays a recoverable `Error`
368/// so the `PREMISE` `alt` can fall through and try [`impl_body`] instead. *Once*
369/// the operator matched we are committed, so every subsequent failure is
370/// [`promote`]d to a `Failure` with a specific message — no backtracking to a
371/// generic "expected a statement".
372fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
373    let (input, _) = space0(input)?;
374    // list_op failing stays Error so the PREMISE alt can try impl_body.
375    let (input, op) = list_op(input)?;
376    // Past here we are committed to a list body.
377    let (input, _) = promote(
378        eol(input),
379        input,
380        "expected a newline after the list operator",
381    )?;
382    let at = input;
383    let (input, first) = promote(
384        atom_line(input),
385        at,
386        "a list premise needs at least two atoms",
387    )?;
388    let at = input;
389    let (input, second) = promote(
390        atom_line(input),
391        at,
392        "a list premise needs at least two atoms",
393    )?;
394    let (input, rest) = many0(atom_line).parse(input)?;
395
396    let mut atoms = vec![first, second];
397    atoms.extend(rest);
398    Ok((input, Body::List { op, atoms }))
399}
400
401/// A continuation `AND <literal>` line inside a `WHEN`/`THEN` block. Returns a
402/// recoverable `Error` when the line is not an `AND` so `many0` stops cleanly.
403fn and_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
404    let (input, _) = space0(input)?;
405    // Not an AND line → Error so many0 stops cleanly.
406    let (input, _) = (tag("AND"), space1).parse(input)?;
407    let at = input;
408    let (input, lit) = promote(
409        literal(input),
410        at,
411        "AND expects a literal: [NOT] <Subject> <predicate> [<object>]",
412    )?;
413    let (input, _) = promote(eol(input), input, "unexpected text after the AND literal")?;
414    Ok((input, lit))
415}
416
417/// An implication body: `WHEN <lit> [AND <lit>]* THEN <lit> [AND <lit>]*`.
418///
419/// Like [`list_body`], a missing leading `WHEN` stays a recoverable `Error` (so
420/// the `PREMISE` `alt` can try a list body); after `WHEN` matches we are committed
421/// and use [`promote`] for precise errors. Antecedent and consequent each start
422/// with one mandatory literal followed by zero or more `AND` lines.
423fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
424    let (input, _) = space0(input)?;
425    // No WHEN → Error so the PREMISE alt can fall through to a list body.
426    let (input, _) = (tag("WHEN"), space1).parse(input)?;
427    // Committed to an implication body now.
428    let at = input;
429    let (input, when) = promote(
430        literal(input),
431        at,
432        "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
433    )?;
434    let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
435    let (input, ante_rest) = many0(and_line).parse(input)?;
436
437    let (input, _) = space0(input)?;
438    let at = input;
439    let (input, _) = promote(
440        tag("THEN").parse(input),
441        at,
442        "expected THEN to complete the WHEN ... THEN implication",
443    )?;
444    let at = input;
445    let (input, then) = promote(
446        preceded(space1, literal).parse(input),
447        at,
448        "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
449    )?;
450    let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
451    let (input, cons_rest) = many0(and_line).parse(input)?;
452
453    let mut antecedent = vec![when];
454    antecedent.extend(ante_rest);
455    let mut consequent = vec![then];
456    consequent.extend(cons_rest);
457    Ok((
458        input,
459        Body::Impl {
460            antecedent,
461            consequent,
462        },
463    ))
464}
465
466// --- Statements ------------------------------------------------------------
467
468/// `IMPORT "<path>"` — a quoted path on one line.
469fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
470    let (input, _) = (tag("IMPORT"), space1).parse(input)?;
471    let start = input;
472    let (input, path) = promote(
473        delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
474        start,
475        "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
476    )?;
477    let (input, _) = promote(eol(input), input, "unexpected text after the IMPORT path")?;
478    Ok((
479        input,
480        Statement::Import(Located {
481            data: *path.fragment(),
482            span: start,
483        }),
484    ))
485}
486
487/// `FACT <atom>` — a TRUE assertion.
488fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
489    let (input, _) = (tag("FACT"), space1).parse(input)?;
490    let at = input;
491    let (input, a) = promote(
492        atom(input),
493        at,
494        "FACT expects an atom: <Subject> <predicate> [<object>]",
495    )?;
496    let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
497    Ok((input, Statement::Fact(a)))
498}
499
500/// `NOT <atom>` — a FALSE assertion. Tried last among statements so a body-level
501/// `NOT` literal is never mistaken for a top-level negation.
502fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
503    let (input, _) = (tag("NOT"), space1).parse(input)?;
504    let at = input;
505    let (input, a) = promote(
506        atom(input),
507        at,
508        "NOT expects an atom: <Subject> <predicate> [<object>]",
509    )?;
510    let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
511    Ok((input, Statement::Negation(a)))
512}
513
514/// `CHECK [<subject>] [BIDIRECTIONAL]` — both modifiers optional.
515fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
516    let (input, _) = tag("CHECK").parse(input)?;
517    let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
518    let (input, bidir) = opt(preceded(space1, tag("BIDIRECTIONAL"))).parse(input)?;
519    let (input, _) = eol(input)?;
520    Ok((
521        input,
522        Statement::Check {
523            subject,
524            bidirectional: bidir.is_some(),
525        },
526    ))
527}
528
529/// `PREMISE <name>: <body>` where the body is a list or an implication.
530fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
531    let (input, _) = (tag("PREMISE"), space1).parse(input)?;
532    // Committed to a premise now.
533    let at = input;
534    let (input, name) = promote(
535        identifier(input),
536        at,
537        "expected a premise name (a lowercase identifier)",
538    )?;
539    let (input, _) = space0(input)?;
540    let (input, _) = promote(
541        char(':').parse(input),
542        input,
543        "expected ':' after the premise name",
544    )?;
545    let (input, _) = promote(eol(input), input, "unexpected text after 'PREMISE <name>:'")?;
546    let at = input;
547    let (input, body) = promote(
548        alt((list_body, impl_body)).parse(input),
549        at,
550        "a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
551    )?;
552    Ok((input, Statement::Premise { name, body }))
553}
554
555/// `RULE <name>: <implication>` — like a premise but the body must be `WHEN … THEN`.
556fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
557    let (input, _) = (tag("RULE"), space1).parse(input)?;
558    let at = input;
559    let (input, name) = promote(
560        identifier(input),
561        at,
562        "expected a rule name (a lowercase identifier)",
563    )?;
564    let (input, _) = space0(input)?;
565    let (input, _) = promote(
566        char(':').parse(input),
567        input,
568        "expected ':' after the rule name",
569    )?;
570    let (input, _) = promote(eol(input), input, "unexpected text after 'RULE <name>:'")?;
571    let at = input;
572    let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
573    Ok((input, Statement::Rule { name, body }))
574}
575
576/// One top-level statement. Order matters: each branch commits on its keyword,
577/// and `stmt_negation` comes last so a leading `NOT` is only read as a top-level
578/// negation when nothing else matched.
579fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
580    alt((
581        stmt_import,
582        stmt_fact,
583        stmt_premise,
584        stmt_rule,
585        stmt_check,
586        stmt_negation,
587    ))
588    .parse(input)
589}
590
591/// The whole program: leading noise, then statements each followed by noise
592/// (blank/comment lines). Stops at the first byte that is not a valid statement;
593/// [`parse`] then checks the tail is empty and otherwise reports an error there.
594fn program<'a>(input: Span<'a>) -> PResult<'a, Vec<Statement<'a>>> {
595    let (input, _) = skip_noise(input)?;
596    many0(terminated(statement, skip_noise)).parse(input)
597}
598
599/// Parse a full `.vrf` source into a [`Program`].
600///
601/// On error, returns a [`ParseError`] whose `Display` shows the offending line
602/// with a `^--- here` caret, mirroring vsm-parser.
603pub fn parse(src: &str) -> Result<Program<'_>, ParseError<'_>> {
604    let input = Span::new(src);
605    match program(input) {
606        Ok((rest, statements)) => {
607            if !trailing_is_empty(rest.fragment()) {
608                return Err(ParseError {
609                    source: src,
610                    span: rest,
611                    message: String::from(
612                        "expected a statement (IMPORT/FACT/NOT/PREMISE/RULE/CHECK)",
613                    ),
614                });
615            }
616            Ok(Program { statements })
617        }
618        Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => Err(ParseError {
619            source: src,
620            span: e.input,
621            message: e.message,
622        }),
623        Err(nom::Err::Incomplete(_)) => Err(ParseError {
624            source: src,
625            span: input,
626            message: String::from("incomplete input"),
627        }),
628    }
629}
630
631/// True if the unparsed tail is only whitespace and trailing comments.
632fn trailing_is_empty(tail: &str) -> bool {
633    for raw in tail.lines() {
634        let t = raw.trim();
635        if t.is_empty() || t.starts_with("//") {
636            continue;
637        }
638        return false;
639    }
640    true
641}
642
643#[cfg(test)]
644mod tests {
645    use super::*;
646    use alloc::format;
647
648    fn prog(src: &str) -> Program<'_> {
649        parse(src).expect("should parse")
650    }
651
652    /// `(subject, predicate, object?)` of one atom, borrowed from the source.
653    type AtomShape<'a> = (&'a str, &'a str, Option<&'a str>);
654    /// A list premise flattened to `(operator, its atoms)`.
655    type ListShape<'a> = (ListOp, Vec<AtomShape<'a>>);
656
657    /// Atom data flattened to owned tuples — span-independent, for structural
658    /// comparison (spans differ by offset, which is exactly what "cosmetic" means).
659    fn atom_shapes<'a>(p: &Program<'a>) -> Vec<ListShape<'a>> {
660        p.statements
661            .iter()
662            .filter_map(|s| match s {
663                Statement::Premise {
664                    body: Body::List { op, atoms },
665                    ..
666                } => Some((
667                    *op,
668                    atoms
669                        .iter()
670                        .map(|a| (a.data.subject, a.data.predicate, a.data.object))
671                        .collect(),
672                )),
673                _ => None,
674            })
675            .collect()
676    }
677
678    #[test]
679    fn parses_fact_and_negation() {
680        let p = prog("FACT Creature.A has flying\nNOT Creature.A has cold_blood\n");
681        assert_eq!(p.statements.len(), 2);
682        match &p.statements[0] {
683            Statement::Fact(a) => {
684                assert_eq!(a.data.subject, "Creature.A");
685                assert_eq!(a.data.predicate, "has");
686                assert_eq!(a.data.object, Some("flying"));
687            }
688            other => panic!("expected fact, got {:?}", other),
689        }
690        match &p.statements[1] {
691            Statement::Negation(a) => {
692                assert_eq!(a.data.object, Some("cold_blood"));
693            }
694            other => panic!("expected negation, got {:?}", other),
695        }
696    }
697
698    #[test]
699    fn fact_without_object() {
700        let p = prog("FACT Motor over_100\n");
701        match &p.statements[0] {
702            Statement::Fact(a) => {
703                assert_eq!(a.data.subject, "Motor");
704                assert_eq!(a.data.predicate, "over_100");
705                assert_eq!(a.data.object, None);
706            }
707            other => panic!("expected fact, got {:?}", other),
708        }
709    }
710
711    #[test]
712    fn parses_import() {
713        let p = prog("IMPORT \"physics.vrf\"\n");
714        match &p.statements[0] {
715            Statement::Import(path) => assert_eq!(path.data, "physics.vrf"),
716            other => panic!("expected import, got {:?}", other),
717        }
718    }
719
720    #[test]
721    fn parses_exclusive_premise() {
722        let src = "PREMISE fly_xor_swim:\n    EXCLUSIVE\n        Creature.A has flying\n        Creature.A has swimming\n";
723        let p = prog(src);
724        match &p.statements[0] {
725            Statement::Premise { name, body } => {
726                assert_eq!(name.data, "fly_xor_swim");
727                match body {
728                    Body::List { op, atoms } => {
729                        assert_eq!(*op, ListOp::Exclusive);
730                        assert_eq!(atoms.len(), 2);
731                        assert_eq!(atoms[1].data.object, Some("swimming"));
732                    }
733                    other => panic!("expected list body, got {:?}", other),
734                }
735            }
736            other => panic!("expected premise, got {:?}", other),
737        }
738    }
739
740    #[test]
741    fn parses_implication_premise_with_and() {
742        let src = "PREMISE wings_need_bone:\n    WHEN Creature.A has flying\n    THEN Creature.A has wing\n    AND  Creature.A has bone\n";
743        let p = prog(src);
744        match &p.statements[0] {
745            Statement::Premise {
746                body:
747                    Body::Impl {
748                        antecedent,
749                        consequent,
750                    },
751                ..
752            } => {
753                assert_eq!(antecedent.len(), 1);
754                assert_eq!(antecedent[0].data.atom.object, Some("flying"));
755                assert_eq!(consequent.len(), 2);
756                assert_eq!(consequent[0].data.atom.object, Some("wing"));
757                assert_eq!(consequent[1].data.atom.object, Some("bone"));
758            }
759            other => panic!("expected impl premise, got {:?}", other),
760        }
761    }
762
763    #[test]
764    fn antecedent_and_goes_before_then() {
765        let src = "PREMISE deploy:\n    WHEN s tested\n    AND s reviewed\n    THEN s can_deploy\n";
766        let p = prog(src);
767        match &p.statements[0] {
768            Statement::Premise {
769                body:
770                    Body::Impl {
771                        antecedent,
772                        consequent,
773                    },
774                ..
775            } => {
776                assert_eq!(antecedent.len(), 2);
777                assert_eq!(consequent.len(), 1);
778            }
779            other => panic!("unexpected: {:?}", other),
780        }
781    }
782
783    #[test]
784    fn parses_negated_literal_in_rule() {
785        let src = "RULE pick_slow:\n    WHEN NOT Motor over_100\n    THEN Motor uses slow_path\n";
786        let p = prog(src);
787        match &p.statements[0] {
788            Statement::Rule {
789                body: Body::Impl { antecedent, .. },
790                ..
791            } => {
792                assert!(antecedent[0].data.negated);
793                assert_eq!(antecedent[0].data.atom.predicate, "over_100");
794            }
795            other => panic!("expected rule, got {:?}", other),
796        }
797    }
798
799    #[test]
800    fn parses_check_variants() {
801        let p = prog("CHECK Creature.A BIDIRECTIONAL\n");
802        match &p.statements[0] {
803            Statement::Check {
804                subject,
805                bidirectional,
806            } => {
807                assert_eq!(subject.as_ref().unwrap().data, "Creature.A");
808                assert!(bidirectional);
809            }
810            other => panic!("expected check, got {:?}", other),
811        }
812
813        let p = prog("CHECK\n");
814        match &p.statements[0] {
815            Statement::Check {
816                subject,
817                bidirectional,
818            } => {
819                assert!(subject.is_none());
820                assert!(!bidirectional);
821            }
822            other => panic!("expected check, got {:?}", other),
823        }
824    }
825
826    #[test]
827    fn comments_and_blanks_are_ignored() {
828        let src = "// header\n\nFACT a b   // trailing comment\n\n// tail\n";
829        let p = prog(src);
830        assert_eq!(p.statements.len(), 1);
831    }
832
833    #[test]
834    fn indentation_is_cosmetic() {
835        let flat = "PREMISE x:\nEXCLUSIVE\na b\na c\n";
836        let indented = "PREMISE x:\n        EXCLUSIVE\n  a b\n            a c\n";
837        // Spans differ by offset (cosmetic); the parsed structure must be identical.
838        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
839    }
840
841    #[test]
842    fn full_creature_example_parses() {
843        let src = include_str!("../../../docs/examples/creature.vrf");
844        let p = prog(src);
845        // 2 FACT + 3 PREMISE + 1 RULE + 1 CHECK = 7
846        assert_eq!(p.statements.len(), 7);
847    }
848
849    #[test]
850    fn import_demo_example_parses() {
851        let src = include_str!("../../../docs/examples/import-demo.vrf");
852        let p = prog(src);
853        assert!(matches!(p.statements[0], Statement::Import(_)));
854    }
855
856    #[test]
857    fn unicode_identifiers_any_script() {
858        // Cyrillic subject/predicate/object, mixed with `_` and digits (not first).
859        let p = prog("FACT кот пушистый2\nNOT собака has крылья\n");
860        match &p.statements[0] {
861            Statement::Fact(a) => {
862                assert_eq!(a.data.subject, "кот");
863                assert_eq!(a.data.predicate, "пушистый2");
864                assert_eq!(a.data.object, None);
865            }
866            other => panic!("expected fact, got {:?}", other),
867        }
868        match &p.statements[1] {
869            Statement::Negation(a) => {
870                assert_eq!(a.data.subject, "собака");
871                assert_eq!(a.data.object, Some("крылья"));
872            }
873            other => panic!("expected negation, got {:?}", other),
874        }
875    }
876
877    #[test]
878    fn unicode_premise_name_and_body() {
879        let src = "PREMISE правило_лая:\n    WHEN собака has хвост\n    THEN собака умеет_лаять\n";
880        match &prog(src).statements[0] {
881            Statement::Premise { name, body } => {
882                assert_eq!(name.data, "правило_лая");
883                match body {
884                    Body::Impl {
885                        antecedent,
886                        consequent,
887                    } => {
888                        assert_eq!(antecedent[0].data.atom.subject, "собака");
889                        assert_eq!(consequent[0].data.atom.subject, "собака");
890                        assert_eq!(consequent[0].data.atom.predicate, "умеет_лаять");
891                    }
892                    other => panic!("expected impl body, got {:?}", other),
893                }
894            }
895            other => panic!("expected premise, got {:?}", other),
896        }
897    }
898
899    #[test]
900    fn identifier_cannot_start_with_digit() {
901        // `2cats` is not a valid subject — first char must be a letter.
902        assert!(parse("FACT 2cats has fur\n").is_err());
903    }
904
905    #[test]
906    fn punctuation_is_rejected_in_identifier() {
907        // `!` and other symbols are not identifier characters.
908        assert!(parse("FACT cat! has fur\n").is_err());
909    }
910
911    #[test]
912    fn reserved_word_cannot_be_identifier() {
913        // `WHEN` as a subject is illegal.
914        assert!(parse("FACT WHEN has x\n").is_err());
915    }
916
917    #[test]
918    fn pretty_error_points_at_offending_line() {
919        let src = "FACT a b\n!garbage here\nFACT c d\n";
920        let err = parse(src).expect_err("should fail");
921        let shown = format!("{}", err);
922        assert!(shown.contains("Syntax Error"));
923        assert!(shown.contains("line 2"));
924        assert!(shown.contains("!garbage here"));
925        assert!(shown.contains("^--- here"));
926    }
927
928    #[test]
929    fn crlf_line_endings() {
930        let p = prog("FACT a b\r\nCHECK a\r\n");
931        assert_eq!(p.statements.len(), 2);
932    }
933
934    #[test]
935    fn tabs_as_indentation() {
936        let p = prog("PREMISE e:\n\tEXCLUSIVE\n\t\tx a\n\t\tx b\n");
937        assert!(matches!(
938            p.statements[0],
939            Statement::Premise {
940                body: Body::List {
941                    op: ListOp::Exclusive,
942                    ..
943                },
944                ..
945            }
946        ));
947    }
948
949    #[test]
950    fn parses_all_list_ops() {
951        for (kw, want) in [
952            ("EXCLUSIVE", ListOp::Exclusive),
953            ("FORBIDS", ListOp::Forbids),
954            ("ONEOF", ListOp::OneOf),
955            ("ATLEAST", ListOp::AtLeast),
956        ] {
957            let src = alloc::format!("PREMISE a:\n    {kw}\n        x a\n        x b\n");
958            match &prog(&src).statements[0] {
959                Statement::Premise {
960                    body: Body::List { op, .. },
961                    ..
962                } => assert_eq!(*op, want),
963                other => panic!("{kw}: unexpected {other:?}"),
964            }
965        }
966    }
967
968    #[test]
969    fn check_bidirectional_without_subject() {
970        match &prog("CHECK BIDIRECTIONAL\n").statements[0] {
971            Statement::Check {
972                subject,
973                bidirectional,
974            } => {
975                assert!(subject.is_none());
976                assert!(bidirectional);
977            }
978            other => panic!("unexpected {other:?}"),
979        }
980    }
981
982    #[test]
983    fn empty_and_comment_only_input_yield_no_statements() {
984        assert_eq!(prog("").statements.len(), 0);
985        assert_eq!(prog("// just a comment\n\n   \n").statements.len(), 0);
986    }
987
988    #[test]
989    fn negation_with_object() {
990        match &prog("NOT Creature.A has wing\n").statements[0] {
991            Statement::Negation(a) => {
992                assert_eq!(a.data.subject, "Creature.A");
993                assert_eq!(a.data.object, Some("wing"));
994            }
995            other => panic!("unexpected {other:?}"),
996        }
997    }
998
999    #[test]
1000    fn negated_consequent_then_not() {
1001        let src = "PREMISE a:\n    WHEN x on\n    THEN NOT x off\n";
1002        match &prog(src).statements[0] {
1003            Statement::Premise {
1004                body: Body::Impl { consequent, .. },
1005                ..
1006            } => {
1007                assert!(consequent[0].data.negated);
1008                assert_eq!(consequent[0].data.atom.predicate, "off");
1009            }
1010            other => panic!("unexpected {other:?}"),
1011        }
1012    }
1013
1014    #[test]
1015    fn multiple_imports_then_facts() {
1016        let p = prog("IMPORT \"a.vrf\"\nIMPORT \"b.vrf\"\nFACT x y\n");
1017        assert!(matches!(p.statements[0], Statement::Import(_)));
1018        assert!(matches!(p.statements[1], Statement::Import(_)));
1019        assert!(matches!(p.statements[2], Statement::Fact(_)));
1020    }
1021
1022    #[test]
1023    fn trailing_comment_without_final_newline() {
1024        let p = prog("FACT a b\n// trailing, no newline");
1025        assert_eq!(p.statements.len(), 1);
1026    }
1027}