Skip to main content

elenchus_parser/
ast.rs

1//! The abstract syntax tree: the typed shape a `.vrf` source parses into.
2//!
3//! Everything here is zero-copy over the source `&str` (atoms/names borrow their
4//! slices) and every node that can be pointed at in an error carries its
5//! [`Span`] via [`Located`].
6
7use alloc::vec::Vec;
8
9use nom_locate::LocatedSpan;
10
11/// Source code fragment with line and column tracking.
12pub type Span<'a> = LocatedSpan<&'a str>;
13
14/// Container for data associated with its source location.
15#[derive(Debug, Clone, PartialEq)]
16pub struct Located<'a, T> {
17    /// The actual parsed data.
18    pub data: T,
19    /// The location in the source text (start of the construct).
20    pub span: Span<'a>,
21}
22
23/// An atom is the triple `(subject, predicate, object?)` — the unit of identity —
24/// optionally qualified by a domain (`physics.engine has fuel`).
25/// `Creature_A has flying` and `Creature_A has swimming` are DIFFERENT atoms.
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct Atom<'a> {
28    /// The domain this atom is qualified into, written as a `domain.` prefix on
29    /// the subject (e.g. `physics` in `physics.engine has fuel`). `None` means the
30    /// atom belongs to the current file's own declared domain (no prefix).
31    pub domain: Option<&'a str>,
32    /// The entity the claim is about, e.g. `Creature_A` or `Motor`.
33    pub subject: &'a str,
34    /// The relation or property asserted, e.g. `has` or `over_100`.
35    pub predicate: &'a str,
36    /// Optional value the predicate relates the subject to, e.g. `flying`.
37    /// `None` for two-word atoms such as `Motor over_100`. The object is part of
38    /// identity: `has flying` and `has swimming` are different atoms.
39    pub object: Option<&'a str>,
40}
41
42/// A literal is an atom, optionally negated (`NOT ...`).
43#[derive(Debug, Clone, PartialEq, Eq)]
44pub struct Literal<'a> {
45    /// `true` when written with a leading `NOT` (asserts the atom is FALSE).
46    pub negated: bool,
47    /// The underlying atom being asserted true or false.
48    pub atom: Atom<'a>,
49}
50
51/// List constraint operators (body of a list-style `PREMISE`).
52///
53/// These are surface sugar; the compiler desugars each to `Impossible` clauses
54/// (see `elenchus-compiler`). The meanings below are *what the author asserts*.
55#[derive(Debug, Clone, Copy, PartialEq, Eq)]
56pub enum ListOp {
57    /// `EXCLUSIVE` — at most one of the listed atoms may be TRUE (mutual
58    /// exclusion). For n > 2 this is pairwise, not "not all at once".
59    Exclusive,
60    /// `FORBIDS` — at most one may be TRUE; a synonym of [`ListOp::Exclusive`]
61    /// (same pairwise expansion), kept as a separate word for readability.
62    Forbids,
63    /// `ONEOF` — exactly one is TRUE: at-most-one (pairwise) plus at-least-one.
64    OneOf,
65    /// `ATLEAST` — at least one of the listed atoms is TRUE.
66    AtLeast,
67}
68
69/// How the literals in a `WHEN`/`THEN` group combine. A single-literal group is
70/// always [`Conn::And`] (the connective is irrelevant with one literal).
71#[derive(Debug, Clone, Copy, PartialEq, Eq)]
72pub enum Conn {
73    /// Continuation lines used `AND` — all literals must hold.
74    And,
75    /// Continuation lines used `OR` — at least one literal must hold.
76    Or,
77}
78
79/// A `FOR EACH` quantifier on a `PREMISE`/`RULE` header. It instantiates the
80/// premise's whole body **once per element**, substituting the binder. The
81/// quantifier lives in the *header* (not the body), so there is exactly one per
82/// statement and the body grammar is untouched — a second `FOR EACH` is
83/// structurally unrepresentable, which is what bounds the desugar to a linear
84/// number of clauses (no domain products).
85#[derive(Debug, Clone, PartialEq)]
86pub enum Quant<'a> {
87    /// `FOR EACH <binder> IN <set>` — range over the elements of a declared
88    /// [`Statement::Set`]. The binder is the name the body's atoms refer to.
89    InSet {
90        /// The name bound inside the body (substituted per element).
91        binder: Located<'a, &'a str>,
92        /// The declared set this ranges over.
93        set: Located<'a, &'a str>,
94    },
95    /// `FOR EACH <left> <predicate> <right>` — range over the declared `FACT`
96    /// pairs of that relation (e.g. every `FACT a linked b`), binding `left` to a
97    /// pair's subject and `right` to its object. This is the channel for
98    /// multi-element constraints (graphs, dependencies): the pair is *pinned by
99    /// data*, so the desugar is linear in the number of facts, never a product.
100    Relation {
101        /// Bound to each matching fact's subject.
102        left: Located<'a, &'a str>,
103        /// The relation predicate whose facts are ranged over.
104        predicate: Located<'a, &'a str>,
105        /// Bound to each matching fact's object.
106        right: Located<'a, &'a str>,
107    },
108}
109
110/// Which closure a `CLOSE` statement applies to a relation.
111#[derive(Debug, Clone, Copy, PartialEq, Eq)]
112pub enum CloseKind {
113    /// `TRANSITIVE` — add `a→c` whenever `a→b` and `b→c` hold (requires a DAG).
114    Transitive,
115}
116
117/// The body of an `PREMISE` or `RULE`.
118#[derive(Debug, Clone, PartialEq)]
119pub enum Body<'a> {
120    /// `EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST` over >= 2 atoms.
121    List {
122        /// Which list constraint this is.
123        op: ListOp,
124        /// The atoms it ranges over (the parser guarantees at least two).
125        atoms: Vec<Located<'a, Atom<'a>>>,
126    },
127    /// `WHEN ... [AND|OR ...] THEN ... [AND|OR ...]` — antecedent + consequent.
128    /// Within one group the continuation keyword is uniform (no mixing `AND`/`OR`).
129    Impl {
130        /// `WHEN`/`AND`/`OR` conditions.
131        antecedent: Vec<Located<'a, Literal<'a>>>,
132        /// How the antecedent literals combine.
133        ante_conn: Conn,
134        /// `THEN`/`AND`/`OR` results that follow when the antecedent holds.
135        consequent: Vec<Located<'a, Literal<'a>>>,
136        /// How the consequent literals combine.
137        cons_conn: Conn,
138    },
139}
140
141/// A top-level statement.
142#[derive(Debug, Clone, PartialEq)]
143pub enum Statement<'a> {
144    /// `DOMAIN <name>` — declare the domain this file's atoms belong to. Required
145    /// once per file, as the first statement; it is the identity namespace into
146    /// which bare atoms fall.
147    Domain(Located<'a, &'a str>),
148    /// `IMPORT "path" [AS <alias>]` — reuse another source (resolved by the
149    /// compiler). The optional `alias` is the local name the imported domain is
150    /// referenced by; without it, the imported file's own declared domain name is
151    /// used.
152    Import {
153        /// The quoted source path.
154        path: Located<'a, &'a str>,
155        /// The local alias for the imported domain, if `AS <alias>` was given.
156        alias: Option<Located<'a, &'a str>>,
157    },
158    /// `FACT <atom>` — a TRUE assertion.
159    Fact(Located<'a, Atom<'a>>),
160    /// `NOT <atom>` — a FALSE assertion.
161    Negation(Located<'a, Atom<'a>>),
162    /// `ASSUME [NOT] <atom>` — a *soft* (retractable) assertion. Same shape as a
163    /// `FACT`/`NOT`, but it is a hypothesis, not a commitment: when the
164    /// assumptions cannot all hold the solver names which to drop, and it never
165    /// blames a `FACT`/`PREMISE`. The `Literal` carries the optional `NOT`.
166    Assume(Located<'a, Literal<'a>>),
167    /// `SET <name>` then one element identifier per line — declare a finite set
168    /// to quantify a `PREMISE`/`RULE` over via `FOR EACH <binder> IN <name>`.
169    Set {
170        /// The set's name (referenced by `FOR EACH … IN <name>`).
171        name: Located<'a, &'a str>,
172        /// Its elements, one per line (at least one).
173        elements: Vec<Located<'a, &'a str>>,
174    },
175    /// `CLOSE <relation> TRANSITIVE` — close a relation's `FACT` pairs under the
176    /// given kind at compile time (a graph operation, no solver cost). A cycle is
177    /// a compile error.
178    Close {
179        /// The relation predicate to close (e.g. `depends_on`).
180        relation: Located<'a, &'a str>,
181        /// Which closure to apply.
182        kind: CloseKind,
183    },
184    /// `PREMISE <name> [FOR EACH …]: ...` — a checked first principle, optionally
185    /// quantified over a declared set.
186    Premise {
187        /// The premise's label (a per-source name, not a global identifier).
188        name: Located<'a, &'a str>,
189        /// An optional `FOR EACH … IN …` header quantifier (at most one).
190        quant: Option<Quant<'a>>,
191        /// The constraint itself: a list body or a `WHEN … THEN` implication.
192        body: Body<'a>,
193    },
194    /// `RULE <name> [FOR EACH …]: ...` — a fact-producing inference rule.
195    Rule {
196        /// The rule's label.
197        name: Located<'a, &'a str>,
198        /// An optional `FOR EACH … IN …` header quantifier (at most one).
199        quant: Option<Quant<'a>>,
200        /// Always an implication body (the grammar forbids a list body here).
201        body: Body<'a>,
202    },
203    /// `CHECK [subject] [BIDIRECTIONAL]` — a query.
204    Check {
205        /// Restrict the report to this subject; `None` checks everything.
206        subject: Option<Located<'a, &'a str>>,
207        /// `true` enables the backward (all-SAT) pass for UNDERDETERMINED.
208        bidirectional: bool,
209    },
210}
211
212/// A parsed program: a flat sequence of statements.
213#[derive(Debug, Clone, PartialEq)]
214pub struct Program<'a> {
215    /// Top-level statements in source order. The list is flat: PREMISE/RULE bodies
216    /// live inside their [`Statement`], not as separate entries.
217    pub statements: Vec<Statement<'a>>,
218}