Skip to main content

elenchus_parser/
ast.rs

1//! The abstract syntax tree: the typed shape a `.vrf` source parses into.
2//!
3//! Everything here is zero-copy over the source `&str` (atoms/names borrow their
4//! slices) and every node that can be pointed at in an error carries its
5//! [`Span`] via [`Located`].
6
7use alloc::vec::Vec;
8
9use nom_locate::LocatedSpan;
10
11/// Source code fragment with line and column tracking.
12pub type Span<'a> = LocatedSpan<&'a str>;
13
14/// Container for data associated with its source location.
15#[derive(Debug, Clone, PartialEq)]
16pub struct Located<'a, T> {
17    /// The actual parsed data.
18    pub data: T,
19    /// The location in the source text (start of the construct).
20    pub span: Span<'a>,
21}
22
23/// An atom is the triple `(subject, predicate, object?)` — the unit of identity —
24/// optionally qualified by a domain (`physics.engine has fuel`).
25/// `Creature_A has flying` and `Creature_A has swimming` are DIFFERENT atoms.
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct Atom<'a> {
28    /// The domain this atom is qualified into, written as a `domain.` prefix on
29    /// the subject (e.g. `physics` in `physics.engine has fuel`). `None` means the
30    /// atom belongs to the current file's own declared domain (no prefix).
31    pub domain: Option<&'a str>,
32    /// The entity the claim is about, e.g. `Creature_A` or `Motor`.
33    pub subject: &'a str,
34    /// The relation or property asserted, e.g. `has` or `over_100`. `None` for a
35    /// **bare proposition** — a single-word atom such as `db_ready`, introduced by
36    /// a `VAR` port and used directly in bodies (`WHEN db_ready THEN …`). The
37    /// compiler requires any bare-proposition atom to be a declared `VAR`.
38    pub predicate: Option<&'a str>,
39    /// Optional value the predicate relates the subject to, e.g. `flying`.
40    /// `None` for two-word atoms such as `Motor over_100`. The object is part of
41    /// identity: `has flying` and `has swimming` are different atoms. Always `None`
42    /// when `predicate` is `None` (a bare proposition has neither).
43    pub object: Option<&'a str>,
44}
45
46/// A literal is an atom, optionally negated (`NOT ...`).
47#[derive(Debug, Clone, PartialEq, Eq)]
48pub struct Literal<'a> {
49    /// `true` when written with a leading `NOT` (asserts the atom is FALSE).
50    pub negated: bool,
51    /// The underlying atom being asserted true or false.
52    pub atom: Atom<'a>,
53}
54
55/// List constraint operators (body of a list-style `PREMISE`).
56///
57/// These are surface sugar; the compiler desugars each to `Impossible` clauses
58/// (see `elenchus-compiler`). The meanings below are *what the author asserts*.
59#[derive(Debug, Clone, Copy, PartialEq, Eq)]
60pub enum ListOp {
61    /// `EXCLUSIVE` — at most one of the listed atoms may be TRUE (mutual
62    /// exclusion). For n > 2 this is pairwise, not "not all at once".
63    Exclusive,
64    /// `FORBIDS` — at most one may be TRUE; a synonym of [`ListOp::Exclusive`]
65    /// (same pairwise expansion), kept as a separate word for readability.
66    Forbids,
67    /// `ONEOF` — exactly one is TRUE: at-most-one (pairwise) plus at-least-one.
68    OneOf,
69    /// `ATLEAST` — at least one of the listed atoms is TRUE.
70    AtLeast,
71}
72
73/// How the literals in a `WHEN`/`THEN` group combine. A single-literal group is
74/// always [`Conn::And`] (the connective is irrelevant with one literal).
75#[derive(Debug, Clone, Copy, PartialEq, Eq)]
76pub enum Conn {
77    /// Continuation lines used `AND` — all literals must hold.
78    And,
79    /// Continuation lines used `OR` — at least one literal must hold.
80    Or,
81}
82
83/// A `FOR EACH` quantifier on a `PREMISE`/`RULE` header. It instantiates the
84/// premise's whole body **once per element**, substituting the binder. The
85/// quantifier lives in the *header* (not the body), so there is exactly one per
86/// statement and the body grammar is untouched — a second `FOR EACH` is
87/// structurally unrepresentable, which is what bounds the desugar to a linear
88/// number of clauses (no domain products).
89#[derive(Debug, Clone, PartialEq)]
90pub enum Quant<'a> {
91    /// `FOR EACH <binder> IN <set>` — range over the elements of a declared
92    /// [`Statement::Set`]. The binder is the name the body's atoms refer to.
93    InSet {
94        /// The name bound inside the body (substituted per element).
95        binder: Located<'a, &'a str>,
96        /// The declared set this ranges over.
97        set: Located<'a, &'a str>,
98    },
99    /// `FOR EACH <left> <predicate> <right>` — range over the declared `FACT`
100    /// pairs of that relation (e.g. every `FACT a linked b`), binding `left` to a
101    /// pair's subject and `right` to its object. This is the channel for
102    /// multi-element constraints (graphs, dependencies): the pair is *pinned by
103    /// data*, so the desugar is linear in the number of facts, never a product.
104    Relation {
105        /// Bound to each matching fact's subject.
106        left: Located<'a, &'a str>,
107        /// The relation predicate whose facts are ranged over.
108        predicate: Located<'a, &'a str>,
109        /// Bound to each matching fact's object.
110        right: Located<'a, &'a str>,
111    },
112}
113
114/// Which closure a `CLOSE` statement applies to a relation. Each is a compile-time
115/// graph operation over the relation's `FACT` pairs (no solver cost); only
116/// `Transitive` rejects a cycle (it requires a DAG) — the others either expect or
117/// produce self/back pairs by design.
118#[derive(Debug, Clone, Copy, PartialEq, Eq)]
119pub enum CloseKind {
120    /// `TRANSITIVE` — add `a→c` whenever `a→b` and `b→c` hold (requires a DAG).
121    Transitive,
122    /// `SYMMETRIC` — add `b→a` whenever `a→b` holds (a two-way relation).
123    Symmetric,
124    /// `REFLEXIVE` — add `x→x` for every node the relation mentions.
125    Reflexive,
126    /// `EQUIVALENCE` — reflexive + symmetric + transitive closure: groups nodes
127    /// into classes (a→b iff they are connected ignoring direction).
128    Equivalence,
129    /// `SCC` — strongly-connected grouping: `a→b` iff `a` and `b` reach each other
130    /// (mutual reachability), plus each node to itself. Isolates directed cycles.
131    Scc,
132}
133
134/// The body of an `PREMISE` or `RULE`.
135#[derive(Debug, Clone, PartialEq)]
136pub enum Body<'a> {
137    /// `EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST` over >= 2 atoms.
138    List {
139        /// Which list constraint this is.
140        op: ListOp,
141        /// The atoms it ranges over (the parser guarantees at least two).
142        atoms: Vec<Located<'a, Atom<'a>>>,
143    },
144    /// `EXISTS <binder> IN <set>` then one condition line using the binder — at
145    /// least one element of the set satisfies the condition. Desugars to an
146    /// at-least-one over the per-element instantiations (the dual of a `FOR EACH`
147    /// over a set, which is an "all"). Premise-only: `∃` checks, it derives nothing.
148    Exists {
149        /// The bound variable substituted into `atom` once per set element.
150        binder: Located<'a, &'a str>,
151        /// The declared `SET` the binder ranges over.
152        set: Located<'a, &'a str>,
153        /// The condition, carrying the binder in some position.
154        atom: Located<'a, Atom<'a>>,
155    },
156    /// `WHEN ... [AND|OR ...] THEN ... [AND|OR ...]` — antecedent + consequent.
157    /// Within one group the continuation keyword is uniform (no mixing `AND`/`OR`).
158    Impl {
159        /// `WHEN`/`AND`/`OR` conditions.
160        antecedent: Vec<Located<'a, Literal<'a>>>,
161        /// How the antecedent literals combine.
162        ante_conn: Conn,
163        /// `THEN`/`AND`/`OR` results that follow when the antecedent holds.
164        consequent: Vec<Located<'a, Literal<'a>>>,
165        /// How the consequent literals combine.
166        cons_conn: Conn,
167    },
168}
169
170/// A top-level statement.
171#[derive(Debug, Clone, PartialEq)]
172pub enum Statement<'a> {
173    /// `DOMAIN <name>` — declare the domain this file's atoms belong to. Required
174    /// once per file, as the first statement; it is the identity namespace into
175    /// which bare atoms fall.
176    Domain(Located<'a, &'a str>),
177    /// `IMPORT "path" [AS <alias>]` — reuse another source (resolved by the
178    /// compiler). The optional `alias` is the local name the imported domain is
179    /// referenced by; without it, the imported file's own declared domain name is
180    /// used.
181    Import {
182        /// The quoted source path.
183        path: Located<'a, &'a str>,
184        /// The local alias for the imported domain, if `AS <alias>` was given.
185        alias: Option<Located<'a, &'a str>>,
186    },
187    /// `FACT <atom>` — a TRUE assertion.
188    Fact(Located<'a, Atom<'a>>),
189    /// `NOT <atom>` — a FALSE assertion.
190    Negation(Located<'a, Atom<'a>>),
191    /// `ASSUME [NOT] <atom>` — a *soft* (retractable) assertion. Same shape as a
192    /// `FACT`/`NOT`, but it is a hypothesis, not a commitment: when the
193    /// assumptions cannot all hold the solver names which to drop, and it never
194    /// blames a `FACT`/`PREMISE`. The `Literal` carries the optional `NOT`.
195    Assume(Located<'a, Literal<'a>>),
196    /// `SET <name>` then one element identifier per line — declare a finite set
197    /// to quantify a `PREMISE`/`RULE` over via `FOR EACH <binder> IN <name>`.
198    Set {
199        /// The set's name (referenced by `FOR EACH … IN <name>`).
200        name: Located<'a, &'a str>,
201        /// Its elements, one per line (at least one).
202        elements: Vec<Located<'a, &'a str>>,
203    },
204    /// `CLOSE <relation> <kind>` — close a relation's `FACT` pairs under the given
205    /// [`CloseKind`] at compile time (a graph operation, no solver cost). With
206    /// `TRANSITIVE` a cycle is a compile error (DAG required); the other kinds
207    /// allow cycles.
208    Close {
209        /// The relation predicate to close (e.g. `depends_on`).
210        relation: Located<'a, &'a str>,
211        /// Which closure to apply.
212        kind: CloseKind,
213    },
214    /// `VAR <name> [DEFAULT true|false]` — declare an external boolean **port**: a
215    /// single-word proposition whose truth is supplied from outside (CLI/API/data).
216    /// `<name>` doubles as the proposition usable in bodies (`WHEN <name> THEN …`)
217    /// and as the key external values bind to. With no supplied value it falls back
218    /// to `default`, or stays UNKNOWN when there is none.
219    Var {
220        /// The port's name — both the bound proposition and the external key.
221        name: Located<'a, &'a str>,
222        /// The `DEFAULT true|false` fallback, if written.
223        default: Option<bool>,
224    },
225    /// `PROVIDE [<domain>.]<port|atom>: true|false` — bind an external value. The
226    /// data-carrying counterpart of `VAR`: it supplies a value (like a CLI/API
227    /// binding) rather than declaring a port. The target is parsed as a full
228    /// [`Atom`], so a lone word binds a `VAR` port (`PROVIDE db_ready: true`), a
229    /// multi-word form asserts an atom (`PROVIDE engine has_fuel: true`), and a
230    /// `domain.` prefix disambiguates across imports (`PROVIDE self.has_vision:
231    /// true`). Used in a data-only file (loaded via `--data`) or alongside the
232    /// program. Conflicting bindings for one target are a hard error.
233    Provide {
234        /// The port or atom this binds (a lone-subject atom is a `VAR` port).
235        atom: Located<'a, Atom<'a>>,
236        /// The boolean value supplied for it.
237        value: bool,
238    },
239    /// `PREMISE <name> [FOR EACH …]: ...` — a checked first principle, optionally
240    /// quantified over a declared set.
241    Premise {
242        /// The premise's label (a per-source name, not a global identifier).
243        name: Located<'a, &'a str>,
244        /// An optional `FOR EACH … IN …` header quantifier (at most one).
245        quant: Option<Quant<'a>>,
246        /// The constraint itself: a list body or a `WHEN … THEN` implication.
247        body: Body<'a>,
248    },
249    /// `RULE <name> [FOR EACH …]: ...` — a fact-producing inference rule.
250    Rule {
251        /// The rule's label.
252        name: Located<'a, &'a str>,
253        /// An optional `FOR EACH … IN …` header quantifier (at most one).
254        quant: Option<Quant<'a>>,
255        /// Always an implication body (the grammar forbids a list body here).
256        body: Body<'a>,
257    },
258    /// `CHECK [subject] [BIDIRECTIONAL]` — a query.
259    Check {
260        /// Restrict the report to this subject; `None` checks everything.
261        subject: Option<Located<'a, &'a str>>,
262        /// `true` enables the backward (all-SAT) pass for UNDERDETERMINED.
263        bidirectional: bool,
264    },
265}
266
267/// A parsed program: a flat sequence of statements.
268#[derive(Debug, Clone, PartialEq)]
269pub struct Program<'a> {
270    /// Top-level statements in source order. The list is flat: PREMISE/RULE bodies
271    /// live inside their [`Statement`], not as separate entries.
272    pub statements: Vec<Statement<'a>>,
273}