Skip to main content

elenchus_compiler/
lib.rs

1//! elenchus-compiler — compiles parsed elenchus DSL into a canonical clause IR.
2//!
3//! This crate is **preparation, not solving**. It takes the AST (from
4//! `elenchus-parser`) and produces a deterministic, solver-ready intermediate
5//! representation:
6//!
7//! - **atom interner**: `(subject, predicate, object?)` → dense `u32` id,
8//!   canonically sorted so ids (and any later enumeration) are deterministic;
9//! - **desugaring**: surface CAPS sugar → `Impossible` clauses
10//!   (`EXCLUSIVE` pairwise, `WHEN…THEN` → `Impossible([A, …, NOT C])`, etc.);
11//! - **content-addressing** (sha256, mirroring vsm-guard's CAS): identical
12//!   clauses are deduped (idempotent — `P ∧ P ≡ P`), and a named construct
13//!   redefined with a different body is a `PremiseRedefinition` error.
14//!
15//! The actual reasoning (3-valued forward chaining, SAT, all-SAT, the WARNING
16//! pool, the four results) lives in `elenchus-solver`. `IMPORT` resolution is a
17//! source-agnostic [`Resolver`] that flat-merges another source into the shared
18//! atom universe ([`compile`] resolves imports; [`compile_source`] leaves them
19//! pending).
20//!
21//! # Example
22//!
23//! ```
24//! use elenchus_compiler::compile_source;
25//!
26//! // `ASSUME` lowers to a *soft* fact: the same atom universe as a `FACT`, but
27//! // one the solver may retract. Here `x a` is asserted both ways (hard + soft).
28//! let ir = compile_source("demo.vrf", "DOMAIN d\nFACT x a\nASSUME NOT x a\nCHECK x\n").unwrap();
29//! assert_eq!(ir.facts.len(), 2);
30//! assert!(ir.facts.iter().any(|f| f.soft)); // the ASSUME is the soft one
31//! ```
32#![no_std]
33// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
34#![warn(missing_docs)]
35
36extern crate alloc;
37
38#[cfg(feature = "std")]
39extern crate std;
40
41use alloc::boxed::Box;
42use alloc::collections::{BTreeMap, BTreeSet};
43use alloc::string::{String, ToString};
44use alloc::vec;
45use alloc::vec::Vec;
46use core::fmt::Write as _;
47
48use elenchus_parser::{Atom, Body, CloseKind, Conn, ListOp, Literal, Located, Quant, Statement};
49/// Re-exported so downstream crates name one source of truth: [`Diagnostics`] for
50/// the syntax errors carried by [`CompileError::Parse`], and [`kw`] for the
51/// keyword spellings an [`Origin::kind`] is built from (so the solver matches a
52/// `kind` against `kw::PREMISE`, not a re-typed `"PREMISE"`).
53pub use elenchus_parser::{Diagnostics, kw};
54use sha2::{Digest, Sha256};
55use thiserror::Error;
56
57// --- content-addressing (mirrors vsm-guard::hashing) -----------------------
58
59/// SHA-256 content addressing. Used only for dedup / redefinition / provenance,
60/// never for namespacing atoms.
61pub fn hash_hex(data: &[u8]) -> String {
62    let mut hasher = Sha256::new();
63    hasher.update(data);
64    let out = hasher.finalize();
65    let mut s = String::with_capacity(out.len() * 2);
66    for b in out {
67        let _ = write!(s, "{:02x}", b);
68    }
69    s
70}
71
72// --- IR types --------------------------------------------------------------
73
74/// Dense atom identifier (also the SAT variable number).
75pub type AtomId = u32;
76
77/// The identity of an atom: the `domain` plus the triple
78/// `(subject, predicate, object?)`, owned so it survives across merged sources.
79/// The domain is the leading sort key, so atoms group by domain; ordering is
80/// otherwise lexicographic → canonical. Two atoms with the same triple in
81/// *different* domains are distinct (no cross-domain unification).
82#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
83pub struct AtomKey {
84    /// The domain this atom belongs to (the resolved namespace, never a raw
85    /// alias). `physics.engine` and `plan.engine` are different atoms.
86    pub domain: String,
87    /// The entity the claim is about (owned copy of the parser's `subject`).
88    pub subject: String,
89    /// The relation or property asserted. `None` for a **bare proposition** — a
90    /// single-word atom introduced by a `VAR` port (e.g. `db_ready`). `None`
91    /// sorts before any `Some`, so existing (always-`Some`) atoms keep their order.
92    pub predicate: Option<String>,
93    /// Optional object; part of identity, so `has flying` ≠ `has swimming`. Always
94    /// `None` when `predicate` is `None`.
95    pub object: Option<String>,
96}
97
98/// The domain context of one file being compiled: its own declared domain (where
99/// bare atoms fall) and the local names — aliases or imported domain names — it
100/// may reference other domains by. Resolving an atom's optional `domain.` prefix
101/// against this context yields its canonical [`AtomKey`] domain.
102struct DomainCtx {
103    /// The file's own declared domain (the target for unqualified atoms).
104    current: String,
105    /// `local name -> canonical domain` for every name visible in this file
106    /// (always includes `current -> current`, plus one entry per `IMPORT`).
107    aliases: BTreeMap<String, String>,
108}
109
110impl DomainCtx {
111    /// Resolve an atom's optional `domain.` prefix to a canonical domain name.
112    /// `None` → the file's own domain; a prefix not imported here is an error.
113    fn resolve(&self, prefix: Option<&str>) -> Result<String, CompileError> {
114        match prefix {
115            None => Ok(self.current.clone()),
116            Some(p) => self
117                .aliases
118                .get(p)
119                .cloned()
120                .ok_or_else(|| CompileError::UnknownDomain {
121                    domain: p.to_string(),
122                }),
123        }
124    }
125
126    /// Build the owned [`AtomKey`] for a borrowed parser [`Atom`], resolving its
127    /// domain prefix against this file's context.
128    fn key(&self, a: &Atom) -> Result<AtomKey, CompileError> {
129        Ok(AtomKey {
130            domain: self.resolve(a.domain)?,
131            subject: a.subject.to_string(),
132            predicate: a.predicate.map(|p| p.to_string()),
133            object: a.object.map(|o| o.to_string()),
134        })
135    }
136}
137
138/// A literal as it appears *inside* an `Impossible` clause: an atom, optionally
139/// negated. `negated = true` means the literal is `NOT atom`.
140#[derive(Debug, Clone, Copy, PartialEq, Eq)]
141pub struct Lit {
142    /// Interned id of the atom (also its SAT variable number).
143    pub atom: AtomId,
144    /// `true` means this literal is `NOT atom` inside the clause.
145    pub negated: bool,
146}
147
148/// A confident truth value. UNKNOWN is the *absence* of a fact, never stored.
149#[derive(Debug, Clone, Copy, PartialEq, Eq)]
150pub enum Value {
151    /// The atom is asserted TRUE (from `FACT`).
152    True,
153    /// The atom is asserted FALSE (from `NOT`).
154    False,
155}
156
157/// Where a piece of IR came from — for readable conflict/warning pools.
158#[derive(Debug, Clone, PartialEq, Eq)]
159pub struct Origin {
160    /// The source label this came from (file name or `"<root>"`/`"<text>"`).
161    pub source: String,
162    /// 1-based line number of the originating statement.
163    pub line: u32,
164    /// The premise/rule name, if it came from a named construct.
165    pub premise: Option<String>,
166    /// Surface kind for the report. A surface keyword (a [`kw`] constant such as
167    /// `kw::FACT` / `kw::PREMISE`) for source constructs, or [`KIND_UNSAT`] for
168    /// the synthetic origin the solver attaches to a global unsatisfiability.
169    pub kind: &'static str,
170}
171
172/// The [`Origin::kind`] the solver stamps on a conflict that is not pinned to one
173/// source construct but to the program being jointly unsatisfiable. Not a
174/// keyword — so it lives here, next to the other kinds, as the one spelling both
175/// the solver (which sets it) and any reader (which matches it) share.
176pub const KIND_UNSAT: &str = "UNSAT";
177
178/// A confident fact (from `FACT` / `NOT`). Conflicting facts on the same atom
179/// are preserved (both kept) — the solver reports that as a CONFLICT.
180#[derive(Debug, Clone, PartialEq, Eq)]
181pub struct Fact {
182    /// The atom this fact pins down.
183    pub atom: AtomId,
184    /// The asserted truth value.
185    pub value: Value,
186    /// Where it came from (for the report).
187    pub origin: Origin,
188    /// `true` for an `ASSUME` (a *soft*, retractable hypothesis). A soft fact
189    /// behaves like a normal fact in the forward pass, but when the assumptions
190    /// cannot all hold the solver may drop it (and only it) to explain the
191    /// contradiction — a `FACT`/`NOT` is never retractable.
192    pub soft: bool,
193}
194
195/// An `Impossible` clause: the listed literals cannot all hold simultaneously.
196#[derive(Debug, Clone, PartialEq, Eq)]
197pub struct Clause {
198    /// The literals that cannot all hold at once (an `Impossible([...])`).
199    pub lits: Vec<Lit>,
200    /// Where it came from (for the report).
201    pub origin: Origin,
202}
203
204/// A forward-chaining rule (from `RULE`): if all antecedent literals hold, derive
205/// the consequent literals.
206#[derive(Debug, Clone, PartialEq, Eq)]
207pub struct Rule {
208    /// Literals that must all hold for the rule to fire.
209    pub antecedent: Vec<Lit>,
210    /// Literals derived (asserted) when the antecedent holds.
211    pub consequent: Vec<Lit>,
212    /// Where it came from (for the report).
213    pub origin: Origin,
214}
215
216/// A `CHECK` query.
217#[derive(Debug, Clone, PartialEq, Eq)]
218pub struct Check {
219    /// Restrict the report to this subject; `None` means check everything.
220    pub subject: Option<String>,
221    /// `true` runs the backward (all-SAT) pass to detect UNDERDETERMINED.
222    pub bidirectional: bool,
223}
224
225/// The compiled IR: the solver's input.
226#[derive(Debug, Clone, PartialEq, Eq)]
227pub struct Compiled {
228    /// Indexed by [`AtomId`]; canonically sorted.
229    pub atoms: Vec<AtomKey>,
230    /// Confident assertions from `FACT`/`NOT`.
231    pub facts: Vec<Fact>,
232    /// `Impossible` clauses (desugared premises + the built-in non-contradiction).
233    pub clauses: Vec<Clause>,
234    /// Forward-chaining rules from `RULE`.
235    pub rules: Vec<Rule>,
236    /// `CHECK` queries.
237    pub checks: Vec<Check>,
238    /// Imports seen but not yet resolved (only populated by [`compile_source`];
239    /// [`compile`] resolves them, leaving this empty).
240    pub pending_imports: Vec<String>,
241    /// Advisory: imports that a file makes but never references (no `domain.atom`
242    /// from that file uses the imported domain). Structural, per-file, and inert —
243    /// it never affects the solve. Only populated by [`compile`] (an unresolved
244    /// import in [`compile_source`] cannot be classified). See [`UnusedImport`].
245    pub unused_imports: Vec<UnusedImport>,
246    /// Atoms consumed as data by a relation `FOR EACH` (the edge facts, e.g. each
247    /// `a linked b`). They are read by the quantifier, so the solver must not
248    /// report them as ORPHAN facts even though no clause references them.
249    pub consumed: Vec<AtomId>,
250    /// One record per declared `VAR` port: how it resolved (supplied / default /
251    /// unset), its value and origin. Drives the report's PLACEHOLDERS section;
252    /// purely advisory. Filled by `compile_source_with` / `compile_with` after
253    /// [`Compiler::resolve_ports`]; empty when no port was declared.
254    pub placeholders: Vec<PlaceholderInfo>,
255}
256
257/// An advisory record: a file `IMPORT`s a domain it never references. Such an
258/// import is inert — no `domain.atom` in that file mentions it, so removing it
259/// would not change the result. It is almost always a leftover or a forgotten
260/// `domain.` prefix. **Purely informational** — it never changes the verdict.
261#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
262pub struct UnusedImport {
263    /// The source that declared the unused `IMPORT`.
264    pub file: String,
265    /// The imported domain that is never referenced from `file`.
266    pub domain: String,
267    /// The local alias, if the import used `AS <alias>`.
268    pub alias: Option<String>,
269    /// 1-based line of the `IMPORT` statement in `file`.
270    pub line: u32,
271}
272
273/// One external value bound to a port `key`, supplied from outside the program
274/// (CLI / API / a data file). The `origin` is a short human tag used both in the
275/// placeholders report and in a [`CompileError::PortConflict`] message.
276#[derive(Debug, Clone, PartialEq, Eq)]
277pub struct PortBinding {
278    /// The boolean truth supplied for the port.
279    pub value: bool,
280    /// Where it came from: `"CLI"`, `"api"`, `"data:<file>"`, or `"PROVIDE <file>"`.
281    pub origin: String,
282}
283
284/// How a declared `VAR` port got (or did not get) its value — the per-port status
285/// shown in the report's PLACEHOLDERS section. Advisory only; never affects the
286/// verdict.
287#[derive(Debug, Clone, Copy, PartialEq, Eq)]
288pub enum PlaceholderStatus {
289    /// An external value (CLI/API/data) was supplied.
290    Supplied,
291    /// No external value; the port's `DEFAULT` was used.
292    DefaultUsed,
293    /// No external value and no `DEFAULT` — the port stays UNKNOWN.
294    Unset,
295}
296
297/// A reporting record for one declared `VAR` port: its key, how it resolved, the
298/// value it took (if any), and where that value came from.
299#[derive(Debug, Clone, PartialEq, Eq)]
300pub struct PlaceholderInfo {
301    /// The port's name (the external key).
302    pub key: String,
303    /// How it resolved (supplied / default / unset).
304    pub status: PlaceholderStatus,
305    /// The resolved boolean, or `None` when unset.
306    pub value: Option<bool>,
307    /// The origin of a supplied value (`None` for default/unset).
308    pub origin: Option<String>,
309}
310
311/// Anything that can go wrong while compiling (and resolving imports).
312#[derive(Debug, Error, PartialEq, Eq)]
313pub enum CompileError {
314    /// A source failed to parse; carries the full syntax diagnostics (every
315    /// error, each as a caret block with the keyword's correct syntax). The
316    /// source label is already inside the [`Diagnostics`] header.
317    #[error("{0}")]
318    Parse(elenchus_parser::Diagnostics),
319    /// A name was reused with a different body *within the same source*.
320    #[error("'{name}' redefined with a different body")]
321    PremiseRedefinition {
322        /// The clashing premise/rule name.
323        name: String,
324    },
325    /// A source did not declare its `DOMAIN` (required, once, as the first
326    /// statement).
327    #[error("{file}: missing a DOMAIN declaration (every file must start with `DOMAIN <name>`)")]
328    MissingDomain {
329        /// The source label that lacked a `DOMAIN`.
330        file: String,
331    },
332    /// A source declared `DOMAIN` more than once (a file has exactly one domain).
333    #[error("{file}: more than one DOMAIN declaration (a file has exactly one domain)")]
334    DuplicateDomain {
335        /// The source label with the duplicate `DOMAIN`.
336        file: String,
337    },
338    /// An atom referenced a `domain.` prefix that is not the file's own domain and
339    /// was not imported in this file.
340    #[error("unknown domain '{domain}' — declare it with DOMAIN, or IMPORT it in this file")]
341    UnknownDomain {
342        /// The unresolved domain prefix.
343        domain: String,
344    },
345    /// Two imports bound the same local domain name to different domains (use a
346    /// distinct `AS <alias>` to tell them apart).
347    #[error("domain name '{alias}' is bound to two different imports (disambiguate with AS)")]
348    DomainAliasClash {
349        /// The clashing local domain name.
350        alias: String,
351    },
352    /// An `IMPORT` target could not be loaded by the [`Resolver`].
353    #[error("import not found: {0}")]
354    ImportNotFound(String),
355    /// Imports form a cycle (a source transitively imports itself).
356    #[error("circular import: {0}")]
357    CircularImport(String),
358    /// A `RULE` used `OR` in its `THEN`: forward chaining cannot derive a
359    /// disjunction (it would not know which literal to assert). Model it as a
360    /// `PREMISE` constraint instead.
361    #[error("rule '{name}' cannot derive a disjunction (OR in THEN); use a PREMISE instead")]
362    RuleDisjunctiveConsequent {
363        /// The offending rule name.
364        name: String,
365    },
366    /// A reference used a value outside the closed set an `ONEOF` declared for that
367    /// variable. Almost always a typo: the misspelling would otherwise mint a new
368    /// atom that hangs in the air as UNKNOWN. Closed-world is opt-in — it only
369    /// applies to a `(subject, predicate)` whose values an `ONEOF` enumerated.
370    /// Boxed so this comparatively large payload does not bloat every `Result`.
371    #[error(transparent)]
372    UnknownValue(Box<UnknownValue>),
373    /// A `FOR EACH … IN <set>` named a set that was never declared with `SET`.
374    /// Usually a typo in the set name; the suggestion offers the nearest declared
375    /// set when one is close.
376    #[error("{file}:{line}: FOR EACH ranges over '{set}', which is not a declared SET{suggestion}")]
377    UnknownSet {
378        /// The source the offending `FOR EACH` is in.
379        file: String,
380        /// 1-based line of the `FOR EACH`.
381        line: u32,
382        /// The undeclared set name that was referenced.
383        set: String,
384        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
385        suggestion: String,
386    },
387    /// `CLOSE <relation> TRANSITIVE` found a cycle: a node transitively reaches
388    /// itself. Transitive closure requires a DAG (e.g. a dependency graph).
389    #[error(
390        "{file}:{line}: relation '{relation}' has a cycle (`{node}` reaches itself) \
391         — CLOSE … TRANSITIVE requires a DAG"
392    )]
393    CyclicRelation {
394        /// The source the `CLOSE` is in.
395        file: String,
396        /// 1-based line of the `CLOSE`.
397        line: u32,
398        /// The relation predicate being closed.
399        relation: String,
400        /// A node on the cycle (reaches itself).
401        node: String,
402    },
403    /// A bare proposition (a single-word atom like `db_ready`) was used in the
404    /// program but never declared with `VAR`. Almost always a typo or a forgotten
405    /// declaration; the suggestion offers the nearest declared port when close.
406    #[error(
407        "{file}:{line}: '{name}' is a bare proposition but no VAR declares it \
408         — add `VAR {name}`{suggestion}"
409    )]
410    UndeclaredPort {
411        /// The source the offending reference is in.
412        file: String,
413        /// 1-based line of the offending reference.
414        line: u32,
415        /// The undeclared bare-proposition name.
416        name: String,
417        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
418        suggestion: String,
419    },
420    /// An external value (`--set`, API, or `PROVIDE`) named a port that no `VAR`
421    /// declares. Strict by design: silently ignoring an unknown key would hide a
422    /// mistake.
423    #[error("no VAR declares the port '{name}' that an external value sets{suggestion}")]
424    UnknownPort {
425        /// The supplied key that matches no declared port.
426        name: String,
427        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
428        suggestion: String,
429    },
430    /// An external value named a port whose bare name is declared in more than one
431    /// domain, so which one it sets is ambiguous. (Phase-1 ports match by bare
432    /// name; a richer domain-qualified form may lift this later.)
433    #[error(
434        "the port '{name}' is declared in multiple domains ({domains}); external value is ambiguous"
435    )]
436    AmbiguousPort {
437        /// The ambiguous bare port name.
438        name: String,
439        /// The domains that declare a port with this name, comma-joined (sorted).
440        domains: String,
441    },
442    /// Two sources supplied different values for the same port. Ambiguity is a hard
443    /// error: the engine is about determinism, so it never silently picks one.
444    #[error(
445        "the port '{name}' is set to two different values: {a_value} (from {a_origin}) \
446         and {b_value} (from {b_origin})"
447    )]
448    PortConflict {
449        /// The port set inconsistently.
450        name: String,
451        /// The first binding's value.
452        a_value: bool,
453        /// The first binding's origin tag.
454        a_origin: String,
455        /// The second (conflicting) binding's value.
456        b_value: bool,
457        /// The second binding's origin tag.
458        b_origin: String,
459    },
460    /// A data file (loaded via `--data`) contained a statement other than `PROVIDE`
461    /// (or `DOMAIN`). Data files carry only values, never logic.
462    #[error("{file}:{line}: a data file may only contain PROVIDE (and DOMAIN), not this statement")]
463    DataFileStatement {
464        /// The data file with the offending statement.
465        file: String,
466        /// 1-based line of the offending statement.
467        line: u32,
468    },
469}
470
471/// Details of a closed-world violation (see [`CompileError::UnknownValue`]). Kept
472/// in its own (boxed) struct so the common error path stays small.
473#[derive(Debug, Error, PartialEq, Eq)]
474#[error(
475    "{file}:{line}: '{value}' is not a declared value of '{subject} {predicate}' \
476     — ONEOF declares {{ {declared} }}{suggestion}"
477)]
478pub struct UnknownValue {
479    /// The source the offending reference is in.
480    pub file: String,
481    /// 1-based line of the offending reference.
482    pub line: u32,
483    /// The variable's subject.
484    pub subject: String,
485    /// The variable's predicate.
486    pub predicate: String,
487    /// The out-of-set value that was used.
488    pub value: String,
489    /// The declared legal values, comma-joined (sorted).
490    pub declared: String,
491    /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
492    pub suggestion: String,
493}
494
495// --- raw (key-based) intermediate, before interning ------------------------
496// While accumulating we key everything by `AtomKey` (the owned triple) rather
497// than by `AtomId`, because ids only become stable once *all* sources are merged
498// and the atom set is sorted in `finalize`. These mirror the public IR types but
499// hold keys instead of ids.
500
501/// A literal keyed by atom identity (pre-interning counterpart of [`Lit`]).
502#[derive(Clone)]
503struct RawLit {
504    key: AtomKey,
505    negated: bool,
506}
507
508/// A fact keyed by atom identity (pre-interning counterpart of [`Fact`]).
509struct RawFact {
510    key: AtomKey,
511    value: Value,
512    origin: Origin,
513    soft: bool,
514}
515
516/// A clause keyed by atom identity (pre-interning counterpart of [`Clause`]).
517struct RawClause {
518    lits: Vec<RawLit>,
519    origin: Origin,
520}
521
522/// A rule keyed by atom identity (pre-interning counterpart of [`Rule`]).
523struct RawRule {
524    antecedent: Vec<RawLit>,
525    consequent: Vec<RawLit>,
526    origin: Origin,
527}
528
529/// A declared `VAR` port, keyed in the compiler by `(domain, name)`: its optional
530/// `DEFAULT` fallback and where it was declared (for provenance on the synthetic
531/// fact and the placeholders report).
532struct PortDecl {
533    default: Option<bool>,
534    source: String,
535    line: u32,
536}
537
538// --- compiler --------------------------------------------------------------
539
540/// Accumulates statements from one or more sources, then interns + emits the IR.
541#[derive(Default)]
542pub struct Compiler {
543    keys: BTreeSet<AtomKey>,
544    facts: Vec<RawFact>,
545    clauses: Vec<RawClause>,
546    rules: Vec<RawRule>,
547    checks: Vec<Check>,
548    pending_imports: Vec<String>,
549    /// (source, name) → content hash of its body, for redefinition detection.
550    /// Scoped per source: premise/rule names are labels, not global identifiers,
551    /// so different files (domains) may reuse a name. A clash is only an error
552    /// *within the same source*.
553    defined: BTreeMap<(String, String), String>,
554    /// dedup of identical clauses by canonical content hash.
555    clause_sigs: BTreeSet<String>,
556    /// dedup of identical facts by (key, value).
557    fact_sigs: BTreeSet<String>,
558    /// Closed-world value sets declared by `ONEOF`: `(domain, subject, predicate)`
559    /// → the set of legal objects. Once a variable's values are enumerated by a
560    /// `ONEOF`, a reference to that variable with an object *outside* the set is a
561    /// compile error (a likely typo), not a silent new atom. Only `ONEOF` members
562    /// that carry an object register here (binary atoms have no value slot to
563    /// close). See [`Compiler::validate_closed_world`].
564    oneof_values: BTreeMap<(String, String, String), BTreeSet<String>>,
565    /// Declared `SET <name>` collections: name → elements, used to ground a
566    /// `FOR EACH <binder> IN <name>` quantifier by instantiating the body once
567    /// per element. Populated in a pre-pass so a `FOR EACH` may reference a set
568    /// declared later in the file.
569    sets: BTreeMap<String, Vec<String>>,
570    /// Declared relation pairs: predicate → `(subject, object)` of every 3-part
571    /// `FACT`, used to ground a `FOR EACH <a> <predicate> <b>` quantifier. Also a
572    /// pre-pass, so the edges may be declared after the quantifier.
573    relations: BTreeMap<String, Vec<(String, String)>>,
574    /// Edge atoms consumed by a relation `FOR EACH` (e.g. each `a linked b`).
575    /// They are *read as data* by the quantifier, so they are not idle facts —
576    /// [`Compiler::finalize`] passes them to the report to suppress the ORPHAN
577    /// lint.
578    relation_consumed: BTreeSet<AtomKey>,
579    /// Declared `VAR` ports, keyed by `(domain, name)`. Collected as statements are
580    /// added (across every imported file), then resolved against external values in
581    /// [`Compiler::resolve_ports`]. The first declaration of a `(domain, name)`
582    /// wins; later duplicates are ignored.
583    ports: BTreeMap<(String, String), PortDecl>,
584    /// `PROVIDE` bindings seen in compiled sources (origin `"PROVIDE <file>"`).
585    /// They join the same conflict pool as external `--set`/API values in
586    /// [`Compiler::resolve_ports`].
587    provides: Vec<(String, PortBinding)>,
588}
589
590impl Compiler {
591    /// A fresh, empty compiler.
592    pub fn new() -> Self {
593        Self::default()
594    }
595
596    /// Parse one source and accumulate its statements. `source` is a label used
597    /// in provenance (e.g. a file name or `"<root>"`). The source must declare its
598    /// `DOMAIN`; `IMPORT`s are recorded as pending (their domains cannot be bound
599    /// without a [`Resolver`]), so a single source may only reference its own
600    /// domain. Use [`compile`] for cross-domain references.
601    pub fn add_source(&mut self, source: &str, src: &str) -> Result<(), CompileError> {
602        let program = parse_tagged(source, src)?;
603        let domain = extract_domain(&program, source)?;
604        let mut aliases = BTreeMap::new();
605        aliases.insert(domain.clone(), domain.clone());
606        let ctx = DomainCtx {
607            current: domain,
608            aliases,
609        };
610        self.collect_decls(&program);
611        self.apply_closures(&program, source)?;
612        for stmt in &program.statements {
613            match stmt {
614                Statement::Domain(_) => {}
615                Statement::Import { path, .. } => {
616                    self.pending_imports.push(path.data.to_string());
617                }
618                other => self.add_statement(source, other, &ctx)?,
619            }
620        }
621        Ok(())
622    }
623
624    /// Apply every `CLOSE … TRANSITIVE`: replace the relation's pairs with their
625    /// transitive closure so a relation `FOR EACH` ranges over reachability, and
626    /// reject a cycle (a node reaching itself). A pure compile-time graph pass —
627    /// the solver never sees it. Runs after [`collect_decls`] (the direct edges
628    /// must be known) and before grounding.
629    fn apply_closures(
630        &mut self,
631        program: &elenchus_parser::Program,
632        source: &str,
633    ) -> Result<(), CompileError> {
634        for stmt in &program.statements {
635            if let Statement::Close { relation, kind } = stmt {
636                let CloseKind::Transitive = kind;
637                let pairs = self
638                    .relations
639                    .get(relation.data)
640                    .cloned()
641                    .unwrap_or_default();
642                let closed = transitive_closure(pairs);
643                if let Some((node, _)) = closed.iter().find(|(a, b)| a == b) {
644                    return Err(CompileError::CyclicRelation {
645                        file: source.to_string(),
646                        line: relation.span.location_line(),
647                        relation: relation.data.to_string(),
648                        node: node.clone(),
649                    });
650                }
651                self.relations.insert(relation.data.to_string(), closed);
652            }
653        }
654        Ok(())
655    }
656
657    /// Pre-pass: record every `SET` and every relation pair (3-part `FACT`) so a
658    /// `FOR EACH` may reference a set or relation declared anywhere in the same
659    /// source, including after the quantifier.
660    fn collect_decls(&mut self, program: &elenchus_parser::Program) {
661        for stmt in &program.statements {
662            match stmt {
663                Statement::Set { name, elements } => {
664                    self.sets.insert(
665                        name.data.to_string(),
666                        elements.iter().map(|e| e.data.to_string()).collect(),
667                    );
668                }
669                Statement::Fact(a) => {
670                    // Only a 3-part fact (`a rel b`) declares a relation pair; a bare
671                    // proposition or a 2-word fact has no object (hence no predicate
672                    // pair to record).
673                    if let (Some(pred), Some(obj)) = (a.data.predicate, a.data.object) {
674                        self.relations
675                            .entry(pred.to_string())
676                            .or_default()
677                            .push((a.data.subject.to_string(), obj.to_string()));
678                    }
679                }
680                _ => {}
681            }
682        }
683    }
684
685    /// Compile one already-resolved file's statements under its domain context.
686    fn add_resolved(&mut self, file: &ResolvedFile) -> Result<(), CompileError> {
687        let program = parse_tagged(&file.path, &file.content)?;
688        self.collect_decls(&program);
689        self.apply_closures(&program, &file.path)?;
690        for stmt in &program.statements {
691            match stmt {
692                Statement::Import { .. } | Statement::Domain(_) => {}
693                other => self.add_statement(&file.path, other, &file.ctx)?,
694            }
695        }
696        Ok(())
697    }
698
699    /// Route one statement (never `IMPORT`/`DOMAIN` — handled by the loaders) to
700    /// the right accumulator, resolving atom domains through `ctx`.
701    fn add_statement(
702        &mut self,
703        source: &str,
704        stmt: &Statement,
705        ctx: &DomainCtx,
706    ) -> Result<(), CompileError> {
707        match stmt {
708            // Handled by `add_source` / `load_recursive`, never reach here.
709            Statement::Import { .. } | Statement::Domain(_) => {}
710            Statement::Fact(a) => self.add_fact(source, a, Value::True, kw::FACT, false, ctx)?,
711            Statement::Negation(a) => {
712                self.add_fact(source, a, Value::False, kw::NOT, false, ctx)?
713            }
714            Statement::Assume(l) => {
715                let value = if l.data.negated {
716                    Value::False
717                } else {
718                    Value::True
719                };
720                // A soft assertion shares the FACT accumulator; the atom is the
721                // literal's atom, the polarity its `NOT`, and `soft` marks it
722                // retractable. The span is the whole `ASSUME` line.
723                let located = elenchus_parser::Located {
724                    data: l.data.atom.clone(),
725                    span: l.span,
726                };
727                self.add_fact(source, &located, value, kw::ASSUME, true, ctx)?;
728            }
729            Statement::Check {
730                subject,
731                bidirectional,
732            } => self.checks.push(Check {
733                subject: subject.as_ref().map(|s| s.data.to_string()),
734                bidirectional: *bidirectional,
735            }),
736            // Declared in the `collect_decls` / `apply_closures` pre-passes;
737            // nothing to emit here.
738            Statement::Set { .. } | Statement::Close { .. } => {}
739            // A port declaration: record it under this file's domain (the first
740            // declaration of a `(domain, name)` wins). It is resolved against
741            // external values later, in `resolve_ports`.
742            Statement::Var { name, default } => {
743                self.ports
744                    .entry((ctx.current.clone(), name.data.to_string()))
745                    .or_insert(PortDecl {
746                        default: *default,
747                        source: source.to_string(),
748                        line: name.span.location_line(),
749                    });
750            }
751            // A value binding: queue it into the conflict pool, tagged with its
752            // file, alongside any external `--set`/API values.
753            Statement::Provide { name, value } => {
754                self.provides.push((
755                    name.data.to_string(),
756                    PortBinding {
757                        value: *value,
758                        origin: alloc::format!("PROVIDE {source}"),
759                    },
760                ));
761            }
762            Statement::Premise { name, quant, body } => {
763                self.add_named(source, name, quant.as_ref(), body, false, ctx)?;
764            }
765            Statement::Rule { name, quant, body } => {
766                self.add_named(source, name, quant.as_ref(), body, true, ctx)?;
767            }
768        }
769        Ok(())
770    }
771
772    /// Record an atom identity in the shared universe (deduped by the `BTreeSet`).
773    fn intern(&mut self, key: &AtomKey) {
774        if !self.keys.contains(key) {
775            self.keys.insert(key.clone());
776        }
777    }
778
779    /// Accumulate a `FACT`/`NOT`; exact duplicates (same key+value+kind) are
780    /// dropped as idempotent, while a `FACT` and a `NOT` on the same atom are
781    /// both kept so the solver can report the CONFLICT.
782    fn add_fact(
783        &mut self,
784        source: &str,
785        a: &elenchus_parser::Located<Atom>,
786        value: Value,
787        kind: &'static str,
788        soft: bool,
789        ctx: &DomainCtx,
790    ) -> Result<(), CompileError> {
791        let key = ctx.key(&a.data)?;
792        self.intern(&key);
793        let sig = alloc::format!(
794            "{}|{}|{}|{}",
795            key_sig(&key),
796            matches!(value, Value::True) as u8,
797            kind,
798            "" // facts dedup ignores line; identical FACT twice is idempotent
799        );
800        if !self.fact_sigs.insert(sig) {
801            return Ok(()); // exact duplicate fact — idempotent
802        }
803        self.facts.push(RawFact {
804            key,
805            value,
806            origin: Origin {
807                source: source.to_string(),
808                line: a.span.location_line(),
809                premise: None,
810                kind,
811            },
812            soft,
813        });
814        Ok(())
815    }
816
817    /// Handle a named construct (`PREMISE` or `RULE`). `is_rule` selects derivation
818    /// vs checking. Returns an error on redefinition with a different body.
819    fn add_named(
820        &mut self,
821        source: &str,
822        name: &Located<&str>,
823        quant: Option<&Quant>,
824        body: &Body,
825        is_rule: bool,
826        ctx: &DomainCtx,
827    ) -> Result<(), CompileError> {
828        let line = name.span.location_line();
829        let name = name.data;
830        // The redefinition hash covers the quantifier too, so two same-named
831        // premises that differ only in their `FOR EACH` are still a redefinition.
832        let mut canon = canonical_body(name, body, is_rule, ctx)?;
833        if let Some(q) = quant {
834            canon.push_str(&quant_sig(q));
835        }
836        let body_hash = hash_hex(canon.as_bytes());
837        let key = (source.to_string(), name.to_string());
838        match self.defined.get(&key) {
839            Some(prev) if *prev == body_hash => return Ok(()), // identical → idempotent
840            Some(_) => {
841                // Same name + different body *in the same source* — a real mistake.
842                return Err(CompileError::PremiseRedefinition {
843                    name: name.to_string(),
844                });
845            }
846            None => {
847                self.defined.insert(key, body_hash);
848            }
849        }
850
851        match quant {
852            // Unquantified: emit the body once, as before.
853            None => self.emit_named(source, name, line, body, is_rule, ctx),
854            // `FOR EACH <binder> IN <set>`: instantiate the body once per element,
855            // substituting the binder. Grounding is exactly `|set|` repetitions of
856            // the *same* desugar — linear, never a domain product (a second binder
857            // is unrepresentable in the grammar).
858            Some(Quant::InSet { binder, set }) => {
859                let elements = match self.sets.get(set.data) {
860                    Some(els) => els.clone(),
861                    None => {
862                        return Err(CompileError::UnknownSet {
863                            file: source.to_string(),
864                            line: set.span.location_line(),
865                            set: set.data.to_string(),
866                            suggestion: nearest_set_suggestion(set.data, &self.sets),
867                        });
868                    }
869                };
870                for el in &elements {
871                    let grounded = subst_body(body, &[(binder.data, el)]);
872                    self.emit_named(source, name, line, &grounded, is_rule, ctx)?;
873                }
874                Ok(())
875            }
876            // `FOR EACH <a> <relation> <b>`: instantiate the body once per declared
877            // FACT pair of that relation, binding `a`→subject, `b`→object. The pair
878            // is pinned by data, so this is linear in the number of facts — never a
879            // product of the domain with itself.
880            Some(Quant::Relation {
881                left,
882                predicate,
883                right,
884            }) => {
885                let pairs = self
886                    .relations
887                    .get(predicate.data)
888                    .cloned()
889                    .unwrap_or_default();
890                for (subj, obj) in &pairs {
891                    let grounded = subst_body(body, &[(left.data, subj), (right.data, obj)]);
892                    self.emit_named(source, name, line, &grounded, is_rule, ctx)?;
893                    // The edge atom is read as data by the quantifier, not idle —
894                    // record it so the ORPHAN lint does not flag it.
895                    if let Ok(k) = ctx.key(&Atom {
896                        domain: None,
897                        subject: subj,
898                        predicate: Some(predicate.data),
899                        object: Some(obj),
900                    }) {
901                        self.relation_consumed.insert(k);
902                    }
903                }
904                Ok(())
905            }
906        }
907    }
908
909    /// Emit the clauses/rule for one (already-grounded) named construct's body.
910    /// Split out of [`Compiler::add_named`] so a `FOR EACH` can call it once per
911    /// element with the binder substituted, reusing the exact same desugar.
912    fn emit_named(
913        &mut self,
914        source: &str,
915        name: &str,
916        line: u32,
917        body: &Body,
918        is_rule: bool,
919        ctx: &DomainCtx,
920    ) -> Result<(), CompileError> {
921        if is_rule {
922            // RULE always has an implication body (guaranteed by the grammar).
923            if let Body::Impl {
924                antecedent,
925                ante_conn,
926                consequent,
927                cons_conn,
928            } = body
929            {
930                // A rule *derives* its consequent; an `OR` consequent is not a
931                // single fact to assert, so reject it (use a PREMISE instead).
932                if *cons_conn == Conn::Or {
933                    return Err(CompileError::RuleDisjunctiveConsequent {
934                        name: name.to_string(),
935                    });
936                }
937                let (ante, cons) = (raw_lits(antecedent, ctx)?, raw_lits(consequent, ctx)?);
938                for l in ante.iter().chain(cons.iter()) {
939                    self.intern(&l.key);
940                }
941                let origin = self.origin(source, line, Some(name), kw::RULE);
942                match ante_conn {
943                    // a ∧ b → C : one rule firing on the whole antecedent.
944                    Conn::And => self.rules.push(RawRule {
945                        antecedent: ante,
946                        consequent: cons,
947                        origin,
948                    }),
949                    // (a ∨ b) → C == (a → C) ∧ (b → C): one rule per antecedent.
950                    Conn::Or => {
951                        for a in &ante {
952                            self.rules.push(RawRule {
953                                antecedent: vec![a.clone()],
954                                consequent: cons.clone(),
955                                origin: origin.clone(),
956                            });
957                        }
958                    }
959                }
960            }
961            return Ok(());
962        }
963
964        match body {
965            Body::List { op, atoms } => {
966                let keys: Vec<AtomKey> = atoms
967                    .iter()
968                    .map(|a| ctx.key(&a.data))
969                    .collect::<Result<_, _>>()?;
970                for k in &keys {
971                    self.intern(k);
972                }
973                let kind = list_kind(*op);
974                let origin = self.origin(source, line, Some(name), kind);
975                match op {
976                    // EXCLUSIVE / FORBIDS: "at most one" → pairwise Impossible([a_i, a_j]).
977                    ListOp::Exclusive | ListOp::Forbids => {
978                        self.emit_pairwise(&keys, &origin);
979                    }
980                    // ONEOF: pairwise (at most one) + at-least-one. A ONEOF also
981                    // *closes* each of its variables: record every member's object
982                    // as a legal value of `(domain, subject, predicate)` so a later
983                    // out-of-set reference is caught as a typo (closed-world).
984                    ListOp::OneOf => {
985                        self.emit_pairwise(&keys, &origin);
986                        self.emit_at_least_one(&keys, &origin);
987                        for k in &keys {
988                            if let (Some(pred), Some(obj)) = (&k.predicate, &k.object) {
989                                self.oneof_values
990                                    .entry((k.domain.clone(), k.subject.clone(), pred.clone()))
991                                    .or_default()
992                                    .insert(obj.clone());
993                            }
994                        }
995                    }
996                    // ATLEAST: Impossible([NOT a_1, …, NOT a_n]).
997                    ListOp::AtLeast => {
998                        self.emit_at_least_one(&keys, &origin);
999                    }
1000                }
1001            }
1002            Body::Impl {
1003                antecedent,
1004                ante_conn,
1005                consequent,
1006                cons_conn,
1007            } => {
1008                // Implication A → C as `Impossible(A_true ∧ ¬C)`. We group each
1009                // side by its connective and emit one clause per (ante × cons)
1010                // group pair — a uniform rule covering all AND/OR combinations:
1011                //   AND-ante → all its literals share every clause;
1012                //   OR-ante  → one clause per literal;
1013                //   AND-cons → one clause per (negated) literal;
1014                //   OR-cons  → all its (negated) literals share every clause.
1015                let ante = raw_lits(antecedent, ctx)?;
1016                let cons = raw_lits(consequent, ctx)?;
1017                for l in ante.iter().chain(cons.iter()) {
1018                    self.intern(&l.key);
1019                }
1020                let origin = self.origin(source, line, Some(name), kw::PREMISE);
1021
1022                let ante_groups: Vec<Vec<RawLit>> = match ante_conn {
1023                    Conn::And => vec![ante.clone()],
1024                    Conn::Or => ante.iter().map(|l| vec![l.clone()]).collect(),
1025                };
1026                let cons_groups: Vec<Vec<RawLit>> = match cons_conn {
1027                    Conn::And => cons.iter().map(|l| vec![l.clone()]).collect(),
1028                    Conn::Or => vec![cons.clone()],
1029                };
1030                for ag in &ante_groups {
1031                    for cg in &cons_groups {
1032                        let mut lits = ag.clone();
1033                        for c in cg {
1034                            lits.push(RawLit {
1035                                key: c.key.clone(),
1036                                negated: !c.negated,
1037                            });
1038                        }
1039                        self.push_clause(lits, origin.clone());
1040                    }
1041                }
1042            }
1043        }
1044        Ok(())
1045    }
1046
1047    /// Emit "at most one TRUE" as one `Impossible([a_i, a_j])` per unordered
1048    /// pair. Pairwise (not a single big clause) because `Impossible([a,b,c])`
1049    /// only forbids *all three* together — it would still allow two.
1050    fn emit_pairwise(&mut self, keys: &[AtomKey], origin: &Origin) {
1051        for i in 0..keys.len() {
1052            for j in (i + 1)..keys.len() {
1053                let lits = vec![
1054                    RawLit {
1055                        key: keys[i].clone(),
1056                        negated: false,
1057                    },
1058                    RawLit {
1059                        key: keys[j].clone(),
1060                        negated: false,
1061                    },
1062                ];
1063                self.push_clause(lits, origin.clone());
1064            }
1065        }
1066    }
1067
1068    /// Emit "at least one TRUE" as a single `Impossible([NOT a_1, …, NOT a_n])`
1069    /// — it is impossible for all of them to be false at once.
1070    fn emit_at_least_one(&mut self, keys: &[AtomKey], origin: &Origin) {
1071        let lits = keys
1072            .iter()
1073            .map(|k| RawLit {
1074                key: k.clone(),
1075                negated: true,
1076            })
1077            .collect();
1078        self.push_clause(lits, origin.clone());
1079    }
1080
1081    /// Append a clause unless an identical one (by canonical [`clause_sig`]) is
1082    /// already present — `P ∧ P ≡ P`, so dedup keeps the IR minimal.
1083    fn push_clause(&mut self, lits: Vec<RawLit>, origin: Origin) {
1084        let sig = clause_sig(&lits);
1085        if self.clause_sigs.insert(sig) {
1086            self.clauses.push(RawClause { lits, origin });
1087        }
1088        // else: identical clause already present — idempotent.
1089    }
1090
1091    /// Build an [`Origin`] for provenance from the current source/line/name.
1092    fn origin(&self, source: &str, line: u32, premise: Option<&str>, kind: &'static str) -> Origin {
1093        Origin {
1094            source: source.to_string(),
1095            line,
1096            premise: premise.map(|s| s.to_string()),
1097            kind,
1098        }
1099    }
1100
1101    /// Closed-world check: once an `ONEOF` enumerates a variable's values, any
1102    /// reference to that `(domain, subject, predicate)` with an object outside the
1103    /// declared set is rejected as a likely typo — instead of silently minting a
1104    /// new atom that then "hangs in the air" as an UNKNOWN. Reports the earliest
1105    /// (by source, line) offender, with a `did you mean` suggestion when a declared
1106    /// value is within edit distance. Must run after all sources are accumulated
1107    /// (a `ONEOF` may follow the reference) and before [`finalize`].
1108    fn validate_closed_world(&self) -> Result<(), CompileError> {
1109        if self.oneof_values.is_empty() {
1110            return Ok(());
1111        }
1112        // Every atom reference reachable from a fact, clause, or rule, with the
1113        // line it came from. ONEOF members appear here too (as clause literals) but
1114        // are in-set by construction, so they never trip the check. `out_of_set`
1115        // tests one key against its variable's declared values.
1116        let out_of_set = |key: &AtomKey| -> bool {
1117            // A bare proposition (predicate `None`) has no object, so it never trips
1118            // closed-world; only `(predicate, object)` atoms are checked.
1119            match (key.predicate.as_ref(), key.object.as_ref()) {
1120                (Some(pred), Some(obj)) => self
1121                    .oneof_values
1122                    .get(&(key.domain.clone(), key.subject.clone(), pred.clone()))
1123                    .is_some_and(|set| !set.contains(obj)),
1124                _ => false,
1125            }
1126        };
1127        let mut offenders: Vec<(&str, u32, &AtomKey)> = Vec::new();
1128        for f in &self.facts {
1129            if out_of_set(&f.key) {
1130                offenders.push((&f.origin.source, f.origin.line, &f.key));
1131            }
1132        }
1133        for c in &self.clauses {
1134            for l in &c.lits {
1135                if out_of_set(&l.key) {
1136                    offenders.push((&c.origin.source, c.origin.line, &l.key));
1137                }
1138            }
1139        }
1140        for r in &self.rules {
1141            for l in r.antecedent.iter().chain(r.consequent.iter()) {
1142                if out_of_set(&l.key) {
1143                    offenders.push((&r.origin.source, r.origin.line, &l.key));
1144                }
1145            }
1146        }
1147        // Earliest offender wins, for a stable, source-ordered diagnostic.
1148        let Some(&(source, line, key)) = offenders.iter().min_by(|a, b| {
1149            (a.0, a.1, &a.2.subject, &a.2.object).cmp(&(b.0, b.1, &b.2.subject, &b.2.object))
1150        }) else {
1151            return Ok(());
1152        };
1153        // The offender tripped `out_of_set`, so its predicate is `Some`.
1154        let pred = key.predicate.clone().unwrap_or_default();
1155        let set = &self.oneof_values[&(key.domain.clone(), key.subject.clone(), pred.clone())];
1156        let declared: Vec<&str> = set.iter().map(|s| s.as_str()).collect(); // BTreeSet → sorted
1157        let value = key.object.clone().unwrap_or_default();
1158        let suggestion = did_you_mean(&value, &declared);
1159        Err(CompileError::UnknownValue(Box::new(UnknownValue {
1160            file: source.to_string(),
1161            line,
1162            subject: key.subject.clone(),
1163            predicate: pred,
1164            value,
1165            declared: declared.join(", "),
1166            suggestion,
1167        })))
1168    }
1169
1170    /// Resolve every declared `VAR` port against the external `inputs`, validate
1171    /// that every used bare proposition is declared, and push a synthetic hard fact
1172    /// for each resolved port. Returns the per-port report records (the PLACEHOLDERS
1173    /// section). Run after the add-statement loop (all ports declared, all bare
1174    /// props interned) and before [`finalize`].
1175    ///
1176    /// Every ambiguity is a hard error, by design (the engine is deterministic):
1177    /// - a used bare proposition with no `VAR` → [`CompileError::UndeclaredPort`];
1178    /// - an external key naming no declared port → [`CompileError::UnknownPort`];
1179    /// - an external key whose bare name is declared in >1 domain → [`CompileError::AmbiguousPort`];
1180    /// - two bindings disagreeing on one key → [`CompileError::PortConflict`].
1181    ///
1182    /// A resolved port becomes a hard fact (observationally equal to `FACT name` /
1183    /// `NOT name`); a port with neither value nor `DEFAULT` stays UNKNOWN (no fact).
1184    fn resolve_ports(
1185        &mut self,
1186        inputs: &[(String, PortBinding)],
1187    ) -> Result<Vec<PlaceholderInfo>, CompileError> {
1188        // (1) Every *used* bare proposition (in a fact, clause, or rule) must be a
1189        // declared port. Reported with the earliest source/line, like closed-world.
1190        {
1191            let bad = |k: &AtomKey| {
1192                k.predicate.is_none()
1193                    && !self
1194                        .ports
1195                        .contains_key(&(k.domain.clone(), k.subject.clone()))
1196            };
1197            let mut offenders: Vec<(&str, u32, &str)> = Vec::new();
1198            for f in &self.facts {
1199                if bad(&f.key) {
1200                    offenders.push((&f.origin.source, f.origin.line, &f.key.subject));
1201                }
1202            }
1203            for c in &self.clauses {
1204                for l in &c.lits {
1205                    if bad(&l.key) {
1206                        offenders.push((&c.origin.source, c.origin.line, &l.key.subject));
1207                    }
1208                }
1209            }
1210            for r in &self.rules {
1211                for l in r.antecedent.iter().chain(r.consequent.iter()) {
1212                    if bad(&l.key) {
1213                        offenders.push((&r.origin.source, r.origin.line, &l.key.subject));
1214                    }
1215                }
1216            }
1217            if let Some(&(source, line, name)) = offenders
1218                .iter()
1219                .min_by(|a, b| (a.0, a.1, a.2).cmp(&(b.0, b.1, b.2)))
1220            {
1221                let names: Vec<&str> = self.ports.keys().map(|(_, n)| n.as_str()).collect();
1222                return Err(CompileError::UndeclaredPort {
1223                    file: source.to_string(),
1224                    line,
1225                    name: name.to_string(),
1226                    suggestion: did_you_mean(name, &names),
1227                });
1228            }
1229        }
1230
1231        // (2) Merge every binding by key (in-file `PROVIDE`s plus external values);
1232        // two disagreeing values for one key conflict.
1233        let mut merged: BTreeMap<String, PortBinding> = BTreeMap::new();
1234        for (name, b) in self.provides.iter().chain(inputs.iter()) {
1235            match merged.get(name) {
1236                Some(prev) if prev.value != b.value => {
1237                    return Err(CompileError::PortConflict {
1238                        name: name.clone(),
1239                        a_value: prev.value,
1240                        a_origin: prev.origin.clone(),
1241                        b_value: b.value,
1242                        b_origin: b.origin.clone(),
1243                    });
1244                }
1245                _ => {
1246                    merged.entry(name.clone()).or_insert_with(|| b.clone());
1247                }
1248            }
1249        }
1250
1251        // (3) Match each external key to exactly one declared port (by bare name).
1252        let mut supplied: BTreeMap<(String, String), PortBinding> = BTreeMap::new();
1253        for (name, b) in &merged {
1254            let domains: Vec<&str> = self
1255                .ports
1256                .keys()
1257                .filter(|(_, n)| n == name)
1258                .map(|(d, _)| d.as_str())
1259                .collect();
1260            match domains.as_slice() {
1261                [] => {
1262                    let names: Vec<&str> = self.ports.keys().map(|(_, n)| n.as_str()).collect();
1263                    return Err(CompileError::UnknownPort {
1264                        name: name.clone(),
1265                        suggestion: did_you_mean(name, &names),
1266                    });
1267                }
1268                [d] => {
1269                    supplied.insert(((*d).to_string(), name.clone()), b.clone());
1270                }
1271                many => {
1272                    return Err(CompileError::AmbiguousPort {
1273                        name: name.clone(),
1274                        domains: many.join(", "),
1275                    });
1276                }
1277            }
1278        }
1279
1280        // (4) Resolve each declared port: supplied > DEFAULT > unset (UNKNOWN).
1281        let mut placeholders: Vec<PlaceholderInfo> = Vec::new();
1282        let mut interns: Vec<AtomKey> = Vec::new();
1283        let mut synth: Vec<(AtomKey, bool, String, u32)> = Vec::new();
1284        for ((domain, name), decl) in &self.ports {
1285            let key = AtomKey {
1286                domain: domain.clone(),
1287                subject: name.clone(),
1288                predicate: None,
1289                object: None,
1290            };
1291            interns.push(key.clone());
1292            let (status, value, origin) = match supplied.get(&(domain.clone(), name.clone())) {
1293                Some(b) => (
1294                    PlaceholderStatus::Supplied,
1295                    Some(b.value),
1296                    Some(b.origin.clone()),
1297                ),
1298                None => match decl.default {
1299                    Some(v) => (PlaceholderStatus::DefaultUsed, Some(v), None),
1300                    None => (PlaceholderStatus::Unset, None, None),
1301                },
1302            };
1303            if let Some(v) = value {
1304                synth.push((key, v, decl.source.clone(), decl.line));
1305            }
1306            placeholders.push(PlaceholderInfo {
1307                key: name.clone(),
1308                status,
1309                value,
1310                origin,
1311            });
1312        }
1313
1314        // (5) Intern every declared port (so it appears even if unused), keep it out
1315        // of the ORPHAN lint, and assert a hard fact for each resolved one.
1316        for key in interns {
1317            self.intern(&key);
1318            self.relation_consumed.insert(key);
1319        }
1320        for (key, value, source, line) in synth {
1321            self.facts.push(RawFact {
1322                key,
1323                value: if value { Value::True } else { Value::False },
1324                origin: Origin {
1325                    source,
1326                    line,
1327                    premise: None,
1328                    kind: kw::VAR,
1329                },
1330                soft: false,
1331            });
1332        }
1333
1334        Ok(placeholders)
1335    }
1336
1337    /// Intern all atoms (canonical sort), then lower the raw IR to ids.
1338    pub fn finalize(self) -> Compiled {
1339        let atoms: Vec<AtomKey> = self.keys.into_iter().collect(); // BTreeSet → sorted
1340        let mut id_of: BTreeMap<AtomKey, AtomId> = BTreeMap::new();
1341        for (i, k) in atoms.iter().enumerate() {
1342            id_of.insert(k.clone(), i as AtomId);
1343        }
1344        let lower = |l: &RawLit| Lit {
1345            atom: id_of[&l.key],
1346            negated: l.negated,
1347        };
1348
1349        let facts = self
1350            .facts
1351            .into_iter()
1352            .map(|f| Fact {
1353                atom: id_of[&f.key],
1354                value: f.value,
1355                origin: f.origin,
1356                soft: f.soft,
1357            })
1358            .collect();
1359        let clauses = self
1360            .clauses
1361            .into_iter()
1362            .map(|c| Clause {
1363                lits: c.lits.iter().map(lower).collect(),
1364                origin: c.origin,
1365            })
1366            .collect();
1367        let rules = self
1368            .rules
1369            .into_iter()
1370            .map(|r| Rule {
1371                antecedent: r.antecedent.iter().map(lower).collect(),
1372                consequent: r.consequent.iter().map(lower).collect(),
1373                origin: r.origin,
1374            })
1375            .collect();
1376
1377        let consumed = self
1378            .relation_consumed
1379            .iter()
1380            .filter_map(|k| id_of.get(k).copied())
1381            .collect();
1382
1383        Compiled {
1384            atoms,
1385            facts,
1386            clauses,
1387            rules,
1388            checks: self.checks,
1389            pending_imports: self.pending_imports,
1390            unused_imports: Vec::new(), // filled by `compile` (advisory, post-resolution)
1391            consumed,
1392            placeholders: Vec::new(), // filled by `*_with` after `resolve_ports`
1393        }
1394    }
1395}
1396
1397/// Parse a data-only `.vrf` source and extract its `PROVIDE` bindings as
1398/// `(name, value)` pairs. A data file carries only values: any statement other
1399/// than `PROVIDE` (or a `DOMAIN`) is a [`CompileError::DataFileStatement`]. Used to
1400/// load a `--data <file>` of port values without compiling it as a program.
1401pub fn read_data_source(file: &str, src: &str) -> Result<Vec<(String, bool)>, CompileError> {
1402    let program = parse_tagged(file, src)?;
1403    let mut out = Vec::new();
1404    for stmt in &program.statements {
1405        match stmt {
1406            Statement::Provide { name, value } => out.push((name.data.to_string(), *value)),
1407            Statement::Domain(_) => {}
1408            other => {
1409                return Err(CompileError::DataFileStatement {
1410                    file: file.to_string(),
1411                    line: statement_line(other),
1412                });
1413            }
1414        }
1415    }
1416    Ok(out)
1417}
1418
1419/// Parse a data-only `.vrf` source into ready-to-merge port [`PortBinding`]s, each
1420/// tagged with origin `data:<file>`. The shared bridge every surface uses to turn a
1421/// `--data` / data-map source into engine inputs, so a data file behaves identically
1422/// whether it arrives from the CLI, wasm, or MCP.
1423pub fn read_data_bindings(
1424    file: &str,
1425    src: &str,
1426) -> Result<Vec<(String, PortBinding)>, CompileError> {
1427    Ok(read_data_source(file, src)?
1428        .into_iter()
1429        .map(|(name, value)| {
1430            (
1431                name,
1432                PortBinding {
1433                    value,
1434                    origin: alloc::format!("data:{file}"),
1435                },
1436            )
1437        })
1438        .collect())
1439}
1440
1441/// The 1-based source line a statement begins on (for diagnostics).
1442fn statement_line(s: &Statement) -> u32 {
1443    match s {
1444        Statement::Domain(n) => n.span.location_line(),
1445        Statement::Import { path, .. } => path.span.location_line(),
1446        Statement::Fact(a) | Statement::Negation(a) => a.span.location_line(),
1447        Statement::Assume(l) => l.span.location_line(),
1448        Statement::Set { name, .. } => name.span.location_line(),
1449        Statement::Close { relation, .. } => relation.span.location_line(),
1450        Statement::Var { name, .. } => name.span.location_line(),
1451        Statement::Provide { name, .. } => name.span.location_line(),
1452        Statement::Premise { name, .. } | Statement::Rule { name, .. } => name.span.location_line(),
1453        Statement::Check { subject, .. } => subject
1454            .as_ref()
1455            .map(|s| s.span.location_line())
1456            .unwrap_or(0),
1457    }
1458}
1459
1460/// Convenience: compile a single source into the IR. `IMPORT`s are recorded as
1461/// pending, not resolved (use [`compile`] with a [`Resolver`] to resolve them).
1462pub fn compile_source(source: &str, src: &str) -> Result<Compiled, CompileError> {
1463    compile_source_with(source, src, &[])
1464}
1465
1466/// Like [`compile_source`], but resolving declared `VAR` ports against external
1467/// `inputs` (`(name, binding)` pairs from CLI / API / data). The `Compiled` carries
1468/// a `placeholders` record per declared port.
1469pub fn compile_source_with(
1470    source: &str,
1471    src: &str,
1472    inputs: &[(String, PortBinding)],
1473) -> Result<Compiled, CompileError> {
1474    let mut c = Compiler::new();
1475    c.add_source(source, src)?;
1476    c.validate_closed_world()?;
1477    let placeholders = c.resolve_ports(inputs)?;
1478    let mut compiled = c.finalize();
1479    compiled.placeholders = placeholders;
1480    Ok(compiled)
1481}
1482
1483// --- import resolution (source-agnostic) -----------------------------------
1484
1485/// Resolves `IMPORT "path"` to source text. The engine is source-agnostic: it
1486/// consumes strings, so a file is merely one backing store. Mirrors
1487/// vsm-grammar's `SourceResolver`.
1488pub trait Resolver {
1489    /// Load the raw source text for a resolved path.
1490    fn load(&self, path: &str) -> Result<String, CompileError>;
1491
1492    /// Normalize `relative` against the importing source `base`.
1493    /// Default: paths are absolute names, returned unchanged.
1494    fn resolve(&self, _base: &str, relative: &str) -> String {
1495        relative.to_string()
1496    }
1497}
1498
1499/// An in-memory resolver: serves sources from a name → content map. Pure
1500/// `no_std`. Mirrors vsm-grammar's `MemoryResolver`.
1501#[derive(Default)]
1502pub struct MemoryResolver {
1503    sources: BTreeMap<String, String>,
1504}
1505
1506impl MemoryResolver {
1507    /// An empty resolver with no sources.
1508    pub fn new() -> Self {
1509        Self::default()
1510    }
1511
1512    /// Register `content` under `path`; returns `&mut self` for chaining.
1513    pub fn add(&mut self, path: &str, content: &str) -> &mut Self {
1514        self.sources.insert(path.to_string(), content.to_string());
1515        self
1516    }
1517}
1518
1519/// Normalize an `IMPORT` path **identically on every OS and every surface**: treat
1520/// both `/` and `\` as separators, resolve `relative` against `base`'s directory,
1521/// collapse `.` / `..`, and re-join with `/`. Pure and `no_std`, so it does not
1522/// depend on the host (or the compile target's) path semantics — a Windows-style
1523/// and a Unix-style import resolve to the same virtual path whether the resolver is
1524/// filesystem-, JS-callback-, or in-memory-backed. The single source of truth all
1525/// three [`Resolver`]s share.
1526pub fn normalize_import_path(base: &str, relative: &str) -> String {
1527    fn is_sep(c: char) -> bool {
1528        c == '/' || c == '\\'
1529    }
1530    fn push<'a>(parts: &mut Vec<&'a str>, seg: &'a str) {
1531        match seg {
1532            "" | "." => {}
1533            ".." => {
1534                parts.pop();
1535            }
1536            _ => parts.push(seg),
1537        }
1538    }
1539    let mut absolute = base.starts_with(is_sep);
1540    let mut parts: Vec<&str> = Vec::new();
1541    // `base`'s directory = every segment but the last (the importing file's name).
1542    let base_segs: Vec<&str> = base.split(is_sep).collect();
1543    for seg in base_segs.iter().take(base_segs.len().saturating_sub(1)) {
1544        push(&mut parts, seg);
1545    }
1546    // An absolute `relative` replaces the base directory entirely.
1547    if relative.starts_with(is_sep) {
1548        parts.clear();
1549        absolute = true;
1550    }
1551    for seg in relative.split(is_sep) {
1552        push(&mut parts, seg);
1553    }
1554    let joined = parts.join("/");
1555    if absolute {
1556        alloc::format!("/{joined}")
1557    } else {
1558        joined
1559    }
1560}
1561
1562impl Resolver for MemoryResolver {
1563    fn load(&self, path: &str) -> Result<String, CompileError> {
1564        self.sources
1565            .get(path)
1566            .cloned()
1567            .ok_or_else(|| CompileError::ImportNotFound(path.to_string()))
1568    }
1569
1570    fn resolve(&self, base: &str, relative: &str) -> String {
1571        normalize_import_path(base, relative)
1572    }
1573}
1574
1575/// A filesystem-backed resolver. Mirrors vsm-grammar's `FileResolver`:
1576/// relative imports resolve against the importing file's directory, with manual
1577/// `..` normalization (no canonicalization, to keep a virtual layout).
1578#[cfg(feature = "std")]
1579pub struct FileResolver;
1580
1581#[cfg(feature = "std")]
1582impl Resolver for FileResolver {
1583    fn load(&self, path: &str) -> Result<String, CompileError> {
1584        std::fs::read_to_string(path)
1585            .map_err(|e| CompileError::ImportNotFound(alloc::format!("{}: {}", path, e)))
1586    }
1587
1588    fn resolve(&self, base: &str, relative: &str) -> String {
1589        // Share the one OS-independent normalizer (forward slashes, `..` collapsed)
1590        // so a resolved path — and the provenance recorded in the IR — is identical
1591        // on Windows and Unix, and identical to the in-memory and JS resolvers.
1592        // Windows `std::fs` accepts `/` just fine.
1593        normalize_import_path(base, relative)
1594    }
1595}
1596
1597/// One resolved source ready to compile: its provenance path, raw text, and the
1598/// domain context (own domain + import-alias bindings) its atoms resolve against.
1599struct ResolvedFile {
1600    path: String,
1601    content: String,
1602    ctx: DomainCtx,
1603}
1604
1605/// Compile a root source and all its transitive `IMPORT`s into one IR.
1606///
1607/// Each file is keyed by `DOMAIN`; atoms unify only within a domain. Imports are
1608/// referenced by `<domain>.<atom>` and visibility is file-local (naming is not
1609/// transitive, though a dependency's clauses still participate). Sources are
1610/// content-addressed (sha256): a source reached by several paths is compiled once
1611/// (so a diamond — or an exponential fan-out — stays linear, never blowing up),
1612/// and an import cycle is an error.
1613///
1614/// Resolution is **iterative** (an explicit work stack, not native recursion), so
1615/// an arbitrarily deep import chain cannot overflow the call stack.
1616///
1617/// Premise/rule names are per-source labels, not global identifiers: different
1618/// files may reuse a name, and the report qualifies them by source. A name reused
1619/// with a different body is an error only *within the same source*.
1620pub fn compile<R: Resolver>(root: &str, resolver: &R) -> Result<Compiled, CompileError> {
1621    compile_with(root, resolver, &[])
1622}
1623
1624/// Like [`compile`], but resolving declared `VAR` ports against external `inputs`.
1625/// Ports declared in any file of the import graph are aggregated, then resolved
1626/// once after every file is added.
1627pub fn compile_with<R: Resolver>(
1628    root: &str,
1629    resolver: &R,
1630    inputs: &[(String, PortBinding)],
1631) -> Result<Compiled, CompileError> {
1632    let (files, unused_imports) = resolve_graph(root, resolver)?;
1633    let mut c = Compiler::new();
1634    for file in &files {
1635        c.add_resolved(file)?;
1636    }
1637    c.validate_closed_world()?;
1638    let placeholders = c.resolve_ports(inputs)?;
1639    let mut compiled = c.finalize();
1640    compiled.unused_imports = unused_imports;
1641    compiled.placeholders = placeholders;
1642    Ok(compiled)
1643}
1644
1645/// One `IMPORT` edge: the optional local alias, the resolved child path, and the
1646/// `IMPORT` line (for the unused-import advisory).
1647struct ImportEdge {
1648    alias: Option<String>,
1649    child_path: String,
1650    line: u32,
1651}
1652
1653/// A discovered source during graph resolution: its first-seen path, raw text,
1654/// declared domain, import edges, and the set of domain prefixes its atoms use
1655/// (`None` = its own domain; `Some(p)` = a `p.` prefix) — used to flag imports
1656/// that the file never references.
1657struct DiscoveredFile {
1658    path: String,
1659    content: String,
1660    domain: String,
1661    edges: Vec<ImportEdge>,
1662    used_prefixes: BTreeSet<Option<String>>,
1663}
1664
1665/// Resolve the whole import graph reachable from `root` into a flat list of
1666/// [`ResolvedFile`]s, each distinct source appearing once.
1667///
1668/// Iterative depth-first traversal with an explicit work stack (`Enter`/`Exit`
1669/// frames) — no native recursion, so depth is unbounded without risking a stack
1670/// overflow. Memoized by content hash (a diamond/repeat is visited once); a hash
1671/// re-encountered while still on the active path is a [`CompileError::CircularImport`].
1672fn resolve_graph<R: Resolver>(
1673    root: &str,
1674    resolver: &R,
1675) -> Result<(Vec<ResolvedFile>, Vec<UnusedImport>), CompileError> {
1676    /// One unit of pending work on the traversal stack.
1677    enum Step {
1678        /// Visit a file at this resolved path (load, parse, enqueue its imports).
1679        Enter(String),
1680        /// Mark this content hash finished (pop it off the active path).
1681        Exit(String),
1682    }
1683
1684    let mut discovered: BTreeMap<String, DiscoveredFile> = BTreeMap::new(); // by hash
1685    let mut path_hash: BTreeMap<String, String> = BTreeMap::new(); // resolved path → hash
1686    let mut order: Vec<String> = Vec::new(); // finish order, by hash
1687    let mut active: BTreeSet<String> = BTreeSet::new(); // hashes on the current DFS path
1688    let mut work: Vec<Step> = vec![Step::Enter(root.to_string())];
1689
1690    while let Some(step) = work.pop() {
1691        match step {
1692            Step::Exit(hash) => {
1693                active.remove(&hash);
1694                order.push(hash);
1695            }
1696            Step::Enter(path) => {
1697                let content = resolver.load(&path)?;
1698                let hash = hash_hex(content.as_bytes());
1699                path_hash.insert(path.clone(), hash.clone());
1700                if active.contains(&hash) {
1701                    return Err(CompileError::CircularImport(path)); // back-edge to an ancestor
1702                }
1703                if discovered.contains_key(&hash) {
1704                    continue; // already fully resolved by another path — dedup
1705                }
1706                let program = parse_tagged(&path, &content)?;
1707                let domain = extract_domain(&program, &path)?;
1708                let mut edges = Vec::new();
1709                let mut used_prefixes = BTreeSet::new();
1710                for stmt in &program.statements {
1711                    if let Statement::Import { path: p, alias } = stmt {
1712                        edges.push(ImportEdge {
1713                            alias: alias.as_ref().map(|a| a.data.to_string()),
1714                            child_path: resolver.resolve(&path, p.data),
1715                            line: p.span.location_line(),
1716                        });
1717                    } else {
1718                        collect_prefixes(stmt, &mut used_prefixes);
1719                    }
1720                }
1721                drop(program); // release the borrow on `content` before moving it
1722                active.insert(hash.clone());
1723                work.push(Step::Exit(hash.clone()));
1724                for e in edges.iter().rev() {
1725                    work.push(Step::Enter(e.child_path.clone()));
1726                }
1727                discovered.insert(
1728                    hash,
1729                    DiscoveredFile {
1730                        path,
1731                        content,
1732                        domain,
1733                        edges,
1734                        used_prefixes,
1735                    },
1736                );
1737            }
1738        }
1739    }
1740
1741    // Build each file's domain context now that every domain is known.
1742    // Look up every file's domain (small strings) so we can then *move* each
1743    // file's (potentially large) content out of `discovered` instead of cloning.
1744    let domain_of: BTreeMap<&str, &str> = discovered
1745        .iter()
1746        .map(|(h, f)| (h.as_str(), f.domain.as_str()))
1747        .collect();
1748
1749    let mut out = Vec::with_capacity(order.len());
1750    let mut unused: Vec<UnusedImport> = Vec::new();
1751    for hash in &order {
1752        let file = &discovered[hash];
1753        let mut aliases = BTreeMap::new();
1754        aliases.insert(file.domain.clone(), file.domain.clone());
1755        for edge in &file.edges {
1756            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
1757            let bind = edge
1758                .alias
1759                .clone()
1760                .unwrap_or_else(|| child_domain.to_string());
1761            match aliases.get(&bind) {
1762                Some(existing) if existing != child_domain => {
1763                    return Err(CompileError::DomainAliasClash { alias: bind });
1764                }
1765                _ => {
1766                    aliases.insert(bind, child_domain.to_string());
1767                }
1768            }
1769        }
1770
1771        // The domains this file actually references (each used prefix resolved
1772        // against its own domain / imports). An imported domain absent from this
1773        // set is an unused import.
1774        let referenced: BTreeSet<&str> = file
1775            .used_prefixes
1776            .iter()
1777            .filter_map(|p| match p {
1778                None => Some(file.domain.as_str()),
1779                Some(name) => aliases.get(name).map(|d| d.as_str()),
1780            })
1781            .collect();
1782        for edge in &file.edges {
1783            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
1784            if !referenced.contains(child_domain) {
1785                unused.push(UnusedImport {
1786                    file: file.path.clone(),
1787                    domain: child_domain.to_string(),
1788                    alias: edge.alias.clone(),
1789                    line: edge.line,
1790                });
1791            }
1792        }
1793
1794        let ctx = DomainCtx {
1795            current: file.domain.clone(),
1796            aliases,
1797        };
1798        out.push((hash.clone(), ctx));
1799    }
1800    unused.sort();
1801
1802    // Now move content/path out of `discovered` (no large clones) and pair with
1803    // the contexts built above.
1804    let files = out
1805        .into_iter()
1806        .map(|(hash, ctx)| {
1807            let file = discovered.remove(&hash).expect("hash was discovered");
1808            ResolvedFile {
1809                path: file.path,
1810                content: file.content,
1811                ctx,
1812            }
1813        })
1814        .collect();
1815    Ok((files, unused))
1816}
1817
1818/// A canonical signature of a `FOR EACH` quantifier, appended to the body hash so
1819/// two same-named premises that differ only in their quantifier still count as a
1820/// redefinition.
1821fn quant_sig(q: &Quant) -> String {
1822    match q {
1823        Quant::InSet { binder, set } => alloc::format!("|FOREACH {} IN {}", binder.data, set.data),
1824        Quant::Relation {
1825            left,
1826            predicate,
1827            right,
1828        } => alloc::format!("|FOREACH {} {} {}", left.data, predicate.data, right.data),
1829    }
1830}
1831
1832/// Parse one source, tagging any syntax [`Diagnostics`] with its file label so a
1833/// `CompileError::Parse` names the right file. The single spelling of "parse, and
1834/// on failure attach the file" — shared by the inline, resolved, and import paths.
1835fn parse_tagged<'a>(
1836    file: &str,
1837    content: &'a str,
1838) -> Result<elenchus_parser::Program<'a>, CompileError> {
1839    elenchus_parser::parse(content).map_err(|mut diag| {
1840        diag.set_file(file);
1841        CompileError::Parse(diag)
1842    })
1843}
1844
1845/// `" — did you mean \`x\`?"` for an undeclared set name, or empty when no
1846/// declared set name is close enough.
1847fn nearest_set_suggestion(set: &str, sets: &BTreeMap<String, Vec<String>>) -> String {
1848    let names: Vec<&str> = sets.keys().map(String::as_str).collect();
1849    did_you_mean(set, &names)
1850}
1851
1852/// A list of binder substitutions `(name, value)` applied during grounding: one
1853/// entry for an `IN <set>` quantifier, two for a `<a> <rel> <b>` relation.
1854type Subs<'s> = [(&'s str, &'s str)];
1855
1856/// Replace any binder with its value in one identifier; non-matching pass through.
1857fn subst_ident<'s>(s: &'s str, subs: &Subs<'s>) -> &'s str {
1858    subs.iter()
1859        .find_map(|&(b, v)| (s == b).then_some(v))
1860        .unwrap_or(s)
1861}
1862
1863/// Replace the binders in an atom (subject, predicate, and object positions).
1864fn subst_atom<'s>(a: &Atom<'s>, subs: &Subs<'s>) -> Atom<'s> {
1865    Atom {
1866        domain: a.domain,
1867        subject: subst_ident(a.subject, subs),
1868        predicate: a.predicate.map(|p| subst_ident(p, subs)),
1869        object: a.object.map(|o| subst_ident(o, subs)),
1870    }
1871}
1872
1873/// Replace the binders in one located literal (preserving its span and `NOT`).
1874fn subst_lit<'s>(ll: &Located<'s, Literal<'s>>, subs: &Subs<'s>) -> Located<'s, Literal<'s>> {
1875    Located {
1876        data: Literal {
1877            negated: ll.data.negated,
1878            atom: subst_atom(&ll.data.atom, subs),
1879        },
1880        span: ll.span,
1881    }
1882}
1883
1884/// Build a grounded copy of a body with the `FOR EACH` binders substituted.
1885/// Spans are preserved so any error still points at the original source line. The
1886/// result borrows from the original body and from the substitution values, so it
1887/// is consumed immediately (its keys interned) and never stored.
1888fn subst_body<'s>(body: &Body<'s>, subs: &Subs<'s>) -> Body<'s> {
1889    match body {
1890        Body::List { op, atoms } => Body::List {
1891            op: *op,
1892            atoms: atoms
1893                .iter()
1894                .map(|la| Located {
1895                    data: subst_atom(&la.data, subs),
1896                    span: la.span,
1897                })
1898                .collect(),
1899        },
1900        Body::Impl {
1901            antecedent,
1902            ante_conn,
1903            consequent,
1904            cons_conn,
1905        } => Body::Impl {
1906            antecedent: antecedent.iter().map(|l| subst_lit(l, subs)).collect(),
1907            ante_conn: *ante_conn,
1908            consequent: consequent.iter().map(|l| subst_lit(l, subs)).collect(),
1909            cons_conn: *cons_conn,
1910        },
1911    }
1912}
1913
1914/// Collect the domain prefixes used by a statement's atoms into `out` (`None` for
1915/// a bare atom, `Some(p)` for a `p.`-qualified one) — feeds the unused-import lint.
1916fn collect_prefixes(stmt: &Statement, out: &mut BTreeSet<Option<String>>) {
1917    let mut add = |a: &Atom| {
1918        out.insert(a.domain.map(|d| d.to_string()));
1919    };
1920    match stmt {
1921        Statement::Fact(a) | Statement::Negation(a) => add(&a.data),
1922        Statement::Assume(l) => add(&l.data.atom),
1923        Statement::Premise { body, .. } | Statement::Rule { body, .. } => match body {
1924            Body::List { atoms, .. } => atoms.iter().for_each(|a| add(&a.data)),
1925            Body::Impl {
1926                antecedent,
1927                consequent,
1928                ..
1929            } => antecedent
1930                .iter()
1931                .chain(consequent)
1932                .for_each(|l| add(&l.data.atom)),
1933        },
1934        Statement::Domain(_)
1935        | Statement::Import { .. }
1936        | Statement::Check { .. }
1937        | Statement::Set { .. }
1938        | Statement::Close { .. }
1939        | Statement::Var { .. }
1940        | Statement::Provide { .. } => {}
1941    }
1942}
1943
1944/// The transitive closure of a relation's `(from, to)` pairs: add `(a, c)`
1945/// whenever `(a, b)` and `(b, c)` are present, to a fixpoint. A self-pair
1946/// `(x, x)` in the result marks a cycle. A small compile-time graph op.
1947fn transitive_closure(pairs: Vec<(String, String)>) -> Vec<(String, String)> {
1948    let mut set: BTreeSet<(String, String)> = pairs.into_iter().collect();
1949    loop {
1950        let mut added: Vec<(String, String)> = Vec::new();
1951        for (a, b) in &set {
1952            for (c, d) in &set {
1953                if b == c {
1954                    let p = (a.clone(), d.clone());
1955                    if !set.contains(&p) {
1956                        added.push(p);
1957                    }
1958                }
1959            }
1960        }
1961        if added.is_empty() {
1962            break;
1963        }
1964        set.extend(added);
1965    }
1966    set.into_iter().collect()
1967}
1968
1969/// The single `DOMAIN` a source declares, or an error if it has none or several.
1970fn extract_domain(
1971    program: &elenchus_parser::Program,
1972    source: &str,
1973) -> Result<String, CompileError> {
1974    let mut found: Option<String> = None;
1975    for stmt in &program.statements {
1976        if let Statement::Domain(name) = stmt {
1977            if found.is_some() {
1978                return Err(CompileError::DuplicateDomain {
1979                    file: source.to_string(),
1980                });
1981            }
1982            found = Some(name.data.to_string());
1983        }
1984    }
1985    found.ok_or_else(|| CompileError::MissingDomain {
1986        file: source.to_string(),
1987    })
1988}
1989
1990// --- helpers ---------------------------------------------------------------
1991
1992/// Levenshtein edit distance over Unicode scalars (rolling two-row DP). Small
1993/// inputs (atom/value names), so the simple DP is plenty. The one edit-distance
1994/// implementation in the workspace: the compiler's "did you mean" suggestions
1995/// (via [`nearest`]) and the solver's typo-hint lint both build on it.
1996pub fn levenshtein(a: &[char], b: &[char]) -> usize {
1997    let mut prev: Vec<usize> = (0..=b.len()).collect();
1998    let mut cur = vec![0usize; b.len() + 1];
1999    for (i, &ca) in a.iter().enumerate() {
2000        cur[0] = i + 1;
2001        for (j, &cb) in b.iter().enumerate() {
2002            let cost = usize::from(ca != cb);
2003            cur[j + 1] = (prev[j + 1] + 1).min(cur[j] + 1).min(prev[j] + cost);
2004        }
2005        core::mem::swap(&mut prev, &mut cur);
2006    }
2007    prev[b.len()]
2008}
2009
2010/// The closest candidate to `word` within an edit-distance threshold, or `None`
2011/// when nothing is close enough to be a useful "did you mean".
2012///
2013/// The threshold scales with length (`len / 3`, in Unicode scalars) and is **not**
2014/// floored at 1: a value of 1–2 characters yields a budget of 0, so no suggestion
2015/// is offered. This is deliberate — for very short tokens (a single CJK character,
2016/// where one symbol is a whole word, or a two-letter code) every other short value
2017/// sits at distance 1, so a "did you mean" there is pure noise, not a typo cue. The
2018/// rejection itself is exact (set membership), so suppressing the guess never hides
2019/// a real error; it only withholds a meaningless one. Longer names tolerate a slip
2020/// or two, mirroring the spirit of the solver's typo-hint lint.
2021fn nearest<'a>(word: &str, candidates: &[&'a str]) -> Option<&'a str> {
2022    let budget = word.chars().count() / 3;
2023    if budget == 0 {
2024        return None;
2025    }
2026    let w: Vec<char> = word.chars().collect();
2027    candidates
2028        .iter()
2029        .map(|&c| (levenshtein(&w, &c.chars().collect::<Vec<char>>()), c))
2030        .filter(|&(d, _)| d <= budget)
2031        .min_by_key(|&(d, _)| d)
2032        .map(|(_, c)| c)
2033}
2034
2035/// `" — did you mean `x`?"` for the nearest candidate to `word`, or empty when
2036/// none is close enough. The single spelling of the suggestion suffix, shared by
2037/// every "unknown name" diagnostic (values, sets, …).
2038fn did_you_mean(word: &str, candidates: &[&str]) -> String {
2039    match nearest(word, candidates) {
2040        Some(s) => alloc::format!(" — did you mean `{s}`?"),
2041        None => String::new(),
2042    }
2043}
2044
2045/// Lower parsed, located literals to key-based [`RawLit`]s (drops spans),
2046/// resolving each atom's domain through `ctx`.
2047fn raw_lits(
2048    lits: &[elenchus_parser::Located<Literal>],
2049    ctx: &DomainCtx,
2050) -> Result<Vec<RawLit>, CompileError> {
2051    lits.iter()
2052        .map(|l| {
2053            Ok(RawLit {
2054                key: ctx.key(&l.data.atom)?,
2055                negated: l.data.negated,
2056            })
2057        })
2058        .collect()
2059}
2060
2061/// The surface keyword for a list op, used as [`Origin::kind`] in the report.
2062fn list_kind(op: ListOp) -> &'static str {
2063    match op {
2064        ListOp::Exclusive => kw::EXCLUSIVE,
2065        ListOp::Forbids => kw::FORBIDS,
2066        ListOp::OneOf => kw::ONEOF,
2067        ListOp::AtLeast => kw::ATLEAST,
2068    }
2069}
2070
2071/// Stable `domain|subject|predicate|object` string for an atom key (the unit from
2072/// which clause/fact/body signatures are built). Includes the domain so atoms in
2073/// different domains never share a signature.
2074fn key_sig(k: &AtomKey) -> String {
2075    alloc::format!(
2076        "{}|{}|{}|{}",
2077        k.domain,
2078        k.subject,
2079        k.predicate.as_deref().unwrap_or(""),
2080        k.object.as_deref().unwrap_or("")
2081    )
2082}
2083
2084/// Canonical, order-independent signature of a clause's literals (for dedup).
2085fn clause_sig(lits: &[RawLit]) -> String {
2086    let mut parts: Vec<String> = lits
2087        .iter()
2088        .map(|l| alloc::format!("{}|{}", key_sig(&l.key), l.negated as u8))
2089        .collect();
2090    parts.sort();
2091    parts.dedup();
2092    parts.join(";")
2093}
2094
2095/// Canonical body string for a named construct, hashed for redefinition checks.
2096/// Resolves atom domains through `ctx` so the signature keys on resolved identity.
2097fn canonical_body(
2098    name: &str,
2099    body: &Body,
2100    is_rule: bool,
2101    ctx: &DomainCtx,
2102) -> Result<String, CompileError> {
2103    let mut s = String::new();
2104    let _ = write!(
2105        s,
2106        "{}|{}|",
2107        if is_rule { kw::RULE } else { kw::PREMISE },
2108        name
2109    );
2110    match body {
2111        Body::List { op, atoms } => {
2112            let _ = write!(s, "LIST|{}|", list_kind(*op));
2113            let mut keys: Vec<String> = atoms
2114                .iter()
2115                .map(|a| Ok(key_sig(&ctx.key(&a.data)?)))
2116                .collect::<Result<_, CompileError>>()?;
2117            keys.sort();
2118            s.push_str(&keys.join(";"));
2119        }
2120        Body::Impl {
2121            antecedent,
2122            ante_conn,
2123            consequent,
2124            cons_conn,
2125        } => {
2126            let conn = |c: &Conn| if *c == Conn::Or { "OR" } else { "AND" };
2127            s.push_str("IMPL|ANTE|");
2128            s.push_str(conn(ante_conn));
2129            s.push('|');
2130            s.push_str(&lit_sigs(antecedent, ctx)?);
2131            s.push_str("|CONS|");
2132            s.push_str(conn(cons_conn));
2133            s.push('|');
2134            s.push_str(&lit_sigs(consequent, ctx)?);
2135        }
2136    }
2137    Ok(s)
2138}
2139
2140/// Sorted `key|negated` signature of a literal list (order-independent), used
2141/// inside [`canonical_body`] so reordering a body does not look like a redefinition.
2142fn lit_sigs(
2143    lits: &[elenchus_parser::Located<Literal>],
2144    ctx: &DomainCtx,
2145) -> Result<String, CompileError> {
2146    let mut parts: Vec<String> = lits
2147        .iter()
2148        .map(|l| {
2149            Ok(alloc::format!(
2150                "{}|{}",
2151                key_sig(&ctx.key(&l.data.atom)?),
2152                l.data.negated as u8
2153            ))
2154        })
2155        .collect::<Result<_, CompileError>>()?;
2156    parts.sort();
2157    Ok(parts.join(";"))
2158}
2159
2160#[cfg(test)]
2161mod tests {
2162    use super::*;
2163
2164    /// Compile a single inline source under a default `DOMAIN t`, so test
2165    /// programs need not repeat the declaration. Atoms land in domain `t`.
2166    fn cs(src: &str) -> Result<Compiled, CompileError> {
2167        compile_source("<t>", &alloc::format!("DOMAIN t\n{src}"))
2168    }
2169
2170    /// An atom key in the default test domain `t`.
2171    fn key(subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
2172        key_in("t", subject, predicate, object)
2173    }
2174
2175    /// An atom key in an explicit domain.
2176    fn key_in(domain: &str, subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
2177        AtomKey {
2178            domain: domain.to_string(),
2179            subject: subject.to_string(),
2180            predicate: Some(predicate.to_string()),
2181            object: object.map(|o| o.to_string()),
2182        }
2183    }
2184
2185    fn id(c: &Compiled, k: &AtomKey) -> AtomId {
2186        c.atoms.iter().position(|a| a == k).unwrap() as AtomId
2187    }
2188
2189    #[test]
2190    fn exclusive_unfolds_pairwise() {
2191        let src = r#"
2192        PREMISE e:
2193            EXCLUSIVE
2194                x a
2195                x b
2196                x c
2197        "#;
2198        let c = cs(src).unwrap();
2199        // C(3,2) = 3 clauses, each of 2 positive literals.
2200        assert_eq!(c.clauses.len(), 3);
2201        for cl in &c.clauses {
2202            assert_eq!(cl.lits.len(), 2);
2203            assert!(cl.lits.iter().all(|l| !l.negated));
2204        }
2205    }
2206
2207    #[test]
2208    fn implication_negates_consequent() {
2209        // WHEN x a THEN x b  ==  Impossible([x a, NOT x b])
2210        let src = r#"
2211        PREMISE r:
2212            WHEN x a
2213            THEN x b
2214        "#;
2215        let c = cs(src).unwrap();
2216        assert_eq!(c.clauses.len(), 1);
2217        let cl = &c.clauses[0];
2218        assert_eq!(cl.lits.len(), 2);
2219        let a = id(&c, &key("x", "a", None));
2220        let b = id(&c, &key("x", "b", None));
2221        assert!(cl.lits.contains(&Lit {
2222            atom: a,
2223            negated: false
2224        }));
2225        assert!(cl.lits.contains(&Lit {
2226            atom: b,
2227            negated: true
2228        }));
2229    }
2230
2231    #[test]
2232    fn negated_consequent_flips_to_positive() {
2233        // THEN NOT x b  →  NOT(NOT x b) = x b positive inside Impossible
2234        let src = r#"
2235        PREMISE r:
2236            WHEN x a
2237            THEN NOT x b
2238        "#;
2239        let c = cs(src).unwrap();
2240        let b = id(&c, &key("x", "b", None));
2241        assert!(c.clauses[0].lits.contains(&Lit {
2242            atom: b,
2243            negated: false
2244        }));
2245    }
2246
2247    #[test]
2248    fn consequent_or_is_one_clause_with_all_negated() {
2249        // WHEN x p THEN x a OR x b  ==  Impossible([x p, NOT x a, NOT x b])
2250        let src = r#"
2251        PREMISE r:
2252            WHEN x p
2253            THEN x a
2254            OR x b
2255        "#;
2256        let c = cs(src).unwrap();
2257        assert_eq!(c.clauses.len(), 1);
2258        let cl = &c.clauses[0];
2259        assert_eq!(cl.lits.len(), 3);
2260        let p = id(&c, &key("x", "p", None));
2261        let a = id(&c, &key("x", "a", None));
2262        let b = id(&c, &key("x", "b", None));
2263        assert!(cl.lits.contains(&Lit {
2264            atom: p,
2265            negated: false
2266        }));
2267        assert!(cl.lits.contains(&Lit {
2268            atom: a,
2269            negated: true
2270        }));
2271        assert!(cl.lits.contains(&Lit {
2272            atom: b,
2273            negated: true
2274        }));
2275    }
2276
2277    #[test]
2278    fn antecedent_or_is_one_clause_per_disjunct() {
2279        // WHEN x a OR x b THEN x c
2280        //   == Impossible([x a, NOT x c]) ∧ Impossible([x b, NOT x c])
2281        let src = r#"
2282        PREMISE r:
2283            WHEN x a
2284            OR x b
2285            THEN x c
2286        "#;
2287        let c = cs(src).unwrap();
2288        assert_eq!(c.clauses.len(), 2);
2289        let a = id(&c, &key("x", "a", None));
2290        let b = id(&c, &key("x", "b", None));
2291        let cc = id(&c, &key("x", "c", None));
2292        // every clause has exactly two lits and carries NOT c
2293        for cl in &c.clauses {
2294            assert_eq!(cl.lits.len(), 2);
2295            assert!(cl.lits.contains(&Lit {
2296                atom: cc,
2297                negated: true
2298            }));
2299        }
2300        let has = |atom| {
2301            c.clauses.iter().any(|cl| {
2302                cl.lits.contains(&Lit {
2303                    atom,
2304                    negated: false,
2305                })
2306            })
2307        };
2308        assert!(has(a) && has(b));
2309    }
2310
2311    #[test]
2312    fn antecedent_or_with_consequent_or_distributes() {
2313        // (a ∨ b) → (c ∨ d): Impossible([a,¬c,¬d]) ∧ Impossible([b,¬c,¬d])
2314        let src = r#"
2315        PREMISE r:
2316            WHEN x a
2317            OR x b
2318            THEN x c
2319            OR x d
2320        "#;
2321        let c = cs(src).unwrap();
2322        assert_eq!(c.clauses.len(), 2);
2323        for cl in &c.clauses {
2324            assert_eq!(cl.lits.len(), 3);
2325        }
2326    }
2327
2328    #[test]
2329    fn rule_with_or_antecedent_splits_into_two_rules() {
2330        // (a ∨ b) → c derives c whenever either fires: two single-antecedent rules.
2331        let src = r#"
2332        RULE r:
2333            WHEN x a
2334            OR x b
2335            THEN x c
2336        "#;
2337        let c = cs(src).unwrap();
2338        assert_eq!(c.rules.len(), 2);
2339        assert!(
2340            c.rules
2341                .iter()
2342                .all(|r| r.antecedent.len() == 1 && r.consequent.len() == 1)
2343        );
2344    }
2345
2346    #[test]
2347    fn rule_with_or_consequent_is_rejected() {
2348        // A rule cannot derive a disjunction — must be a PREMISE.
2349        let src = r#"
2350        RULE r:
2351            WHEN x a
2352            THEN x b
2353            OR x c
2354        "#;
2355        let err = cs(src).unwrap_err();
2356        assert!(matches!(
2357            err,
2358            CompileError::RuleDisjunctiveConsequent { .. }
2359        ));
2360    }
2361
2362    #[test]
2363    fn oneof_is_pairwise_plus_at_least_one() {
2364        let src = r#"
2365        PREMISE o:
2366            ONEOF
2367                x a
2368                x b
2369        "#;
2370        let c = cs(src).unwrap();
2371        // pairwise C(2,2)=1 + 1 at-least-one = 2 clauses
2372        assert_eq!(c.clauses.len(), 2);
2373        // the at-least-one clause is the all-negated one
2374        assert!(c.clauses.iter().any(|cl| cl.lits.iter().all(|l| l.negated)));
2375    }
2376
2377    #[test]
2378    fn atleast_is_one_negated_clause() {
2379        let src = r#"
2380        PREMISE a:
2381            ATLEAST
2382                x a
2383                x b
2384                x c
2385        "#;
2386        let c = cs(src).unwrap();
2387        assert_eq!(c.clauses.len(), 1);
2388        assert_eq!(c.clauses[0].lits.len(), 3);
2389        assert!(c.clauses[0].lits.iter().all(|l| l.negated));
2390    }
2391
2392    #[test]
2393    fn rules_are_separate_from_clauses() {
2394        let src = r#"
2395        RULE needs:
2396            WHEN x a
2397            THEN x b
2398        "#;
2399        let c = cs(src).unwrap();
2400        assert_eq!(c.clauses.len(), 0);
2401        assert_eq!(c.rules.len(), 1);
2402        assert_eq!(c.rules[0].antecedent.len(), 1);
2403        assert_eq!(c.rules[0].consequent.len(), 1);
2404    }
2405
2406    #[test]
2407    fn atoms_are_canonically_sorted() {
2408        let src = r#"
2409        FACT z z
2410        FACT a a
2411        FACT m m
2412        "#;
2413        let c = cs(src).unwrap();
2414        let mut sorted = c.atoms.clone();
2415        sorted.sort();
2416        assert_eq!(c.atoms, sorted);
2417    }
2418
2419    #[test]
2420    fn duplicate_premise_is_idempotent() {
2421        let src = r#"
2422        PREMISE e:
2423            EXCLUSIVE
2424                x a
2425                x b
2426        PREMISE e:
2427            EXCLUSIVE
2428                x a
2429                x b
2430        "#;
2431        let c = cs(src).unwrap();
2432        assert_eq!(c.clauses.len(), 1);
2433    }
2434
2435    #[test]
2436    fn redefinition_with_different_body_errors() {
2437        let src = r#"
2438        PREMISE e:
2439            EXCLUSIVE
2440                x a
2441                x b
2442        PREMISE e:
2443            EXCLUSIVE
2444                x a
2445                x c
2446        "#;
2447        let err = cs(src).unwrap_err();
2448        assert_eq!(
2449            err,
2450            CompileError::PremiseRedefinition {
2451                name: "e".to_string()
2452            }
2453        );
2454    }
2455
2456    #[test]
2457    fn duplicate_fact_is_idempotent() {
2458        let c = cs("FACT x a\nFACT x a\n").unwrap();
2459        assert_eq!(c.facts.len(), 1);
2460    }
2461
2462    #[test]
2463    fn conflicting_facts_are_both_kept() {
2464        // FACT X + NOT X is a CONFLICT for the solver, not a compile error.
2465        let c = cs("FACT x a\nNOT x a\n").unwrap();
2466        assert_eq!(c.facts.len(), 2);
2467    }
2468
2469    #[test]
2470    fn import_is_recorded_pending() {
2471        let c = cs("IMPORT \"physics.vrf\"\nFACT x a\n").unwrap();
2472        assert_eq!(c.pending_imports, vec!["physics.vrf".to_string()]);
2473    }
2474
2475    #[test]
2476    fn qualified_fact_lands_in_the_imported_domain() {
2477        // The library's premise is about `physics.Engine_X has fuel`; the main file
2478        // asserts a fact qualified INTO that domain, so the two share one atom id.
2479        let mut r = MemoryResolver::new();
2480        r.add(
2481            "lib.vrf",
2482            r#"
2483        DOMAIN physics
2484        PREMISE needs_fuel:
2485            WHEN Engine_X has engine
2486            THEN Engine_X has fuel
2487        "#,
2488        );
2489        r.add(
2490            "main.vrf",
2491            r#"
2492        DOMAIN main
2493        IMPORT "lib.vrf"
2494        FACT physics.Engine_X has engine
2495        FACT physics.Engine_X has fuel
2496        "#,
2497        );
2498        let c = compile("main.vrf", &r).unwrap();
2499        assert!(c.pending_imports.is_empty());
2500        assert_eq!(c.clauses.len(), 1); // the imported premise
2501        assert_eq!(c.facts.len(), 2);
2502
2503        // `physics.Engine_X has fuel` from the FACT and the imported premise share an id.
2504        let fuel = key_in("physics", "Engine_X", "has", Some("fuel"));
2505        let fuel_id = id(&c, &fuel);
2506        assert!(c.facts.iter().any(|f| f.atom == fuel_id));
2507        assert!(c.clauses[0].lits.iter().any(|l| l.atom == fuel_id));
2508    }
2509
2510    #[test]
2511    fn same_triple_in_different_domains_does_not_unify() {
2512        // Without a domain prefix the fact lands in `main`, NOT `physics`, so it is
2513        // a distinct atom from the library's `physics.Engine_X has fuel`.
2514        let mut r = MemoryResolver::new();
2515        r.add("lib.vrf", "DOMAIN physics\nFACT Engine_X has fuel\n");
2516        r.add(
2517            "main.vrf",
2518            "DOMAIN main\nIMPORT \"lib.vrf\"\nFACT Engine_X has fuel\n",
2519        );
2520        let c = compile("main.vrf", &r).unwrap();
2521        // Two distinct atoms: physics.Engine_X has fuel and main.Engine_X has fuel.
2522        assert!(c.atoms.iter().any(|a| a.domain == "physics"));
2523        assert!(c.atoms.iter().any(|a| a.domain == "main"));
2524        assert_eq!(
2525            c.atoms
2526                .iter()
2527                .filter(|a| a.subject == "Engine_X" && a.predicate.as_deref() == Some("has"))
2528                .count(),
2529            2
2530        );
2531    }
2532
2533    #[test]
2534    fn import_alias_binds_a_local_domain_name() {
2535        // `AS phys` lets the consumer reference the imported domain by a local name.
2536        let mut r = MemoryResolver::new();
2537        r.add("lib.vrf", "DOMAIN physics\nFACT Motor over_200\n");
2538        r.add(
2539            "main.vrf",
2540            "DOMAIN main\nIMPORT \"lib.vrf\" AS phys\nFACT phys.Motor over_100\n",
2541        );
2542        let c = compile("main.vrf", &r).unwrap();
2543        // Both facts live in the physics domain (one via its own name, one via alias).
2544        assert_eq!(c.atoms.iter().filter(|a| a.domain == "physics").count(), 2);
2545    }
2546
2547    #[test]
2548    fn unknown_domain_reference_errors() {
2549        // Referencing a domain that is neither this file's nor imported here fails.
2550        let err = cs("FACT ghost.x a\n").unwrap_err();
2551        assert!(matches!(err, CompileError::UnknownDomain { .. }));
2552    }
2553
2554    #[test]
2555    fn imports_are_not_transitive_for_naming() {
2556        // main imports physics; physics imports math. main may NOT name math.
2557        let mut r = MemoryResolver::new();
2558        r.add("math.vrf", "DOMAIN math\nFACT foo bar\n");
2559        r.add(
2560            "physics.vrf",
2561            "DOMAIN physics\nIMPORT \"math.vrf\"\nFACT Motor over_100\n",
2562        );
2563        r.add(
2564            "main.vrf",
2565            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT math.foo bar\n",
2566        );
2567        let err = compile("main.vrf", &r).unwrap_err();
2568        assert!(matches!(err, CompileError::UnknownDomain { .. }));
2569    }
2570
2571    #[test]
2572    fn transitive_dependency_clauses_still_load() {
2573        // Even though main can't *name* math, math's clauses still participate.
2574        let mut r = MemoryResolver::new();
2575        r.add(
2576            "math.vrf",
2577            r"
2578        DOMAIN math
2579        PREMISE e:
2580            EXCLUSIVE
2581                x a
2582                x b
2583        ",
2584        );
2585        r.add("physics.vrf", "DOMAIN physics\nIMPORT \"math.vrf\"\n");
2586        r.add("main.vrf", "DOMAIN main\nIMPORT \"physics.vrf\"\n");
2587        let c = compile("main.vrf", &r).unwrap();
2588        assert_eq!(c.clauses.len(), 1); // math's clause loaded transitively
2589        assert!(c.clauses.iter().all(|cl| cl.origin.source == "math.vrf"));
2590    }
2591
2592    #[test]
2593    fn missing_domain_errors() {
2594        let err = compile_source("nodomain.vrf", "FACT x a\n").unwrap_err();
2595        assert!(matches!(err, CompileError::MissingDomain { .. }));
2596    }
2597
2598    #[test]
2599    fn duplicate_domain_errors() {
2600        let err = compile_source("dup.vrf", "DOMAIN a\nDOMAIN b\nFACT x a\n").unwrap_err();
2601        assert!(matches!(err, CompileError::DuplicateDomain { .. }));
2602    }
2603
2604    #[test]
2605    fn alias_clash_when_one_local_name_binds_two_domains() {
2606        // The same local alias `x` bound to two genuinely different domains is a
2607        // clash: disambiguate with distinct aliases.
2608        let mut r = MemoryResolver::new();
2609        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2610        r.add("b.vrf", "DOMAIN chemistry\nFACT atom reacts\n");
2611        r.add(
2612            "main.vrf",
2613            "DOMAIN main\nIMPORT \"a.vrf\" AS x\nIMPORT \"b.vrf\" AS x\n",
2614        );
2615        let err = compile("main.vrf", &r).unwrap_err();
2616        assert!(matches!(err, CompileError::DomainAliasClash { .. }));
2617    }
2618
2619    #[test]
2620    fn two_files_with_the_same_domain_name_merge() {
2621        // Nominal domains: two files both declaring DOMAIN physics share it (the
2622        // value of importing a premise library is exactly this unification).
2623        let mut r = MemoryResolver::new();
2624        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2625        r.add(
2626            "main.vrf",
2627            "DOMAIN physics\nIMPORT \"a.vrf\"\nFACT Motor over_200\n",
2628        );
2629        let c = compile("main.vrf", &r).unwrap();
2630        // Both motors live in the single shared `physics` domain.
2631        assert!(c.atoms.iter().all(|a| a.domain == "physics"));
2632        assert_eq!(c.atoms.len(), 2);
2633    }
2634
2635    #[test]
2636    fn diamond_import_is_deduped() {
2637        // main → a, c ; a → base ; c → base. base merged once.
2638        let mut r = MemoryResolver::new();
2639        r.add(
2640            "base.vrf",
2641            r#"
2642        DOMAIN base
2643        PREMISE b:
2644            EXCLUSIVE
2645                x a
2646                x b
2647        "#,
2648        );
2649        r.add("a.vrf", "DOMAIN a\nIMPORT \"base.vrf\"\n");
2650        r.add("c.vrf", "DOMAIN c\nIMPORT \"base.vrf\"\n");
2651        r.add(
2652            "main.vrf",
2653            "DOMAIN main\nIMPORT \"a.vrf\"\nIMPORT \"c.vrf\"\n",
2654        );
2655        let c = compile("main.vrf", &r).unwrap();
2656        assert_eq!(c.clauses.len(), 1); // base's single clause, not two
2657    }
2658
2659    #[test]
2660    fn circular_import_errors() {
2661        let mut r = MemoryResolver::new();
2662        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
2663        r.add("b.vrf", "DOMAIN b\nIMPORT \"a.vrf\"\n");
2664        let err = compile("a.vrf", &r).unwrap_err();
2665        assert!(matches!(err, CompileError::CircularImport(_)));
2666    }
2667
2668    #[test]
2669    fn three_node_cycle_errors() {
2670        // a → b → c → a. The back-edge to the on-path ancestor is detected.
2671        let mut r = MemoryResolver::new();
2672        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
2673        r.add("b.vrf", "DOMAIN b\nIMPORT \"c.vrf\"\n");
2674        r.add("c.vrf", "DOMAIN c\nIMPORT \"a.vrf\"\n");
2675        let err = compile("a.vrf", &r).unwrap_err();
2676        assert!(matches!(err, CompileError::CircularImport(_)));
2677    }
2678
2679    #[test]
2680    fn shared_grandchild_diamond_loads_once() {
2681        // The user's case: a imports B and C; C ALSO imports B. B must be compiled
2682        // exactly once (its single clause is not duplicated by the two paths to it).
2683        let mut r = MemoryResolver::new();
2684        r.add(
2685            "b.vrf",
2686            r"
2687        DOMAIN b
2688        PREMISE e:
2689            EXCLUSIVE
2690                x a
2691                x b
2692        ",
2693        );
2694        r.add("c.vrf", "DOMAIN c\nIMPORT \"b.vrf\"\n");
2695        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\nIMPORT \"c.vrf\"\n");
2696        let c = compile("a.vrf", &r).unwrap();
2697        assert_eq!(
2698            c.clauses.len(),
2699            1,
2700            "b.vrf's clause must appear exactly once"
2701        );
2702    }
2703
2704    #[test]
2705    fn exponential_fan_out_is_memoized_not_blown_up() {
2706        // f_k imports f_{k-1} TWICE. Without content-hash memoization the visit
2707        // count is 2^k (2^40 ≈ a trillion); with it, the work is linear, so this
2708        // finishes instantly. A guard against any combinatorial blow-up / DoS.
2709        let mut r = MemoryResolver::new();
2710        r.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
2711        let n = 40;
2712        for k in 1..=n {
2713            r.add(
2714                &alloc::format!("f{k}.vrf"),
2715                &alloc::format!(
2716                    "DOMAIN d{k}\nIMPORT \"f{}.vrf\"\nIMPORT \"f{}.vrf\"\n",
2717                    k - 1,
2718                    k - 1
2719                ),
2720            );
2721        }
2722        let c = compile(&alloc::format!("f{n}.vrf"), &r).unwrap();
2723        assert_eq!(c.facts.len(), 1); // the single fact from f0, reached once
2724    }
2725
2726    #[test]
2727    fn very_deep_linear_chain_does_not_overflow() {
2728        // A long non-cyclic chain. Resolution is iterative (explicit work stack),
2729        // so a depth that would overflow a recursive loader compiles cleanly.
2730        let mut r = MemoryResolver::new();
2731        r.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
2732        let n = 10_000;
2733        for k in 1..=n {
2734            r.add(
2735                &alloc::format!("f{k}.vrf"),
2736                &alloc::format!("DOMAIN d{k}\nIMPORT \"f{}.vrf\"\n", k - 1),
2737            );
2738        }
2739        let c = compile(&alloc::format!("f{n}.vrf"), &r).unwrap();
2740        assert_eq!(c.facts.len(), 1);
2741    }
2742
2743    #[test]
2744    fn missing_import_errors() {
2745        let mut r = MemoryResolver::new();
2746        r.add("main.vrf", "DOMAIN main\nIMPORT \"ghost.vrf\"\n");
2747        let err = compile("main.vrf", &r).unwrap_err();
2748        assert!(matches!(err, CompileError::ImportNotFound(_)));
2749    }
2750
2751    #[test]
2752    fn unused_import_is_flagged() {
2753        // main imports physics but never writes a `physics.` atom → unused.
2754        let mut r = MemoryResolver::new();
2755        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2756        r.add(
2757            "main.vrf",
2758            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT x a\n",
2759        );
2760        let c = compile("main.vrf", &r).unwrap();
2761        assert_eq!(c.unused_imports.len(), 1);
2762        assert_eq!(c.unused_imports[0].domain, "physics");
2763        assert_eq!(c.unused_imports[0].file, "main.vrf");
2764        assert_eq!(c.unused_imports[0].alias, None);
2765    }
2766
2767    #[test]
2768    fn referenced_import_is_not_unused() {
2769        // The same import, but now a `physics.` atom uses it → not flagged.
2770        let mut r = MemoryResolver::new();
2771        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2772        r.add(
2773            "main.vrf",
2774            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT physics.Motor over_200\n",
2775        );
2776        let c = compile("main.vrf", &r).unwrap();
2777        assert!(c.unused_imports.is_empty(), "{:?}", c.unused_imports);
2778    }
2779
2780    #[test]
2781    fn unused_import_records_its_alias() {
2782        let mut r = MemoryResolver::new();
2783        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2784        r.add(
2785            "main.vrf",
2786            "DOMAIN main\nIMPORT \"physics.vrf\" AS phys\nFACT x a\n",
2787        );
2788        let c = compile("main.vrf", &r).unwrap();
2789        assert_eq!(c.unused_imports.len(), 1);
2790        assert_eq!(c.unused_imports[0].alias.as_deref(), Some("phys"));
2791    }
2792
2793    #[test]
2794    fn import_referenced_only_inside_a_premise_is_used() {
2795        // The reference can be anywhere — here inside a premise body, not a fact.
2796        let mut r = MemoryResolver::new();
2797        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2798        r.add(
2799            "main.vrf",
2800            r#"
2801        DOMAIN main
2802        IMPORT "physics.vrf"
2803        PREMISE p:
2804            WHEN physics.Motor over_100
2805            THEN x ok
2806        "#,
2807        );
2808        let c = compile("main.vrf", &r).unwrap();
2809        assert!(c.unused_imports.is_empty(), "{:?}", c.unused_imports);
2810    }
2811
2812    #[test]
2813    fn same_premise_name_across_files_coexists() {
2814        // Two files may legitimately reuse a premise NAME with different bodies.
2815        // Names are per-source labels — both premises apply, qualified by source.
2816        // NOT a redefinition error. (Atoms stay apart too: different domains.)
2817        let mut r = MemoryResolver::new();
2818        r.add(
2819            "physics.vrf",
2820            r#"
2821        DOMAIN physics
2822        PREMISE safety:
2823            EXCLUSIVE
2824                x a
2825                x b
2826        "#,
2827        );
2828        r.add(
2829            "main.vrf",
2830            r#"
2831        DOMAIN main
2832        IMPORT "physics.vrf"
2833        PREMISE safety:
2834            EXCLUSIVE
2835                x a
2836                x c
2837        "#,
2838        );
2839        let c = compile("main.vrf", &r).unwrap();
2840        assert_eq!(c.clauses.len(), 2); // a-b from physics, a-c from main
2841        assert!(c.clauses.iter().any(|cl| cl.origin.source == "physics.vrf"));
2842        assert!(c.clauses.iter().any(|cl| cl.origin.source == "main.vrf"));
2843    }
2844
2845    #[test]
2846    fn redefinition_within_one_source_still_errors() {
2847        // But reusing a name with a different body *inside one source* is a mistake.
2848        let src = r#"
2849        DOMAIN m
2850        PREMISE e:
2851            EXCLUSIVE
2852                x a
2853                x b
2854        PREMISE e:
2855            EXCLUSIVE
2856                x a
2857                x c
2858        "#;
2859        let err = compile_source("main.vrf", src).unwrap_err();
2860        assert_eq!(
2861            err,
2862            CompileError::PremiseRedefinition {
2863                name: "e".to_string()
2864            }
2865        );
2866    }
2867
2868    #[test]
2869    fn import_demo_examples_resolve() {
2870        let mut r = MemoryResolver::new();
2871        r.add(
2872            "physics.vrf",
2873            include_str!("../../../docs/examples/physics.vrf"),
2874        );
2875        r.add(
2876            "import-demo.vrf",
2877            include_str!("../../../docs/examples/import-demo.vrf"),
2878        );
2879        let c = compile("import-demo.vrf", &r).unwrap();
2880        assert!(c.pending_imports.is_empty());
2881        // physics.vrf: one_path (EXCLUSIVE, 1 clause) + speed_order (impl, 1 clause)
2882        assert_eq!(c.clauses.len(), 2);
2883        // The qualified facts (`physics.Motor …`) share ids with the imported premise.
2884        let over_100 = id(&c, &key_in("physics", "Motor", "over_100", None));
2885        assert!(c.facts.iter().any(|f| f.atom == over_100));
2886        assert!(
2887            c.clauses
2888                .iter()
2889                .any(|cl| cl.lits.iter().any(|l| l.atom == over_100))
2890        );
2891    }
2892
2893    #[test]
2894    fn creature_example_compiles() {
2895        let src = include_str!("../../../docs/examples/creature.vrf");
2896        let c = compile_source("creature.vrf", src).unwrap();
2897        assert_eq!(c.facts.len(), 2); // flying, warm_blood
2898        assert_eq!(c.rules.len(), 1); // needs_oxygen
2899        assert_eq!(c.checks.len(), 1);
2900        // fly_xor_swim (1) + wings_need_bone (THEN wing AND bone → 2) + no_dual_temp (1) = 4
2901        assert_eq!(c.clauses.len(), 4);
2902        assert_eq!(c.atoms.len(), 7);
2903    }
2904
2905    #[test]
2906    fn forbids_unfolds_pairwise() {
2907        let src = r#"
2908        PREMISE f:
2909            FORBIDS
2910                x a
2911                x b
2912                x c
2913        "#;
2914        let c = cs(src).unwrap();
2915        assert_eq!(c.clauses.len(), 3); // C(3,2), like EXCLUSIVE
2916        assert!(
2917            c.clauses
2918                .iter()
2919                .all(|cl| cl.lits.len() == 2 && cl.lits.iter().all(|l| !l.negated))
2920        );
2921    }
2922
2923    #[test]
2924    fn rule_with_multiple_consequents() {
2925        let src = r#"
2926        RULE r:
2927            WHEN x a
2928            THEN x b
2929            AND  x c
2930        "#;
2931        let c = cs(src).unwrap();
2932        assert_eq!(c.rules.len(), 1);
2933        assert_eq!(c.rules[0].consequent.len(), 2);
2934    }
2935
2936    #[test]
2937    fn negated_antecedent_literal_keeps_polarity() {
2938        // WHEN NOT x a THEN x b  ==  Impossible([NOT x a, NOT x b])
2939        let src = r#"
2940        PREMISE a:
2941            WHEN NOT x a
2942            THEN x b
2943        "#;
2944        let c = cs(src).unwrap();
2945        let xa = id(&c, &key("x", "a", None));
2946        assert!(c.clauses[0].lits.contains(&Lit {
2947            atom: xa,
2948            negated: true
2949        }));
2950    }
2951
2952    #[test]
2953    fn rule_keeps_consequent_negation() {
2954        let src = r#"
2955        RULE r:
2956            WHEN x a
2957            THEN NOT x b
2958        "#;
2959        let c = cs(src).unwrap();
2960        assert!(c.rules[0].consequent[0].negated);
2961    }
2962
2963    #[test]
2964    fn compilation_is_deterministic() {
2965        let src = r#"
2966        PREMISE e:
2967            EXCLUSIVE
2968                z z
2969                a a
2970                m m
2971        FACT q q
2972        "#;
2973        assert_eq!(cs(src).unwrap(), cs(src).unwrap());
2974    }
2975
2976    #[test]
2977    fn empty_program_compiles_to_empty_ir() {
2978        let c = cs("// nothing here\n").unwrap();
2979        assert!(c.atoms.is_empty() && c.clauses.is_empty() && c.facts.is_empty());
2980    }
2981
2982    #[test]
2983    fn same_clause_from_two_named_premises_is_deduped() {
2984        // Different names, identical logical content → one clause, no redefinition.
2985        let src = r#"
2986        PREMISE e1:
2987            EXCLUSIVE
2988                x a
2989                x b
2990        PREMISE e2:
2991            EXCLUSIVE
2992                x a
2993                x b
2994        "#;
2995        let c = cs(src).unwrap();
2996        assert_eq!(c.clauses.len(), 1);
2997    }
2998
2999    #[test]
3000    fn object_distinguishes_atom_identity() {
3001        // `x p a` and `x p b` differ only by object → two distinct atoms.
3002        let c = cs("FACT x p a\nFACT x p b\n").unwrap();
3003        assert_eq!(c.atoms.len(), 2);
3004    }
3005
3006    // --- closed-world: ONEOF closes its variable's value set -----------------
3007
3008    /// A `ONEOF` body declaring three values of `resolved is …`. Flush-left so it
3009    /// concatenates cleanly in front of an appended line (CAPSTONE-style const).
3010    const ONEOF_RESOLVED: &str = r"PREMISE pick:
3011    ONEOF
3012        resolved is censored
3013        resolved is censored_mtp
3014        resolved is uncensored
3015";
3016
3017    #[test]
3018    fn value_outside_oneof_is_rejected() {
3019        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censoredmtp\n");
3020        let err = cs(&src).unwrap_err();
3021        let CompileError::UnknownValue(e) = err else {
3022            panic!("expected UnknownValue, got {err:?}");
3023        };
3024        assert_eq!(e.value, "censoredmtp");
3025        assert_eq!(e.subject, "resolved");
3026        assert_eq!(e.predicate, "is");
3027        assert_eq!(e.declared, "censored, censored_mtp, uncensored");
3028    }
3029
3030    #[test]
3031    fn near_miss_value_suggests_the_intended_one() {
3032        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censoredmtp\n");
3033        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
3034            panic!("expected UnknownValue");
3035        };
3036        assert_eq!(e.suggestion, " — did you mean `censored_mtp`?");
3037    }
3038
3039    #[test]
3040    fn far_off_value_offers_no_suggestion() {
3041        // `wildly_different` is past the edit-distance budget of every declared
3042        // value, so we reject it but do not guess.
3043        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is wildly_different\n");
3044        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
3045            panic!("expected UnknownValue");
3046        };
3047        assert_eq!(e.suggestion, "");
3048    }
3049
3050    #[test]
3051    fn declared_value_compiles_cleanly() {
3052        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censored_mtp\n");
3053        assert!(cs(&src).is_ok());
3054    }
3055
3056    #[test]
3057    fn oneof_declared_after_the_reference_still_catches_it() {
3058        // The check runs once every source is accumulated, so order is irrelevant.
3059        let src = alloc::format!("FACT resolved is censoredmtp\n{ONEOF_RESOLVED}");
3060        assert!(matches!(
3061            cs(&src).unwrap_err(),
3062            CompileError::UnknownValue(_)
3063        ));
3064    }
3065
3066    #[test]
3067    fn out_of_set_value_inside_a_premise_is_caught() {
3068        // Closed-world covers references anywhere — not just FACTs.
3069        let src = alloc::format!(
3070            r"{ONEOF_RESOLVED}
3071            PREMISE p:
3072                WHEN resolved is censoredmtp
3073                THEN x done
3074        "
3075        );
3076        assert!(matches!(
3077            cs(&src).unwrap_err(),
3078            CompileError::UnknownValue(_)
3079        ));
3080    }
3081
3082    #[test]
3083    fn out_of_set_value_inside_a_rule_is_caught() {
3084        let src = alloc::format!(
3085            r"{ONEOF_RESOLVED}
3086            RULE r:
3087                WHEN x go
3088                THEN resolved is censoredmtp
3089        "
3090        );
3091        assert!(matches!(
3092            cs(&src).unwrap_err(),
3093            CompileError::UnknownValue(_)
3094        ));
3095    }
3096
3097    #[test]
3098    fn binary_atoms_in_a_oneof_do_not_close_anything() {
3099        // `alice cooks` / `alice cleans` have no object slot, so there is no value
3100        // set to violate — a later `alice bakes` is just another atom, not an error.
3101        let src = r"
3102        PREMISE chores:
3103            ONEOF
3104                alice cooks
3105                alice cleans
3106        FACT alice bakes
3107        ";
3108        assert!(cs(src).is_ok());
3109    }
3110
3111    #[test]
3112    fn a_subject_without_a_oneof_stays_open() {
3113        // No ONEOF over `mood is …` → open world, any value is a fresh atom.
3114        let src = alloc::format!("{ONEOF_RESOLVED}FACT mood is anything_goes\n");
3115        assert!(cs(&src).is_ok());
3116    }
3117
3118    #[test]
3119    fn two_oneofs_union_their_declared_values() {
3120        // A value declared by either ONEOF for the same variable is legal.
3121        let src = r"
3122        PREMISE a:
3123            ONEOF
3124                v is one
3125                v is two
3126        PREMISE b:
3127            ONEOF
3128                v is two
3129                v is three
3130        FACT v is three
3131        ";
3132        assert!(cs(src).is_ok());
3133    }
3134
3135    #[test]
3136    fn earliest_offender_is_reported() {
3137        // Two violations; the diagnostic points at the first by line.
3138        let src = alloc::format!(
3139            "{ONEOF_RESOLVED}FACT resolved is firstbad\nFACT resolved is secondbad\n"
3140        );
3141        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
3142            panic!("expected UnknownValue");
3143        };
3144        assert_eq!(e.value, "firstbad");
3145    }
3146
3147    #[test]
3148    fn closed_world_spans_imported_domains() {
3149        // physics closes `Motor speed …`; main, referencing it via the prefix with
3150        // a typo, is rejected — the value set is shared across the domain boundary.
3151        let mut r = MemoryResolver::new();
3152        r.add(
3153            "physics.vrf",
3154            r"
3155        DOMAIN physics
3156        PREMISE g:
3157            ONEOF
3158                Motor speed slow
3159                Motor speed fast
3160        ",
3161        );
3162        r.add(
3163            "main.vrf",
3164            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT physics.Motor speed faast\n",
3165        );
3166        let CompileError::UnknownValue(e) = compile("main.vrf", &r).unwrap_err() else {
3167            panic!("expected UnknownValue");
3168        };
3169        assert_eq!(e.value, "faast");
3170        assert_eq!(e.suggestion, " — did you mean `fast`?");
3171    }
3172
3173    #[test]
3174    fn same_value_in_a_different_domain_does_not_clash() {
3175        // `state is open` is closed in domain a; domain b's own `state is shut`
3176        // (never declared in a) is fine — value sets are per-domain.
3177        let mut r = MemoryResolver::new();
3178        r.add(
3179            "a.vrf",
3180            r"
3181        DOMAIN a
3182        PREMISE s:
3183            ONEOF
3184                state is open
3185                state is closed
3186        ",
3187        );
3188        r.add("b.vrf", "DOMAIN b\nIMPORT \"a.vrf\"\nFACT state is shut\n");
3189        // `state is shut` is in domain b, which has no ONEOF → open, so it compiles.
3190        assert!(compile("b.vrf", &r).is_ok());
3191    }
3192
3193    #[test]
3194    fn levenshtein_basics() {
3195        // The canonical distance works on char slices; spell the string cases
3196        // through a tiny adapter so the table below reads as before.
3197        fn lev(a: &str, b: &str) -> usize {
3198            levenshtein(
3199                &a.chars().collect::<Vec<char>>(),
3200                &b.chars().collect::<Vec<char>>(),
3201            )
3202        }
3203        assert_eq!(lev("", ""), 0);
3204        assert_eq!(lev("abc", "abc"), 0);
3205        assert_eq!(lev("censoredmtp", "censored_mtp"), 1);
3206        assert_eq!(lev("norml", "normal"), 1);
3207        assert_eq!(lev("kitten", "sitting"), 3);
3208    }
3209
3210    #[test]
3211    fn nearest_respects_the_length_budget() {
3212        let cands = ["censored", "censored_mtp", "uncensored"];
3213        assert_eq!(nearest("censoredmtp", &cands), Some("censored_mtp"));
3214        // "zzz" is far from all; no suggestion.
3215        assert_eq!(nearest("zzz", &cands), None);
3216    }
3217
3218    #[test]
3219    fn nearest_offers_nothing_for_very_short_values() {
3220        // 1–2 character values get a budget of 0: every other short token is at
3221        // distance 1, so a "did you mean" carries no signal. True for single CJK
3222        // characters (one symbol = a whole word) and for two-letter codes alike.
3223        assert_eq!(nearest("七", &["一", "二", "三"]), None);
3224        assert_eq!(nearest("us", &["uk", "eu", "jp"]), None);
3225        // A multi-character CJK word still gets a sensible nearest (one wrong
3226        // character = distance 1, budget = 3/3 = 1).
3227        assert_eq!(nearest("中文字", &["中文学", "日本語"]), Some("中文学"));
3228    }
3229
3230    #[test]
3231    fn short_value_is_still_rejected_just_without_a_guess() {
3232        // The closed-world error does not depend on the suggestion: an out-of-set
3233        // single-character value is rejected exactly, only the `did you mean` is
3234        // suppressed.
3235        let src = r"
3236        PREMISE pick:
3237            ONEOF
3238                roll is 一
3239                roll is 二
3240        FACT roll is 七
3241        ";
3242        let CompileError::UnknownValue(e) = cs(src).unwrap_err() else {
3243            panic!("expected UnknownValue");
3244        };
3245        assert_eq!(e.value, "七");
3246        assert_eq!(e.suggestion, "");
3247    }
3248
3249    // --- FOR EACH / SET (bounded quantification, Phase 1) ------------------
3250
3251    #[test]
3252    fn for_each_grounds_once_per_element() {
3253        // A ONEOF body over a 2-element set: each element yields one pairwise
3254        // clause + one at-least-one clause = 2 clauses; 2 elements → 4 clauses,
3255        // and 4 distinct grounded atoms (a/b × slot m/n).
3256        let src = r"
3257        SET xs
3258            a
3259            b
3260        PREMISE slot FOR EACH t IN xs:
3261            ONEOF
3262                t slot m
3263                t slot n
3264        ";
3265        let c = cs(src).unwrap();
3266        assert_eq!(c.clauses.len(), 4);
3267        for s in ["a", "b"] {
3268            for o in ["m", "n"] {
3269                assert!(c.atoms.contains(&key(s, "slot", Some(o))));
3270            }
3271        }
3272    }
3273
3274    #[test]
3275    fn for_each_in_a_rule_derives_per_element() {
3276        // A quantified RULE grounds to one rule per element.
3277        let src = r"
3278        SET xs
3279            a
3280            b
3281        RULE r FOR EACH t IN xs:
3282            WHEN t on
3283            THEN t hot
3284        ";
3285        let c = cs(src).unwrap();
3286        assert_eq!(c.rules.len(), 2);
3287    }
3288
3289    #[test]
3290    fn for_each_over_an_undeclared_set_is_rejected() {
3291        let src = r"
3292        SET tasks
3293            a
3294        PREMISE p FOR EACH t IN taske:
3295            ONEOF
3296                t s x
3297                t s y
3298        ";
3299        let CompileError::UnknownSet {
3300            set, suggestion, ..
3301        } = cs(src).unwrap_err()
3302        else {
3303            panic!("expected UnknownSet");
3304        };
3305        assert_eq!(set, "taske");
3306        assert_eq!(suggestion, " — did you mean `tasks`?");
3307    }
3308
3309    #[test]
3310    fn for_each_closes_each_grounded_variable() {
3311        // ONEOF inside FOR EACH closes the variable per element, so an out-of-set
3312        // value on a grounded subject is a hard error (closed-world after subst).
3313        let src = r"
3314        SET xs
3315            a
3316            b
3317        PREMISE c FOR EACH t IN xs:
3318            ONEOF
3319                t color red
3320                t color blue
3321        FACT a color gren
3322        ";
3323        let CompileError::UnknownValue(e) = cs(src).unwrap_err() else {
3324            panic!("expected UnknownValue from the grounded ONEOF");
3325        };
3326        assert_eq!(e.value, "gren");
3327        assert_eq!(e.subject, "a");
3328    }
3329
3330    #[test]
3331    fn nested_for_each_is_a_parse_error() {
3332        // The structural guarantee: a second FOR EACH is unrepresentable — the
3333        // header carries exactly one, so nesting fails to parse (no domain
3334        // product can ever be written).
3335        let src = r"
3336        SET xs
3337            a
3338        PREMISE p FOR EACH x IN xs FOR EACH y IN xs:
3339            ONEOF
3340                x r y
3341                x s y
3342        ";
3343        assert!(matches!(cs(src), Err(CompileError::Parse(_))));
3344    }
3345
3346    #[test]
3347    fn relation_for_each_grounds_per_fact_pair() {
3348        // Two declared edges → the body is instantiated once per edge (two
3349        // pairwise clauses), and both edge atoms are recorded as consumed so the
3350        // ORPHAN lint will not flag them.
3351        let src = r"
3352        FACT a linked b
3353        FACT b linked c
3354        PREMISE p FOR EACH x linked y:
3355            FORBIDS
3356                x hot on
3357                y hot on
3358        ";
3359        let c = cs(src).unwrap();
3360        assert_eq!(c.clauses.len(), 2);
3361        assert_eq!(c.consumed.len(), 2);
3362        assert!(c.consumed.contains(&id(&c, &key("a", "linked", Some("b")))));
3363    }
3364
3365    #[test]
3366    fn relation_for_each_over_no_edges_is_inert() {
3367        // A relation with no matching facts grounds to nothing (vacuous), not an
3368        // error — unlike an undeclared SET.
3369        let src = r"
3370        PREMISE p FOR EACH x linked y:
3371            FORBIDS
3372                x hot on
3373                y hot on
3374        ";
3375        let c = cs(src).unwrap();
3376        assert_eq!(c.clauses.len(), 0);
3377        assert!(c.consumed.is_empty());
3378    }
3379
3380    #[test]
3381    fn close_transitive_extends_the_relation() {
3382        // a->b, b->c; CLOSE adds a->c, so a relation FOR EACH grounds over all
3383        // three pairs (without CLOSE it would be two).
3384        let src = r"
3385        FACT a r b
3386        FACT b r c
3387        CLOSE r TRANSITIVE
3388        PREMISE p FOR EACH x r y:
3389            FORBIDS
3390                x hot on
3391                y hot on
3392        ";
3393        let c = cs(src).unwrap();
3394        assert_eq!(c.clauses.len(), 3);
3395    }
3396
3397    #[test]
3398    fn close_transitive_rejects_a_cycle() {
3399        let src = r"
3400        FACT a r b
3401        FACT b r a
3402        CLOSE r TRANSITIVE
3403        ";
3404        let CompileError::CyclicRelation { relation, .. } = cs(src).unwrap_err() else {
3405            panic!("expected CyclicRelation");
3406        };
3407        assert_eq!(relation, "r");
3408    }
3409
3410    #[test]
3411    fn grounding_count_is_linear_in_the_set() {
3412        // No domain product: N elements → exactly N groundings (here N clauses,
3413        // one at-least-one per element), never N².
3414        let elems: alloc::string::String = (0..20).map(|i| alloc::format!("    e{i}\n")).collect();
3415        let src = alloc::format!(
3416            "SET xs\n{elems}PREMISE p FOR EACH t IN xs:\n    ATLEAST\n        t a\n        t b\n"
3417        );
3418        let c = cs(&src).unwrap();
3419        assert_eq!(c.clauses.len(), 20);
3420    }
3421}