Skip to main content

elenchus_compiler/
lib.rs

1//! elenchus-compiler — compiles parsed elenchus DSL into a canonical clause IR.
2//!
3//! This crate is **preparation, not solving**. It takes the AST (from
4//! `elenchus-parser`) and produces a deterministic, solver-ready intermediate
5//! representation:
6//!
7//! - **atom interner**: `(subject, predicate, object?)` → dense `u32` id,
8//!   canonically sorted so ids (and any later enumeration) are deterministic;
9//! - **desugaring**: surface CAPS sugar → `Impossible` clauses
10//!   (`EXCLUSIVE` pairwise, `WHEN…THEN` → `Impossible([A, …, NOT C])`, etc.);
11//! - **content-addressing** (sha256, mirroring vsm-guard's CAS): identical
12//!   clauses are deduped (idempotent — `P ∧ P ≡ P`), and a named construct
13//!   redefined with a different body is a `PremiseRedefinition` error.
14//!
15//! The actual reasoning (3-valued forward chaining, SAT, all-SAT, the WARNING
16//! pool, the four results) lives in `elenchus-solver`. `IMPORT` resolution is a
17//! source-agnostic [`Resolver`] that flat-merges another source into the shared
18//! atom universe ([`compile`] resolves imports; [`compile_source`] leaves them
19//! pending).
20//!
21//! # Example
22//!
23//! ```
24//! use elenchus_compiler::compile_source;
25//!
26//! // `ASSUME` lowers to a *soft* fact: the same atom universe as a `FACT`, but
27//! // one the solver may retract. Here `x a` is asserted both ways (hard + soft).
28//! let ir = compile_source("demo.vrf", "DOMAIN d\nFACT x a\nASSUME NOT x a\nCHECK x\n").unwrap();
29//! assert_eq!(ir.facts.len(), 2);
30//! assert!(ir.facts.iter().any(|f| f.soft)); // the ASSUME is the soft one
31//! ```
32#![no_std]
33// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
34#![warn(missing_docs)]
35
36extern crate alloc;
37
38#[cfg(feature = "std")]
39extern crate std;
40
41use alloc::boxed::Box;
42use alloc::collections::{BTreeMap, BTreeSet};
43use alloc::string::{String, ToString};
44use alloc::vec;
45use alloc::vec::Vec;
46use core::fmt::Write as _;
47
48use elenchus_parser::{Atom, Body, CloseKind, Conn, ListOp, Literal, Located, Quant, Statement};
49/// Re-exported so downstream crates name one source of truth: [`Diagnostics`] for
50/// the syntax errors carried by [`CompileError::Parse`], and [`kw`] for the
51/// keyword spellings an [`Origin::kind`] is built from (so the solver matches a
52/// `kind` against `kw::PREMISE`, not a re-typed `"PREMISE"`).
53pub use elenchus_parser::{Diagnostics, kw};
54use sha2::{Digest, Sha256};
55use thiserror::Error;
56
57// --- content-addressing (mirrors vsm-guard::hashing) -----------------------
58
59/// SHA-256 content addressing. Used only for dedup / redefinition / provenance,
60/// never for namespacing atoms.
61pub fn hash_hex(data: &[u8]) -> String {
62    let mut hasher = Sha256::new();
63    hasher.update(data);
64    let out = hasher.finalize();
65    let mut s = String::with_capacity(out.len() * 2);
66    for b in out {
67        let _ = write!(s, "{:02x}", b);
68    }
69    s
70}
71
72// --- IR types --------------------------------------------------------------
73
74/// Dense atom identifier (also the SAT variable number).
75pub type AtomId = u32;
76
77/// The identity of an atom: the `domain` plus the triple
78/// `(subject, predicate, object?)`, owned so it survives across merged sources.
79/// The domain is the leading sort key, so atoms group by domain; ordering is
80/// otherwise lexicographic → canonical. Two atoms with the same triple in
81/// *different* domains are distinct (no cross-domain unification).
82#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
83pub struct AtomKey {
84    /// The domain this atom belongs to (the resolved namespace, never a raw
85    /// alias). `physics.engine` and `plan.engine` are different atoms.
86    pub domain: String,
87    /// The entity the claim is about (owned copy of the parser's `subject`).
88    pub subject: String,
89    /// The relation or property asserted. `None` for a **bare proposition** — a
90    /// single-word atom introduced by a `VAR` port (e.g. `db_ready`). `None`
91    /// sorts before any `Some`, so existing (always-`Some`) atoms keep their order.
92    pub predicate: Option<String>,
93    /// Optional object; part of identity, so `has flying` ≠ `has swimming`. Always
94    /// `None` when `predicate` is `None`.
95    pub object: Option<String>,
96}
97
98/// The domain context of one file being compiled: its own declared domain (where
99/// bare atoms fall) and the local names — aliases or imported domain names — it
100/// may reference other domains by. Resolving an atom's optional `domain.` prefix
101/// against this context yields its canonical [`AtomKey`] domain.
102struct DomainCtx {
103    /// The file's own declared domain (the target for unqualified atoms).
104    current: String,
105    /// `local name -> canonical domain` for every name visible in this file
106    /// (always includes `current -> current`, plus one entry per `IMPORT`).
107    aliases: BTreeMap<String, String>,
108}
109
110impl DomainCtx {
111    /// Resolve an atom's optional `domain.` prefix to a canonical domain name.
112    /// `None` → the file's own domain; a prefix not imported here is an error.
113    fn resolve(&self, prefix: Option<&str>) -> Result<String, CompileError> {
114        match prefix {
115            None => Ok(self.current.clone()),
116            Some(p) => self
117                .aliases
118                .get(p)
119                .cloned()
120                .ok_or_else(|| CompileError::UnknownDomain {
121                    domain: p.to_string(),
122                }),
123        }
124    }
125
126    /// Build the owned [`AtomKey`] for a borrowed parser [`Atom`], resolving its
127    /// domain prefix against this file's context.
128    fn key(&self, a: &Atom) -> Result<AtomKey, CompileError> {
129        Ok(AtomKey {
130            domain: self.resolve(a.domain)?,
131            subject: a.subject.to_string(),
132            predicate: a.predicate.map(|p| p.to_string()),
133            object: a.object.map(|o| o.to_string()),
134        })
135    }
136}
137
138/// A literal as it appears *inside* an `Impossible` clause: an atom, optionally
139/// negated. `negated = true` means the literal is `NOT atom`.
140#[derive(Debug, Clone, Copy, PartialEq, Eq)]
141pub struct Lit {
142    /// Interned id of the atom (also its SAT variable number).
143    pub atom: AtomId,
144    /// `true` means this literal is `NOT atom` inside the clause.
145    pub negated: bool,
146}
147
148/// A confident truth value. UNKNOWN is the *absence* of a fact, never stored.
149#[derive(Debug, Clone, Copy, PartialEq, Eq)]
150pub enum Value {
151    /// The atom is asserted TRUE (from `FACT`).
152    True,
153    /// The atom is asserted FALSE (from `NOT`).
154    False,
155}
156
157/// Where a piece of IR came from — for readable conflict/warning pools.
158#[derive(Debug, Clone, PartialEq, Eq)]
159pub struct Origin {
160    /// The source label this came from (file name or `"<root>"`/`"<text>"`).
161    pub source: String,
162    /// 1-based line number of the originating statement.
163    pub line: u32,
164    /// The premise/rule name, if it came from a named construct.
165    pub premise: Option<String>,
166    /// Surface kind for the report. A surface keyword (a [`kw`] constant such as
167    /// `kw::FACT` / `kw::PREMISE`) for source constructs, or [`KIND_UNSAT`] for
168    /// the synthetic origin the solver attaches to a global unsatisfiability.
169    pub kind: &'static str,
170}
171
172/// The [`Origin::kind`] the solver stamps on a conflict that is not pinned to one
173/// source construct but to the program being jointly unsatisfiable. Not a
174/// keyword — so it lives here, next to the other kinds, as the one spelling both
175/// the solver (which sets it) and any reader (which matches it) share.
176pub const KIND_UNSAT: &str = "UNSAT";
177
178/// A confident fact (from `FACT` / `NOT`). Conflicting facts on the same atom
179/// are preserved (both kept) — the solver reports that as a CONFLICT.
180#[derive(Debug, Clone, PartialEq, Eq)]
181pub struct Fact {
182    /// The atom this fact pins down.
183    pub atom: AtomId,
184    /// The asserted truth value.
185    pub value: Value,
186    /// Where it came from (for the report).
187    pub origin: Origin,
188    /// `true` for an `ASSUME` (a *soft*, retractable hypothesis). A soft fact
189    /// behaves like a normal fact in the forward pass, but when the assumptions
190    /// cannot all hold the solver may drop it (and only it) to explain the
191    /// contradiction — a `FACT`/`NOT` is never retractable.
192    pub soft: bool,
193}
194
195/// An `Impossible` clause: the listed literals cannot all hold simultaneously.
196#[derive(Debug, Clone, PartialEq, Eq)]
197pub struct Clause {
198    /// The literals that cannot all hold at once (an `Impossible([...])`).
199    pub lits: Vec<Lit>,
200    /// Where it came from (for the report).
201    pub origin: Origin,
202}
203
204/// A forward-chaining rule (from `RULE`): if all antecedent literals hold, derive
205/// the consequent literals.
206#[derive(Debug, Clone, PartialEq, Eq)]
207pub struct Rule {
208    /// Literals that must all hold for the rule to fire.
209    pub antecedent: Vec<Lit>,
210    /// Literals derived (asserted) when the antecedent holds.
211    pub consequent: Vec<Lit>,
212    /// Where it came from (for the report).
213    pub origin: Origin,
214}
215
216/// A `CHECK` query.
217#[derive(Debug, Clone, PartialEq, Eq)]
218pub struct Check {
219    /// Restrict the report to this subject; `None` means check everything.
220    pub subject: Option<String>,
221    /// `true` runs the backward (all-SAT) pass to detect UNDERDETERMINED.
222    pub bidirectional: bool,
223}
224
225/// The compiled IR: the solver's input.
226#[derive(Debug, Clone, PartialEq, Eq)]
227pub struct Compiled {
228    /// Indexed by [`AtomId`]; canonically sorted.
229    pub atoms: Vec<AtomKey>,
230    /// Confident assertions from `FACT`/`NOT`.
231    pub facts: Vec<Fact>,
232    /// `Impossible` clauses (desugared premises + the built-in non-contradiction).
233    pub clauses: Vec<Clause>,
234    /// Forward-chaining rules from `RULE`.
235    pub rules: Vec<Rule>,
236    /// `CHECK` queries.
237    pub checks: Vec<Check>,
238    /// Imports seen but not yet resolved (only populated by [`compile_source`];
239    /// [`compile`] resolves them, leaving this empty).
240    pub pending_imports: Vec<String>,
241    /// Advisory: imports that a file makes but never references (no `domain.atom`
242    /// from that file uses the imported domain). Structural, per-file, and inert —
243    /// it never affects the solve. Only populated by [`compile`] (an unresolved
244    /// import in [`compile_source`] cannot be classified). See [`UnusedImport`].
245    pub unused_imports: Vec<UnusedImport>,
246    /// Atoms consumed as data by a relation `FOR EACH` (the edge facts, e.g. each
247    /// `a linked b`). They are read by the quantifier, so the solver must not
248    /// report them as ORPHAN facts even though no clause references them.
249    pub consumed: Vec<AtomId>,
250    /// One record per declared `VAR` port: how it resolved (supplied / default /
251    /// unset), its value and origin. Drives the report's PLACEHOLDERS section;
252    /// purely advisory. Filled by `compile_source_with` / `compile_with` after
253    /// [`Compiler::resolve_ports`]; empty when no port was declared.
254    pub placeholders: Vec<PlaceholderInfo>,
255}
256
257/// An advisory record: a file `IMPORT`s a domain it never references. Such an
258/// import is inert — no `domain.atom` in that file mentions it, so removing it
259/// would not change the result. It is almost always a leftover or a forgotten
260/// `domain.` prefix. **Purely informational** — it never changes the verdict.
261#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
262pub struct UnusedImport {
263    /// The source that declared the unused `IMPORT`.
264    pub file: String,
265    /// The imported domain that is never referenced from `file`.
266    pub domain: String,
267    /// The local alias, if the import used `AS <alias>`.
268    pub alias: Option<String>,
269    /// 1-based line of the `IMPORT` statement in `file`.
270    pub line: u32,
271}
272
273/// One external value bound to a port `key`, supplied from outside the program
274/// (CLI / API / a data file). The `origin` is a short human tag used both in the
275/// placeholders report and in a [`CompileError::PortConflict`] message.
276#[derive(Debug, Clone, PartialEq, Eq)]
277pub struct PortBinding {
278    /// The boolean truth supplied for the port.
279    pub value: bool,
280    /// Where it came from: `"CLI"`, `"api"`, `"data:<file>"`, or `"PROVIDE <file>"`.
281    pub origin: String,
282}
283
284/// How a declared `VAR` port got (or did not get) its value — the per-port status
285/// shown in the report's PLACEHOLDERS section. Advisory only; never affects the
286/// verdict.
287#[derive(Debug, Clone, Copy, PartialEq, Eq)]
288pub enum PlaceholderStatus {
289    /// An external value (CLI/API/data) was supplied.
290    Supplied,
291    /// No external value; the port's `DEFAULT` was used.
292    DefaultUsed,
293    /// No external value and no `DEFAULT` — the port stays UNKNOWN.
294    Unset,
295}
296
297/// A reporting record for one declared `VAR` port: its key, how it resolved, the
298/// value it took (if any), and where that value came from.
299#[derive(Debug, Clone, PartialEq, Eq)]
300pub struct PlaceholderInfo {
301    /// The port's name (the external key).
302    pub key: String,
303    /// How it resolved (supplied / default / unset).
304    pub status: PlaceholderStatus,
305    /// The resolved boolean, or `None` when unset.
306    pub value: Option<bool>,
307    /// The origin of a supplied value (`None` for default/unset).
308    pub origin: Option<String>,
309}
310
311/// Anything that can go wrong while compiling (and resolving imports).
312#[derive(Debug, Error, PartialEq, Eq)]
313pub enum CompileError {
314    /// A source failed to parse; carries the full syntax diagnostics (every
315    /// error, each as a caret block with the keyword's correct syntax). The
316    /// source label is already inside the [`Diagnostics`] header.
317    #[error("{0}")]
318    Parse(elenchus_parser::Diagnostics),
319    /// A name was reused with a different body *within the same source*.
320    #[error("'{name}' redefined with a different body")]
321    PremiseRedefinition {
322        /// The clashing premise/rule name.
323        name: String,
324    },
325    /// A source did not declare its `DOMAIN` (required, once, as the first
326    /// statement).
327    #[error("{file}: missing a DOMAIN declaration (every file must start with `DOMAIN <name>`)")]
328    MissingDomain {
329        /// The source label that lacked a `DOMAIN`.
330        file: String,
331    },
332    /// A source declared `DOMAIN` more than once (a file has exactly one domain).
333    #[error("{file}: more than one DOMAIN declaration (a file has exactly one domain)")]
334    DuplicateDomain {
335        /// The source label with the duplicate `DOMAIN`.
336        file: String,
337    },
338    /// An atom referenced a `domain.` prefix that is not the file's own domain and
339    /// was not imported in this file.
340    #[error("unknown domain '{domain}' — declare it with DOMAIN, or IMPORT it in this file")]
341    UnknownDomain {
342        /// The unresolved domain prefix.
343        domain: String,
344    },
345    /// Two imports bound the same local domain name to different domains (use a
346    /// distinct `AS <alias>` to tell them apart).
347    #[error("domain name '{alias}' is bound to two different imports (disambiguate with AS)")]
348    DomainAliasClash {
349        /// The clashing local domain name.
350        alias: String,
351    },
352    /// An `IMPORT` target could not be loaded by the [`Resolver`].
353    #[error("import not found: {0}")]
354    ImportNotFound(String),
355    /// Imports form a cycle (a source transitively imports itself).
356    #[error("circular import: {0}")]
357    CircularImport(String),
358    /// A `RULE` used `OR` in its `THEN`: forward chaining cannot derive a
359    /// disjunction (it would not know which literal to assert). Model it as a
360    /// `PREMISE` constraint instead.
361    #[error("rule '{name}' cannot derive a disjunction (OR in THEN); use a PREMISE instead")]
362    RuleDisjunctiveConsequent {
363        /// The offending rule name.
364        name: String,
365    },
366    /// A reference used a value outside the closed set an `ONEOF` declared for that
367    /// variable. Almost always a typo: the misspelling would otherwise mint a new
368    /// atom that hangs in the air as UNKNOWN. Closed-world is opt-in — it only
369    /// applies to a `(subject, predicate)` whose values an `ONEOF` enumerated.
370    /// Boxed so this comparatively large payload does not bloat every `Result`.
371    #[error(transparent)]
372    UnknownValue(Box<UnknownValue>),
373    /// A `FOR EACH … IN <set>` named a set that was never declared with `SET`.
374    /// Usually a typo in the set name; the suggestion offers the nearest declared
375    /// set when one is close.
376    #[error("{file}:{line}: FOR EACH ranges over '{set}', which is not a declared SET{suggestion}")]
377    UnknownSet {
378        /// The source the offending `FOR EACH` is in.
379        file: String,
380        /// 1-based line of the `FOR EACH`.
381        line: u32,
382        /// The undeclared set name that was referenced.
383        set: String,
384        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
385        suggestion: String,
386    },
387    /// `CLOSE <relation> TRANSITIVE` found a cycle: a node transitively reaches
388    /// itself. Transitive closure requires a DAG (e.g. a dependency graph).
389    #[error(
390        "{file}:{line}: relation '{relation}' has a cycle (`{node}` reaches itself) \
391         — CLOSE … TRANSITIVE requires a DAG"
392    )]
393    CyclicRelation {
394        /// The source the `CLOSE` is in.
395        file: String,
396        /// 1-based line of the `CLOSE`.
397        line: u32,
398        /// The relation predicate being closed.
399        relation: String,
400        /// A node on the cycle (reaches itself).
401        node: String,
402    },
403    /// A bare proposition (a single-word atom like `db_ready`) was used in the
404    /// program but never declared with `VAR`. Almost always a typo or a forgotten
405    /// declaration; the suggestion offers the nearest declared port when close.
406    #[error(
407        "{file}:{line}: '{name}' is a bare proposition but no VAR declares it \
408         — add `VAR {name}`{suggestion}"
409    )]
410    UndeclaredPort {
411        /// The source the offending reference is in.
412        file: String,
413        /// 1-based line of the offending reference.
414        line: u32,
415        /// The undeclared bare-proposition name.
416        name: String,
417        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
418        suggestion: String,
419    },
420    /// An external value (`--set`, API, or `PROVIDE`) named a port that no `VAR`
421    /// declares. Strict by design: silently ignoring an unknown key would hide a
422    /// mistake.
423    #[error("no VAR declares the port '{name}' that an external value sets{suggestion}")]
424    UnknownPort {
425        /// The supplied key that matches no declared port.
426        name: String,
427        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
428        suggestion: String,
429    },
430    /// An external value named a bare target declared in more than one domain, so
431    /// which one it sets is ambiguous. Resolve it by qualifying the key with a
432    /// `domain.` prefix.
433    #[error(
434        "'{name}' is declared in multiple domains ({domains}); qualify which one as `<domain>.{name}`"
435    )]
436    AmbiguousPort {
437        /// The ambiguous bare name (port or atom subject).
438        name: String,
439        /// The domains that declare this name, comma-joined (sorted).
440        domains: String,
441    },
442    /// A multi-word external value (`PROVIDE engine has_fuel: true` or a `--set`/API
443    /// key with a predicate) named an atom that no statement in the program uses.
444    /// Strict by design — like [`CompileError::UnknownPort`], a typo must not be
445    /// silently injected as a new fact.
446    #[error(
447        "no atom '{name}' is used in the program, so an external value cannot set it \
448         (use it in a FACT/PREMISE/RULE, or fix a typo)"
449    )]
450    UnknownExternalAtom {
451        /// The atom reference (e.g. `engine has_fuel`) that matched nothing.
452        name: String,
453    },
454    /// Two sources supplied different values for the same port. Ambiguity is a hard
455    /// error: the engine is about determinism, so it never silently picks one.
456    #[error(
457        "the port '{name}' is set to two different values: {a_value} (from {a_origin}) \
458         and {b_value} (from {b_origin})"
459    )]
460    PortConflict {
461        /// The port set inconsistently.
462        name: String,
463        /// The first binding's value.
464        a_value: bool,
465        /// The first binding's origin tag.
466        a_origin: String,
467        /// The second (conflicting) binding's value.
468        b_value: bool,
469        /// The second binding's origin tag.
470        b_origin: String,
471    },
472    /// A data file (loaded via `--data`) contained a statement other than `PROVIDE`
473    /// (or `DOMAIN`). Data files carry only values, never logic.
474    #[error("{file}:{line}: a data file may only contain PROVIDE (and DOMAIN), not this statement")]
475    DataFileStatement {
476        /// The data file with the offending statement.
477        file: String,
478        /// 1-based line of the offending statement.
479        line: u32,
480    },
481}
482
483/// Details of a closed-world violation (see [`CompileError::UnknownValue`]). Kept
484/// in its own (boxed) struct so the common error path stays small.
485#[derive(Debug, Error, PartialEq, Eq)]
486#[error(
487    "{file}:{line}: '{value}' is not a declared value of '{subject} {predicate}' \
488     — ONEOF declares {{ {declared} }}{suggestion}"
489)]
490pub struct UnknownValue {
491    /// The source the offending reference is in.
492    pub file: String,
493    /// 1-based line of the offending reference.
494    pub line: u32,
495    /// The variable's subject.
496    pub subject: String,
497    /// The variable's predicate.
498    pub predicate: String,
499    /// The out-of-set value that was used.
500    pub value: String,
501    /// The declared legal values, comma-joined (sorted).
502    pub declared: String,
503    /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
504    pub suggestion: String,
505}
506
507// --- raw (key-based) intermediate, before interning ------------------------
508// While accumulating we key everything by `AtomKey` (the owned triple) rather
509// than by `AtomId`, because ids only become stable once *all* sources are merged
510// and the atom set is sorted in `finalize`. These mirror the public IR types but
511// hold keys instead of ids.
512
513/// A literal keyed by atom identity (pre-interning counterpart of [`Lit`]).
514#[derive(Clone)]
515struct RawLit {
516    key: AtomKey,
517    negated: bool,
518}
519
520/// A fact keyed by atom identity (pre-interning counterpart of [`Fact`]).
521struct RawFact {
522    key: AtomKey,
523    value: Value,
524    origin: Origin,
525    soft: bool,
526}
527
528/// A clause keyed by atom identity (pre-interning counterpart of [`Clause`]).
529struct RawClause {
530    lits: Vec<RawLit>,
531    origin: Origin,
532}
533
534/// A rule keyed by atom identity (pre-interning counterpart of [`Rule`]).
535struct RawRule {
536    antecedent: Vec<RawLit>,
537    consequent: Vec<RawLit>,
538    origin: Origin,
539}
540
541/// A declared `VAR` port, keyed in the compiler by `(domain, name)`: its optional
542/// `DEFAULT` fallback and where it was declared (for provenance on the synthetic
543/// fact and the placeholders report).
544struct PortDecl {
545    default: Option<bool>,
546    source: String,
547    line: u32,
548}
549
550/// A parsed reference an external value binds: an optional canonical `domain`, a
551/// `subject`, and optional `predicate`/`object`. A lone subject (predicate `None`)
552/// names a `VAR` port; a multi-word ref names an atom an external value asserts.
553/// `domain == None` means "search every domain" (the historical bare-name match);
554/// `Some(d)` pins it to that domain. Built from a `PROVIDE` atom (alias resolved to
555/// a canonical domain) or parsed from a CLI/API key by [`parse_port_ref`].
556#[derive(Debug, Clone, PartialEq, Eq)]
557struct PortRef {
558    domain: Option<String>,
559    subject: String,
560    predicate: Option<String>,
561    object: Option<String>,
562}
563
564impl PortRef {
565    /// The human label used in error messages — the key the way it was written
566    /// (`engine has_fuel`, `self.has_vision`).
567    fn label(&self) -> String {
568        let mut s = String::new();
569        if let Some(d) = &self.domain {
570            s.push_str(d);
571            s.push('.');
572        }
573        s.push_str(&self.subject);
574        if let Some(p) = &self.predicate {
575            s.push(' ');
576            s.push_str(p);
577        }
578        if let Some(o) = &self.object {
579            s.push(' ');
580            s.push_str(o);
581        }
582        s
583    }
584}
585
586/// Parse a CLI/API key (`db_ready`, `self.has_vision`, `engine has_fuel`) into a
587/// [`PortRef`]. A leading `domain.` (a `.` inside the first whitespace token)
588/// becomes the domain; the rest splits on whitespace into `subject [predicate
589/// [object]]`. `.` is not an identifier character, so the split is unambiguous and
590/// backward-compatible with the bare names used before qualification existed.
591fn parse_port_ref(key: &str) -> PortRef {
592    let mut words = key.split_whitespace();
593    let first = words.next().unwrap_or("");
594    let (domain, subject) = match first.split_once('.') {
595        Some((d, s)) => (Some(d.to_string()), s.to_string()),
596        None => (None, first.to_string()),
597    };
598    PortRef {
599        domain,
600        subject,
601        predicate: words.next().map(str::to_string),
602        object: words.next().map(str::to_string),
603    }
604}
605
606/// The human label for a resolved [`AtomKey`] in port diagnostics
607/// (`domain.subject predicate object`).
608fn atomkey_label(k: &AtomKey) -> String {
609    let mut s = alloc::format!("{}.{}", k.domain, k.subject);
610    if let Some(p) = &k.predicate {
611        s.push(' ');
612        s.push_str(p);
613    }
614    if let Some(o) = &k.object {
615        s.push(' ');
616        s.push_str(o);
617    }
618    s
619}
620
621// --- compiler --------------------------------------------------------------
622
623/// Accumulates statements from one or more sources, then interns + emits the IR.
624#[derive(Default)]
625pub struct Compiler {
626    keys: BTreeSet<AtomKey>,
627    facts: Vec<RawFact>,
628    clauses: Vec<RawClause>,
629    rules: Vec<RawRule>,
630    checks: Vec<Check>,
631    pending_imports: Vec<String>,
632    /// (source, name) → content hash of its body, for redefinition detection.
633    /// Scoped per source: premise/rule names are labels, not global identifiers,
634    /// so different files (domains) may reuse a name. A clash is only an error
635    /// *within the same source*.
636    defined: BTreeMap<(String, String), String>,
637    /// dedup of identical clauses by canonical content hash.
638    clause_sigs: BTreeSet<String>,
639    /// dedup of identical facts by (key, value).
640    fact_sigs: BTreeSet<String>,
641    /// Closed-world value sets declared by `ONEOF`: `(domain, subject, predicate)`
642    /// → the set of legal objects. Once a variable's values are enumerated by a
643    /// `ONEOF`, a reference to that variable with an object *outside* the set is a
644    /// compile error (a likely typo), not a silent new atom. Only `ONEOF` members
645    /// that carry an object register here (binary atoms have no value slot to
646    /// close). See [`Compiler::validate_closed_world`].
647    oneof_values: BTreeMap<(String, String, String), BTreeSet<String>>,
648    /// Declared `SET <name>` collections: name → elements, used to ground a
649    /// `FOR EACH <binder> IN <name>` quantifier by instantiating the body once
650    /// per element. Populated in a pre-pass so a `FOR EACH` may reference a set
651    /// declared later in the file.
652    sets: BTreeMap<String, Vec<String>>,
653    /// Declared relation pairs: predicate → `(subject, object)` of every 3-part
654    /// `FACT`, used to ground a `FOR EACH <a> <predicate> <b>` quantifier. Also a
655    /// pre-pass, so the edges may be declared after the quantifier.
656    relations: BTreeMap<String, Vec<(String, String)>>,
657    /// Edge atoms consumed by a relation `FOR EACH` (e.g. each `a linked b`).
658    /// They are *read as data* by the quantifier, so they are not idle facts —
659    /// [`Compiler::finalize`] passes them to the report to suppress the ORPHAN
660    /// lint.
661    relation_consumed: BTreeSet<AtomKey>,
662    /// Declared `VAR` ports, keyed by `(domain, name)`. Collected as statements are
663    /// added (across every imported file), then resolved against external values in
664    /// [`Compiler::resolve_ports`]. The first declaration of a `(domain, name)`
665    /// wins; later duplicates are ignored.
666    ports: BTreeMap<(String, String), PortDecl>,
667    /// `PROVIDE` bindings seen in compiled sources (origin `"PROVIDE <file>"`).
668    /// Each target's `domain.` prefix is already resolved to a canonical domain.
669    /// They join the same conflict pool as external `--set`/API values in
670    /// [`Compiler::resolve_ports`].
671    provides: Vec<(PortRef, PortBinding)>,
672}
673
674impl Compiler {
675    /// A fresh, empty compiler.
676    pub fn new() -> Self {
677        Self::default()
678    }
679
680    /// Parse one source and accumulate its statements. `source` is a label used
681    /// in provenance (e.g. a file name or `"<root>"`). The source must declare its
682    /// `DOMAIN`; `IMPORT`s are recorded as pending (their domains cannot be bound
683    /// without a [`Resolver`]), so a single source may only reference its own
684    /// domain. Use [`compile`] for cross-domain references.
685    pub fn add_source(&mut self, source: &str, src: &str) -> Result<(), CompileError> {
686        let program = parse_tagged(source, src)?;
687        let domain = extract_domain(&program, source)?;
688        let mut aliases = BTreeMap::new();
689        aliases.insert(domain.clone(), domain.clone());
690        let ctx = DomainCtx {
691            current: domain,
692            aliases,
693        };
694        self.collect_decls(&program);
695        self.apply_closures(&program, source)?;
696        for stmt in &program.statements {
697            match stmt {
698                Statement::Domain(_) => {}
699                Statement::Import { path, .. } => {
700                    self.pending_imports.push(path.data.to_string());
701                }
702                other => self.add_statement(source, other, &ctx)?,
703            }
704        }
705        Ok(())
706    }
707
708    /// Apply every `CLOSE … TRANSITIVE`: replace the relation's pairs with their
709    /// transitive closure so a relation `FOR EACH` ranges over reachability, and
710    /// reject a cycle (a node reaching itself). A pure compile-time graph pass —
711    /// the solver never sees it. Runs after [`collect_decls`] (the direct edges
712    /// must be known) and before grounding.
713    fn apply_closures(
714        &mut self,
715        program: &elenchus_parser::Program,
716        source: &str,
717    ) -> Result<(), CompileError> {
718        for stmt in &program.statements {
719            if let Statement::Close { relation, kind } = stmt {
720                let CloseKind::Transitive = kind;
721                let pairs = self
722                    .relations
723                    .get(relation.data)
724                    .cloned()
725                    .unwrap_or_default();
726                let closed = transitive_closure(pairs);
727                if let Some((node, _)) = closed.iter().find(|(a, b)| a == b) {
728                    return Err(CompileError::CyclicRelation {
729                        file: source.to_string(),
730                        line: relation.span.location_line(),
731                        relation: relation.data.to_string(),
732                        node: node.clone(),
733                    });
734                }
735                self.relations.insert(relation.data.to_string(), closed);
736            }
737        }
738        Ok(())
739    }
740
741    /// Pre-pass: record every `SET` and every relation pair (3-part `FACT`) so a
742    /// `FOR EACH` may reference a set or relation declared anywhere in the same
743    /// source, including after the quantifier.
744    fn collect_decls(&mut self, program: &elenchus_parser::Program) {
745        for stmt in &program.statements {
746            match stmt {
747                Statement::Set { name, elements } => {
748                    self.sets.insert(
749                        name.data.to_string(),
750                        elements.iter().map(|e| e.data.to_string()).collect(),
751                    );
752                }
753                Statement::Fact(a) => {
754                    // Only a 3-part fact (`a rel b`) declares a relation pair; a bare
755                    // proposition or a 2-word fact has no object (hence no predicate
756                    // pair to record).
757                    if let (Some(pred), Some(obj)) = (a.data.predicate, a.data.object) {
758                        self.relations
759                            .entry(pred.to_string())
760                            .or_default()
761                            .push((a.data.subject.to_string(), obj.to_string()));
762                    }
763                }
764                _ => {}
765            }
766        }
767    }
768
769    /// Compile one already-resolved file's statements under its domain context.
770    fn add_resolved(&mut self, file: &ResolvedFile) -> Result<(), CompileError> {
771        let program = parse_tagged(&file.path, &file.content)?;
772        self.collect_decls(&program);
773        self.apply_closures(&program, &file.path)?;
774        for stmt in &program.statements {
775            match stmt {
776                Statement::Import { .. } | Statement::Domain(_) => {}
777                other => self.add_statement(&file.path, other, &file.ctx)?,
778            }
779        }
780        Ok(())
781    }
782
783    /// Route one statement (never `IMPORT`/`DOMAIN` — handled by the loaders) to
784    /// the right accumulator, resolving atom domains through `ctx`.
785    fn add_statement(
786        &mut self,
787        source: &str,
788        stmt: &Statement,
789        ctx: &DomainCtx,
790    ) -> Result<(), CompileError> {
791        match stmt {
792            // Handled by `add_source` / `load_recursive`, never reach here.
793            Statement::Import { .. } | Statement::Domain(_) => {}
794            Statement::Fact(a) => self.add_fact(source, a, Value::True, kw::FACT, false, ctx)?,
795            Statement::Negation(a) => {
796                self.add_fact(source, a, Value::False, kw::NOT, false, ctx)?
797            }
798            Statement::Assume(l) => {
799                let value = if l.data.negated {
800                    Value::False
801                } else {
802                    Value::True
803                };
804                // A soft assertion shares the FACT accumulator; the atom is the
805                // literal's atom, the polarity its `NOT`, and `soft` marks it
806                // retractable. The span is the whole `ASSUME` line.
807                let located = elenchus_parser::Located {
808                    data: l.data.atom.clone(),
809                    span: l.span,
810                };
811                self.add_fact(source, &located, value, kw::ASSUME, true, ctx)?;
812            }
813            Statement::Check {
814                subject,
815                bidirectional,
816            } => self.checks.push(Check {
817                subject: subject.as_ref().map(|s| s.data.to_string()),
818                bidirectional: *bidirectional,
819            }),
820            // Declared in the `collect_decls` / `apply_closures` pre-passes;
821            // nothing to emit here.
822            Statement::Set { .. } | Statement::Close { .. } => {}
823            // A port declaration: record it under this file's domain (the first
824            // declaration of a `(domain, name)` wins). It is resolved against
825            // external values later, in `resolve_ports`.
826            Statement::Var { name, default } => {
827                self.ports
828                    .entry((ctx.current.clone(), name.data.to_string()))
829                    .or_insert(PortDecl {
830                        default: *default,
831                        source: source.to_string(),
832                        line: name.span.location_line(),
833                    });
834            }
835            // A value binding: resolve the target's `domain.` prefix against this
836            // file's context (so an alias becomes a canonical domain; no prefix
837            // stays `None` = search every domain, the historical behaviour), then
838            // queue it into the conflict pool alongside any external values.
839            Statement::Provide { atom, value } => {
840                let a = &atom.data;
841                let domain = match a.domain {
842                    None => None,
843                    Some(p) => Some(ctx.resolve(Some(p))?),
844                };
845                self.provides.push((
846                    PortRef {
847                        domain,
848                        subject: a.subject.to_string(),
849                        predicate: a.predicate.map(|p| p.to_string()),
850                        object: a.object.map(|o| o.to_string()),
851                    },
852                    PortBinding {
853                        value: *value,
854                        origin: alloc::format!("PROVIDE {source}"),
855                    },
856                ));
857            }
858            Statement::Premise { name, quant, body } => {
859                self.add_named(source, name, quant.as_ref(), body, false, ctx)?;
860            }
861            Statement::Rule { name, quant, body } => {
862                self.add_named(source, name, quant.as_ref(), body, true, ctx)?;
863            }
864        }
865        Ok(())
866    }
867
868    /// Record an atom identity in the shared universe (deduped by the `BTreeSet`).
869    fn intern(&mut self, key: &AtomKey) {
870        if !self.keys.contains(key) {
871            self.keys.insert(key.clone());
872        }
873    }
874
875    /// Accumulate a `FACT`/`NOT`; exact duplicates (same key+value+kind) are
876    /// dropped as idempotent, while a `FACT` and a `NOT` on the same atom are
877    /// both kept so the solver can report the CONFLICT.
878    fn add_fact(
879        &mut self,
880        source: &str,
881        a: &elenchus_parser::Located<Atom>,
882        value: Value,
883        kind: &'static str,
884        soft: bool,
885        ctx: &DomainCtx,
886    ) -> Result<(), CompileError> {
887        let key = ctx.key(&a.data)?;
888        self.intern(&key);
889        let sig = alloc::format!(
890            "{}|{}|{}|{}",
891            key_sig(&key),
892            matches!(value, Value::True) as u8,
893            kind,
894            "" // facts dedup ignores line; identical FACT twice is idempotent
895        );
896        if !self.fact_sigs.insert(sig) {
897            return Ok(()); // exact duplicate fact — idempotent
898        }
899        self.facts.push(RawFact {
900            key,
901            value,
902            origin: Origin {
903                source: source.to_string(),
904                line: a.span.location_line(),
905                premise: None,
906                kind,
907            },
908            soft,
909        });
910        Ok(())
911    }
912
913    /// Handle a named construct (`PREMISE` or `RULE`). `is_rule` selects derivation
914    /// vs checking. Returns an error on redefinition with a different body.
915    fn add_named(
916        &mut self,
917        source: &str,
918        name: &Located<&str>,
919        quant: Option<&Quant>,
920        body: &Body,
921        is_rule: bool,
922        ctx: &DomainCtx,
923    ) -> Result<(), CompileError> {
924        let line = name.span.location_line();
925        let name = name.data;
926        // The redefinition hash covers the quantifier too, so two same-named
927        // premises that differ only in their `FOR EACH` are still a redefinition.
928        let mut canon = canonical_body(name, body, is_rule, ctx)?;
929        if let Some(q) = quant {
930            canon.push_str(&quant_sig(q));
931        }
932        let body_hash = hash_hex(canon.as_bytes());
933        let key = (source.to_string(), name.to_string());
934        match self.defined.get(&key) {
935            Some(prev) if *prev == body_hash => return Ok(()), // identical → idempotent
936            Some(_) => {
937                // Same name + different body *in the same source* — a real mistake.
938                return Err(CompileError::PremiseRedefinition {
939                    name: name.to_string(),
940                });
941            }
942            None => {
943                self.defined.insert(key, body_hash);
944            }
945        }
946
947        match quant {
948            // Unquantified: emit the body once, as before.
949            None => self.emit_named(source, name, line, body, is_rule, ctx),
950            // `FOR EACH <binder> IN <set>`: instantiate the body once per element,
951            // substituting the binder. Grounding is exactly `|set|` repetitions of
952            // the *same* desugar — linear, never a domain product (a second binder
953            // is unrepresentable in the grammar).
954            Some(Quant::InSet { binder, set }) => {
955                let elements = match self.sets.get(set.data) {
956                    Some(els) => els.clone(),
957                    None => {
958                        return Err(CompileError::UnknownSet {
959                            file: source.to_string(),
960                            line: set.span.location_line(),
961                            set: set.data.to_string(),
962                            suggestion: nearest_set_suggestion(set.data, &self.sets),
963                        });
964                    }
965                };
966                for el in &elements {
967                    let grounded = subst_body(body, &[(binder.data, el)]);
968                    self.emit_named(source, name, line, &grounded, is_rule, ctx)?;
969                }
970                Ok(())
971            }
972            // `FOR EACH <a> <relation> <b>`: instantiate the body once per declared
973            // FACT pair of that relation, binding `a`→subject, `b`→object. The pair
974            // is pinned by data, so this is linear in the number of facts — never a
975            // product of the domain with itself.
976            Some(Quant::Relation {
977                left,
978                predicate,
979                right,
980            }) => {
981                let pairs = self
982                    .relations
983                    .get(predicate.data)
984                    .cloned()
985                    .unwrap_or_default();
986                for (subj, obj) in &pairs {
987                    let grounded = subst_body(body, &[(left.data, subj), (right.data, obj)]);
988                    self.emit_named(source, name, line, &grounded, is_rule, ctx)?;
989                    // The edge atom is read as data by the quantifier, not idle —
990                    // record it so the ORPHAN lint does not flag it.
991                    if let Ok(k) = ctx.key(&Atom {
992                        domain: None,
993                        subject: subj,
994                        predicate: Some(predicate.data),
995                        object: Some(obj),
996                    }) {
997                        self.relation_consumed.insert(k);
998                    }
999                }
1000                Ok(())
1001            }
1002        }
1003    }
1004
1005    /// Emit the clauses/rule for one (already-grounded) named construct's body.
1006    /// Split out of [`Compiler::add_named`] so a `FOR EACH` can call it once per
1007    /// element with the binder substituted, reusing the exact same desugar.
1008    fn emit_named(
1009        &mut self,
1010        source: &str,
1011        name: &str,
1012        line: u32,
1013        body: &Body,
1014        is_rule: bool,
1015        ctx: &DomainCtx,
1016    ) -> Result<(), CompileError> {
1017        if is_rule {
1018            // RULE always has an implication body (guaranteed by the grammar).
1019            if let Body::Impl {
1020                antecedent,
1021                ante_conn,
1022                consequent,
1023                cons_conn,
1024            } = body
1025            {
1026                // A rule *derives* its consequent; an `OR` consequent is not a
1027                // single fact to assert, so reject it (use a PREMISE instead).
1028                if *cons_conn == Conn::Or {
1029                    return Err(CompileError::RuleDisjunctiveConsequent {
1030                        name: name.to_string(),
1031                    });
1032                }
1033                let (ante, cons) = (raw_lits(antecedent, ctx)?, raw_lits(consequent, ctx)?);
1034                for l in ante.iter().chain(cons.iter()) {
1035                    self.intern(&l.key);
1036                }
1037                let origin = self.origin(source, line, Some(name), kw::RULE);
1038                match ante_conn {
1039                    // a ∧ b → C : one rule firing on the whole antecedent.
1040                    Conn::And => self.rules.push(RawRule {
1041                        antecedent: ante,
1042                        consequent: cons,
1043                        origin,
1044                    }),
1045                    // (a ∨ b) → C == (a → C) ∧ (b → C): one rule per antecedent.
1046                    Conn::Or => {
1047                        for a in &ante {
1048                            self.rules.push(RawRule {
1049                                antecedent: vec![a.clone()],
1050                                consequent: cons.clone(),
1051                                origin: origin.clone(),
1052                            });
1053                        }
1054                    }
1055                }
1056            }
1057            return Ok(());
1058        }
1059
1060        match body {
1061            Body::List { op, atoms } => {
1062                let keys: Vec<AtomKey> = atoms
1063                    .iter()
1064                    .map(|a| ctx.key(&a.data))
1065                    .collect::<Result<_, _>>()?;
1066                for k in &keys {
1067                    self.intern(k);
1068                }
1069                let kind = list_kind(*op);
1070                let origin = self.origin(source, line, Some(name), kind);
1071                match op {
1072                    // EXCLUSIVE / FORBIDS: "at most one" → pairwise Impossible([a_i, a_j]).
1073                    ListOp::Exclusive | ListOp::Forbids => {
1074                        self.emit_pairwise(&keys, &origin);
1075                    }
1076                    // ONEOF: pairwise (at most one) + at-least-one. A ONEOF also
1077                    // *closes* each of its variables: record every member's object
1078                    // as a legal value of `(domain, subject, predicate)` so a later
1079                    // out-of-set reference is caught as a typo (closed-world).
1080                    ListOp::OneOf => {
1081                        self.emit_pairwise(&keys, &origin);
1082                        self.emit_at_least_one(&keys, &origin);
1083                        for k in &keys {
1084                            if let (Some(pred), Some(obj)) = (&k.predicate, &k.object) {
1085                                self.oneof_values
1086                                    .entry((k.domain.clone(), k.subject.clone(), pred.clone()))
1087                                    .or_default()
1088                                    .insert(obj.clone());
1089                            }
1090                        }
1091                    }
1092                    // ATLEAST: Impossible([NOT a_1, …, NOT a_n]).
1093                    ListOp::AtLeast => {
1094                        self.emit_at_least_one(&keys, &origin);
1095                    }
1096                }
1097            }
1098            Body::Impl {
1099                antecedent,
1100                ante_conn,
1101                consequent,
1102                cons_conn,
1103            } => {
1104                // Implication A → C as `Impossible(A_true ∧ ¬C)`. We group each
1105                // side by its connective and emit one clause per (ante × cons)
1106                // group pair — a uniform rule covering all AND/OR combinations:
1107                //   AND-ante → all its literals share every clause;
1108                //   OR-ante  → one clause per literal;
1109                //   AND-cons → one clause per (negated) literal;
1110                //   OR-cons  → all its (negated) literals share every clause.
1111                let ante = raw_lits(antecedent, ctx)?;
1112                let cons = raw_lits(consequent, ctx)?;
1113                for l in ante.iter().chain(cons.iter()) {
1114                    self.intern(&l.key);
1115                }
1116                let origin = self.origin(source, line, Some(name), kw::PREMISE);
1117
1118                let ante_groups: Vec<Vec<RawLit>> = match ante_conn {
1119                    Conn::And => vec![ante.clone()],
1120                    Conn::Or => ante.iter().map(|l| vec![l.clone()]).collect(),
1121                };
1122                let cons_groups: Vec<Vec<RawLit>> = match cons_conn {
1123                    Conn::And => cons.iter().map(|l| vec![l.clone()]).collect(),
1124                    Conn::Or => vec![cons.clone()],
1125                };
1126                for ag in &ante_groups {
1127                    for cg in &cons_groups {
1128                        let mut lits = ag.clone();
1129                        for c in cg {
1130                            lits.push(RawLit {
1131                                key: c.key.clone(),
1132                                negated: !c.negated,
1133                            });
1134                        }
1135                        self.push_clause(lits, origin.clone());
1136                    }
1137                }
1138            }
1139        }
1140        Ok(())
1141    }
1142
1143    /// Emit "at most one TRUE" as one `Impossible([a_i, a_j])` per unordered
1144    /// pair. Pairwise (not a single big clause) because `Impossible([a,b,c])`
1145    /// only forbids *all three* together — it would still allow two.
1146    fn emit_pairwise(&mut self, keys: &[AtomKey], origin: &Origin) {
1147        for i in 0..keys.len() {
1148            for j in (i + 1)..keys.len() {
1149                let lits = vec![
1150                    RawLit {
1151                        key: keys[i].clone(),
1152                        negated: false,
1153                    },
1154                    RawLit {
1155                        key: keys[j].clone(),
1156                        negated: false,
1157                    },
1158                ];
1159                self.push_clause(lits, origin.clone());
1160            }
1161        }
1162    }
1163
1164    /// Emit "at least one TRUE" as a single `Impossible([NOT a_1, …, NOT a_n])`
1165    /// — it is impossible for all of them to be false at once.
1166    fn emit_at_least_one(&mut self, keys: &[AtomKey], origin: &Origin) {
1167        let lits = keys
1168            .iter()
1169            .map(|k| RawLit {
1170                key: k.clone(),
1171                negated: true,
1172            })
1173            .collect();
1174        self.push_clause(lits, origin.clone());
1175    }
1176
1177    /// Append a clause unless an identical one (by canonical [`clause_sig`]) is
1178    /// already present — `P ∧ P ≡ P`, so dedup keeps the IR minimal.
1179    fn push_clause(&mut self, lits: Vec<RawLit>, origin: Origin) {
1180        let sig = clause_sig(&lits);
1181        if self.clause_sigs.insert(sig) {
1182            self.clauses.push(RawClause { lits, origin });
1183        }
1184        // else: identical clause already present — idempotent.
1185    }
1186
1187    /// Build an [`Origin`] for provenance from the current source/line/name.
1188    fn origin(&self, source: &str, line: u32, premise: Option<&str>, kind: &'static str) -> Origin {
1189        Origin {
1190            source: source.to_string(),
1191            line,
1192            premise: premise.map(|s| s.to_string()),
1193            kind,
1194        }
1195    }
1196
1197    /// Closed-world check: once an `ONEOF` enumerates a variable's values, any
1198    /// reference to that `(domain, subject, predicate)` with an object outside the
1199    /// declared set is rejected as a likely typo — instead of silently minting a
1200    /// new atom that then "hangs in the air" as an UNKNOWN. Reports the earliest
1201    /// (by source, line) offender, with a `did you mean` suggestion when a declared
1202    /// value is within edit distance. Must run after all sources are accumulated
1203    /// (a `ONEOF` may follow the reference) and before [`finalize`].
1204    fn validate_closed_world(&self) -> Result<(), CompileError> {
1205        if self.oneof_values.is_empty() {
1206            return Ok(());
1207        }
1208        // Every atom reference reachable from a fact, clause, or rule, with the
1209        // line it came from. ONEOF members appear here too (as clause literals) but
1210        // are in-set by construction, so they never trip the check. `out_of_set`
1211        // tests one key against its variable's declared values.
1212        let out_of_set = |key: &AtomKey| -> bool {
1213            // A bare proposition (predicate `None`) has no object, so it never trips
1214            // closed-world; only `(predicate, object)` atoms are checked.
1215            match (key.predicate.as_ref(), key.object.as_ref()) {
1216                (Some(pred), Some(obj)) => self
1217                    .oneof_values
1218                    .get(&(key.domain.clone(), key.subject.clone(), pred.clone()))
1219                    .is_some_and(|set| !set.contains(obj)),
1220                _ => false,
1221            }
1222        };
1223        let mut offenders: Vec<(&str, u32, &AtomKey)> = Vec::new();
1224        for f in &self.facts {
1225            if out_of_set(&f.key) {
1226                offenders.push((&f.origin.source, f.origin.line, &f.key));
1227            }
1228        }
1229        for c in &self.clauses {
1230            for l in &c.lits {
1231                if out_of_set(&l.key) {
1232                    offenders.push((&c.origin.source, c.origin.line, &l.key));
1233                }
1234            }
1235        }
1236        for r in &self.rules {
1237            for l in r.antecedent.iter().chain(r.consequent.iter()) {
1238                if out_of_set(&l.key) {
1239                    offenders.push((&r.origin.source, r.origin.line, &l.key));
1240                }
1241            }
1242        }
1243        // Earliest offender wins, for a stable, source-ordered diagnostic.
1244        let Some(&(source, line, key)) = offenders.iter().min_by(|a, b| {
1245            (a.0, a.1, &a.2.subject, &a.2.object).cmp(&(b.0, b.1, &b.2.subject, &b.2.object))
1246        }) else {
1247            return Ok(());
1248        };
1249        // The offender tripped `out_of_set`, so its predicate is `Some`.
1250        let pred = key.predicate.clone().unwrap_or_default();
1251        let set = &self.oneof_values[&(key.domain.clone(), key.subject.clone(), pred.clone())];
1252        let declared: Vec<&str> = set.iter().map(|s| s.as_str()).collect(); // BTreeSet → sorted
1253        let value = key.object.clone().unwrap_or_default();
1254        let suggestion = did_you_mean(&value, &declared);
1255        Err(CompileError::UnknownValue(Box::new(UnknownValue {
1256            file: source.to_string(),
1257            line,
1258            subject: key.subject.clone(),
1259            predicate: pred,
1260            value,
1261            declared: declared.join(", "),
1262            suggestion,
1263        })))
1264    }
1265
1266    /// Resolve every declared `VAR` port against the external `inputs`, validate
1267    /// that every used bare proposition is declared, and push a synthetic hard fact
1268    /// for each resolved port. Returns the per-port report records (the PLACEHOLDERS
1269    /// section). Run after the add-statement loop (all ports declared, all bare
1270    /// props interned) and before [`finalize`].
1271    ///
1272    /// Every ambiguity is a hard error, by design (the engine is deterministic):
1273    /// - a used bare proposition with no `VAR` → [`CompileError::UndeclaredPort`];
1274    /// - an external key naming no declared port → [`CompileError::UnknownPort`];
1275    /// - an external key whose bare name is declared in >1 domain → [`CompileError::AmbiguousPort`];
1276    /// - two bindings disagreeing on one key → [`CompileError::PortConflict`].
1277    ///
1278    /// A resolved port becomes a hard fact (observationally equal to `FACT name` /
1279    /// `NOT name`); a port with neither value nor `DEFAULT` stays UNKNOWN (no fact).
1280    fn resolve_ports(
1281        &mut self,
1282        inputs: &[(String, PortBinding)],
1283    ) -> Result<Vec<PlaceholderInfo>, CompileError> {
1284        // (1) Every *used* bare proposition (in a fact, clause, or rule) must be a
1285        // declared port. Reported with the earliest source/line, like closed-world.
1286        {
1287            let bad = |k: &AtomKey| {
1288                k.predicate.is_none()
1289                    && !self
1290                        .ports
1291                        .contains_key(&(k.domain.clone(), k.subject.clone()))
1292            };
1293            let mut offenders: Vec<(&str, u32, &str)> = Vec::new();
1294            for f in &self.facts {
1295                if bad(&f.key) {
1296                    offenders.push((&f.origin.source, f.origin.line, &f.key.subject));
1297                }
1298            }
1299            for c in &self.clauses {
1300                for l in &c.lits {
1301                    if bad(&l.key) {
1302                        offenders.push((&c.origin.source, c.origin.line, &l.key.subject));
1303                    }
1304                }
1305            }
1306            for r in &self.rules {
1307                for l in r.antecedent.iter().chain(r.consequent.iter()) {
1308                    if bad(&l.key) {
1309                        offenders.push((&r.origin.source, r.origin.line, &l.key.subject));
1310                    }
1311                }
1312            }
1313            if let Some(&(source, line, name)) = offenders
1314                .iter()
1315                .min_by(|a, b| (a.0, a.1, a.2).cmp(&(b.0, b.1, b.2)))
1316            {
1317                let names: Vec<&str> = self.ports.keys().map(|(_, n)| n.as_str()).collect();
1318                return Err(CompileError::UndeclaredPort {
1319                    file: source.to_string(),
1320                    line,
1321                    name: name.to_string(),
1322                    suggestion: did_you_mean(name, &names),
1323                });
1324            }
1325        }
1326
1327        // (2) Resolve every binding's ref (in-file `PROVIDE`s plus external values)
1328        // to a canonical `AtomKey`, then merge by that key — so a bare and a
1329        // `domain.`-qualified key for the same target meet in one pool. Two
1330        // disagreeing values for one resolved key conflict.
1331        let external = inputs.iter().map(|(k, b)| (parse_port_ref(k), b.clone()));
1332        let bindings: Vec<(PortRef, PortBinding)> =
1333            self.provides.iter().cloned().chain(external).collect();
1334        let mut merged: BTreeMap<AtomKey, PortBinding> = BTreeMap::new();
1335        for (rf, b) in bindings {
1336            let key = self.resolve_port_ref(&rf)?;
1337            match merged.get(&key) {
1338                Some(prev) if prev.value != b.value => {
1339                    return Err(CompileError::PortConflict {
1340                        name: atomkey_label(&key),
1341                        a_value: prev.value,
1342                        a_origin: prev.origin.clone(),
1343                        b_value: b.value,
1344                        b_origin: b.origin.clone(),
1345                    });
1346                }
1347                _ => {
1348                    merged.entry(key).or_insert(b);
1349                }
1350            }
1351        }
1352
1353        // (3) Resolve each declared port: supplied > DEFAULT > unset (UNKNOWN). The
1354        // placeholder key is bare unless the name collides across domains, in which
1355        // case it is qualified so the PLACEHOLDERS section stays unambiguous.
1356        let mut name_counts: BTreeMap<&str, usize> = BTreeMap::new();
1357        for (_, n) in self.ports.keys() {
1358            *name_counts.entry(n.as_str()).or_default() += 1;
1359        }
1360        let mut placeholders: Vec<PlaceholderInfo> = Vec::new();
1361        let mut interns: Vec<AtomKey> = Vec::new();
1362        let mut synth: Vec<(AtomKey, bool, String, u32)> = Vec::new();
1363        for ((domain, name), decl) in &self.ports {
1364            let key = AtomKey {
1365                domain: domain.clone(),
1366                subject: name.clone(),
1367                predicate: None,
1368                object: None,
1369            };
1370            interns.push(key.clone());
1371            let (status, value, origin) = match merged.get(&key) {
1372                Some(b) => (
1373                    PlaceholderStatus::Supplied,
1374                    Some(b.value),
1375                    Some(b.origin.clone()),
1376                ),
1377                None => match decl.default {
1378                    Some(v) => (PlaceholderStatus::DefaultUsed, Some(v), None),
1379                    None => (PlaceholderStatus::Unset, None, None),
1380                },
1381            };
1382            if let Some(v) = value {
1383                synth.push((key, v, decl.source.clone(), decl.line));
1384            }
1385            let label = if name_counts.get(name.as_str()).copied().unwrap_or(0) > 1 {
1386                alloc::format!("{domain}.{name}")
1387            } else {
1388                name.clone()
1389            };
1390            placeholders.push(PlaceholderInfo {
1391                key: label,
1392                status,
1393                value,
1394                origin,
1395            });
1396        }
1397
1398        // (4) Intern every declared port (so it appears even if unused), keep it out
1399        // of the ORPHAN lint, and assert a hard fact for each resolved one.
1400        for key in interns {
1401            self.intern(&key);
1402            self.relation_consumed.insert(key);
1403        }
1404        for (key, value, source, line) in synth {
1405            self.facts.push(RawFact {
1406                key,
1407                value: if value { Value::True } else { Value::False },
1408                origin: Origin {
1409                    source,
1410                    line,
1411                    premise: None,
1412                    kind: kw::VAR,
1413                },
1414                soft: false,
1415            });
1416        }
1417
1418        // (5) A multi-word binding asserts an *atom* (not a VAR port) — inject it as
1419        // a hard fact, tagged with its supply origin. The atom is already interned
1420        // (it must occur in the program; `resolve_port_ref` rejects unknown ones).
1421        for (key, b) in &merged {
1422            if key.predicate.is_some() {
1423                self.facts.push(RawFact {
1424                    key: key.clone(),
1425                    value: if b.value { Value::True } else { Value::False },
1426                    origin: Origin {
1427                        source: b.origin.clone(),
1428                        line: 0,
1429                        premise: None,
1430                        kind: kw::PROVIDE,
1431                    },
1432                    soft: false,
1433                });
1434            }
1435        }
1436
1437        Ok(placeholders)
1438    }
1439
1440    /// Resolve one external [`PortRef`] to the canonical [`AtomKey`] it binds.
1441    /// A single-word ref (`predicate == None`) must match a declared `VAR` port; a
1442    /// multi-word ref must match an atom the program already uses. A `domain.`
1443    /// prefix pins the domain; without one, a name found in more than one domain is
1444    /// [`CompileError::AmbiguousPort`] (resolve by qualifying it).
1445    fn resolve_port_ref(&self, rf: &PortRef) -> Result<AtomKey, CompileError> {
1446        if rf.predicate.is_none() {
1447            let domains: Vec<&str> = self
1448                .ports
1449                .keys()
1450                .filter(|k| k.1 == rf.subject && rf.domain.as_deref().is_none_or(|d| k.0 == d))
1451                .map(|k| k.0.as_str())
1452                .collect();
1453            match domains.as_slice() {
1454                [] => {
1455                    let names: Vec<&str> = self.ports.keys().map(|(_, n)| n.as_str()).collect();
1456                    Err(CompileError::UnknownPort {
1457                        name: rf.label(),
1458                        suggestion: did_you_mean(&rf.subject, &names),
1459                    })
1460                }
1461                [d] => Ok(AtomKey {
1462                    domain: (*d).to_string(),
1463                    subject: rf.subject.clone(),
1464                    predicate: None,
1465                    object: None,
1466                }),
1467                many => Err(CompileError::AmbiguousPort {
1468                    name: rf.subject.clone(),
1469                    domains: many.join(", "),
1470                }),
1471            }
1472        } else {
1473            let cands: Vec<&AtomKey> = self
1474                .keys
1475                .iter()
1476                .filter(|k| {
1477                    k.subject == rf.subject
1478                        && k.predicate.as_deref() == rf.predicate.as_deref()
1479                        && k.object.as_deref() == rf.object.as_deref()
1480                        && rf.domain.as_deref().is_none_or(|d| k.domain == d)
1481                })
1482                .collect();
1483            match cands.as_slice() {
1484                [] => Err(CompileError::UnknownExternalAtom { name: rf.label() }),
1485                [k] => Ok((*k).clone()),
1486                many => Err(CompileError::AmbiguousPort {
1487                    name: rf.label(),
1488                    domains: many
1489                        .iter()
1490                        .map(|k| k.domain.as_str())
1491                        .collect::<Vec<_>>()
1492                        .join(", "),
1493                }),
1494            }
1495        }
1496    }
1497
1498    /// Intern all atoms (canonical sort), then lower the raw IR to ids.
1499    pub fn finalize(self) -> Compiled {
1500        let atoms: Vec<AtomKey> = self.keys.into_iter().collect(); // BTreeSet → sorted
1501        let mut id_of: BTreeMap<AtomKey, AtomId> = BTreeMap::new();
1502        for (i, k) in atoms.iter().enumerate() {
1503            id_of.insert(k.clone(), i as AtomId);
1504        }
1505        let lower = |l: &RawLit| Lit {
1506            atom: id_of[&l.key],
1507            negated: l.negated,
1508        };
1509
1510        let facts = self
1511            .facts
1512            .into_iter()
1513            .map(|f| Fact {
1514                atom: id_of[&f.key],
1515                value: f.value,
1516                origin: f.origin,
1517                soft: f.soft,
1518            })
1519            .collect();
1520        let clauses = self
1521            .clauses
1522            .into_iter()
1523            .map(|c| Clause {
1524                lits: c.lits.iter().map(lower).collect(),
1525                origin: c.origin,
1526            })
1527            .collect();
1528        let rules = self
1529            .rules
1530            .into_iter()
1531            .map(|r| Rule {
1532                antecedent: r.antecedent.iter().map(lower).collect(),
1533                consequent: r.consequent.iter().map(lower).collect(),
1534                origin: r.origin,
1535            })
1536            .collect();
1537
1538        let consumed = self
1539            .relation_consumed
1540            .iter()
1541            .filter_map(|k| id_of.get(k).copied())
1542            .collect();
1543
1544        Compiled {
1545            atoms,
1546            facts,
1547            clauses,
1548            rules,
1549            checks: self.checks,
1550            pending_imports: self.pending_imports,
1551            unused_imports: Vec::new(), // filled by `compile` (advisory, post-resolution)
1552            consumed,
1553            placeholders: Vec::new(), // filled by `*_with` after `resolve_ports`
1554        }
1555    }
1556}
1557
1558/// Parse a data-only `.vrf` source and extract its `PROVIDE` bindings as
1559/// `(name, value)` pairs. A data file carries only values: any statement other
1560/// than `PROVIDE` (or a `DOMAIN`) is a [`CompileError::DataFileStatement`]. Used to
1561/// load a `--data <file>` of port values without compiling it as a program.
1562pub fn read_data_source(file: &str, src: &str) -> Result<Vec<(String, bool)>, CompileError> {
1563    let program = parse_tagged(file, src)?;
1564    let mut out = Vec::new();
1565    for stmt in &program.statements {
1566        match stmt {
1567            Statement::Provide { atom, value } => {
1568                // Serialize the target into a key string (`[domain.]subject[
1569                // predicate[ object]]`) that `parse_port_ref` re-parses uniformly,
1570                // so a data file and a `--set` key share one resolution path.
1571                let a = &atom.data;
1572                let mut key = String::new();
1573                if let Some(d) = a.domain {
1574                    key.push_str(d);
1575                    key.push('.');
1576                }
1577                key.push_str(a.subject);
1578                if let Some(p) = a.predicate {
1579                    key.push(' ');
1580                    key.push_str(p);
1581                }
1582                if let Some(o) = a.object {
1583                    key.push(' ');
1584                    key.push_str(o);
1585                }
1586                out.push((key, *value));
1587            }
1588            Statement::Domain(_) => {}
1589            other => {
1590                return Err(CompileError::DataFileStatement {
1591                    file: file.to_string(),
1592                    line: statement_line(other),
1593                });
1594            }
1595        }
1596    }
1597    Ok(out)
1598}
1599
1600/// Parse a data-only `.vrf` source into ready-to-merge port [`PortBinding`]s, each
1601/// tagged with origin `data:<file>`. The shared bridge every surface uses to turn a
1602/// `--data` / data-map source into engine inputs, so a data file behaves identically
1603/// whether it arrives from the CLI, wasm, or MCP.
1604pub fn read_data_bindings(
1605    file: &str,
1606    src: &str,
1607) -> Result<Vec<(String, PortBinding)>, CompileError> {
1608    Ok(read_data_source(file, src)?
1609        .into_iter()
1610        .map(|(name, value)| {
1611            (
1612                name,
1613                PortBinding {
1614                    value,
1615                    origin: alloc::format!("data:{file}"),
1616                },
1617            )
1618        })
1619        .collect())
1620}
1621
1622/// The 1-based source line a statement begins on (for diagnostics).
1623fn statement_line(s: &Statement) -> u32 {
1624    match s {
1625        Statement::Domain(n) => n.span.location_line(),
1626        Statement::Import { path, .. } => path.span.location_line(),
1627        Statement::Fact(a) | Statement::Negation(a) => a.span.location_line(),
1628        Statement::Assume(l) => l.span.location_line(),
1629        Statement::Set { name, .. } => name.span.location_line(),
1630        Statement::Close { relation, .. } => relation.span.location_line(),
1631        Statement::Var { name, .. } => name.span.location_line(),
1632        Statement::Provide { atom, .. } => atom.span.location_line(),
1633        Statement::Premise { name, .. } | Statement::Rule { name, .. } => name.span.location_line(),
1634        Statement::Check { subject, .. } => subject
1635            .as_ref()
1636            .map(|s| s.span.location_line())
1637            .unwrap_or(0),
1638    }
1639}
1640
1641/// Convenience: compile a single source into the IR. `IMPORT`s are recorded as
1642/// pending, not resolved (use [`compile`] with a [`Resolver`] to resolve them).
1643pub fn compile_source(source: &str, src: &str) -> Result<Compiled, CompileError> {
1644    compile_source_with(source, src, &[])
1645}
1646
1647/// Like [`compile_source`], but resolving declared `VAR` ports against external
1648/// `inputs` (`(name, binding)` pairs from CLI / API / data). The `Compiled` carries
1649/// a `placeholders` record per declared port.
1650pub fn compile_source_with(
1651    source: &str,
1652    src: &str,
1653    inputs: &[(String, PortBinding)],
1654) -> Result<Compiled, CompileError> {
1655    let mut c = Compiler::new();
1656    c.add_source(source, src)?;
1657    c.validate_closed_world()?;
1658    let placeholders = c.resolve_ports(inputs)?;
1659    let mut compiled = c.finalize();
1660    compiled.placeholders = placeholders;
1661    Ok(compiled)
1662}
1663
1664// --- import resolution (source-agnostic) -----------------------------------
1665
1666/// Resolves `IMPORT "path"` to source text. The engine is source-agnostic: it
1667/// consumes strings, so a file is merely one backing store. Mirrors
1668/// vsm-grammar's `SourceResolver`.
1669pub trait Resolver {
1670    /// Load the raw source text for a resolved path.
1671    fn load(&self, path: &str) -> Result<String, CompileError>;
1672
1673    /// Normalize `relative` against the importing source `base`.
1674    /// Default: paths are absolute names, returned unchanged.
1675    fn resolve(&self, _base: &str, relative: &str) -> String {
1676        relative.to_string()
1677    }
1678}
1679
1680/// An in-memory resolver: serves sources from a name → content map. Pure
1681/// `no_std`. Mirrors vsm-grammar's `MemoryResolver`.
1682#[derive(Default)]
1683pub struct MemoryResolver {
1684    sources: BTreeMap<String, String>,
1685}
1686
1687impl MemoryResolver {
1688    /// An empty resolver with no sources.
1689    pub fn new() -> Self {
1690        Self::default()
1691    }
1692
1693    /// Register `content` under `path`; returns `&mut self` for chaining.
1694    pub fn add(&mut self, path: &str, content: &str) -> &mut Self {
1695        self.sources.insert(path.to_string(), content.to_string());
1696        self
1697    }
1698}
1699
1700/// Normalize an `IMPORT` path **identically on every OS and every surface**: treat
1701/// both `/` and `\` as separators, resolve `relative` against `base`'s directory,
1702/// collapse `.` / `..`, and re-join with `/`. Pure and `no_std`, so it does not
1703/// depend on the host (or the compile target's) path semantics — a Windows-style
1704/// and a Unix-style import resolve to the same virtual path whether the resolver is
1705/// filesystem-, JS-callback-, or in-memory-backed. The single source of truth all
1706/// three [`Resolver`]s share.
1707pub fn normalize_import_path(base: &str, relative: &str) -> String {
1708    fn is_sep(c: char) -> bool {
1709        c == '/' || c == '\\'
1710    }
1711    fn push<'a>(parts: &mut Vec<&'a str>, seg: &'a str) {
1712        match seg {
1713            "" | "." => {}
1714            ".." => {
1715                parts.pop();
1716            }
1717            _ => parts.push(seg),
1718        }
1719    }
1720    let mut absolute = base.starts_with(is_sep);
1721    let mut parts: Vec<&str> = Vec::new();
1722    // `base`'s directory = every segment but the last (the importing file's name).
1723    let base_segs: Vec<&str> = base.split(is_sep).collect();
1724    for seg in base_segs.iter().take(base_segs.len().saturating_sub(1)) {
1725        push(&mut parts, seg);
1726    }
1727    // An absolute `relative` replaces the base directory entirely.
1728    if relative.starts_with(is_sep) {
1729        parts.clear();
1730        absolute = true;
1731    }
1732    for seg in relative.split(is_sep) {
1733        push(&mut parts, seg);
1734    }
1735    let joined = parts.join("/");
1736    if absolute {
1737        alloc::format!("/{joined}")
1738    } else {
1739        joined
1740    }
1741}
1742
1743impl Resolver for MemoryResolver {
1744    fn load(&self, path: &str) -> Result<String, CompileError> {
1745        self.sources
1746            .get(path)
1747            .cloned()
1748            .ok_or_else(|| CompileError::ImportNotFound(path.to_string()))
1749    }
1750
1751    fn resolve(&self, base: &str, relative: &str) -> String {
1752        normalize_import_path(base, relative)
1753    }
1754}
1755
1756/// A filesystem-backed resolver. Mirrors vsm-grammar's `FileResolver`:
1757/// relative imports resolve against the importing file's directory, with manual
1758/// `..` normalization (no canonicalization, to keep a virtual layout).
1759#[cfg(feature = "std")]
1760pub struct FileResolver;
1761
1762#[cfg(feature = "std")]
1763impl Resolver for FileResolver {
1764    fn load(&self, path: &str) -> Result<String, CompileError> {
1765        std::fs::read_to_string(path)
1766            .map_err(|e| CompileError::ImportNotFound(alloc::format!("{}: {}", path, e)))
1767    }
1768
1769    fn resolve(&self, base: &str, relative: &str) -> String {
1770        // Share the one OS-independent normalizer (forward slashes, `..` collapsed)
1771        // so a resolved path — and the provenance recorded in the IR — is identical
1772        // on Windows and Unix, and identical to the in-memory and JS resolvers.
1773        // Windows `std::fs` accepts `/` just fine.
1774        normalize_import_path(base, relative)
1775    }
1776}
1777
1778/// One resolved source ready to compile: its provenance path, raw text, and the
1779/// domain context (own domain + import-alias bindings) its atoms resolve against.
1780struct ResolvedFile {
1781    path: String,
1782    content: String,
1783    ctx: DomainCtx,
1784}
1785
1786/// Compile a root source and all its transitive `IMPORT`s into one IR.
1787///
1788/// Each file is keyed by `DOMAIN`; atoms unify only within a domain. Imports are
1789/// referenced by `<domain>.<atom>` and visibility is file-local (naming is not
1790/// transitive, though a dependency's clauses still participate). Sources are
1791/// content-addressed (sha256): a source reached by several paths is compiled once
1792/// (so a diamond — or an exponential fan-out — stays linear, never blowing up),
1793/// and an import cycle is an error.
1794///
1795/// Resolution is **iterative** (an explicit work stack, not native recursion), so
1796/// an arbitrarily deep import chain cannot overflow the call stack.
1797///
1798/// Premise/rule names are per-source labels, not global identifiers: different
1799/// files may reuse a name, and the report qualifies them by source. A name reused
1800/// with a different body is an error only *within the same source*.
1801pub fn compile<R: Resolver>(root: &str, resolver: &R) -> Result<Compiled, CompileError> {
1802    compile_with(root, resolver, &[])
1803}
1804
1805/// Like [`compile`], but resolving declared `VAR` ports against external `inputs`.
1806/// Ports declared in any file of the import graph are aggregated, then resolved
1807/// once after every file is added.
1808pub fn compile_with<R: Resolver>(
1809    root: &str,
1810    resolver: &R,
1811    inputs: &[(String, PortBinding)],
1812) -> Result<Compiled, CompileError> {
1813    let (files, unused_imports) = resolve_graph(root, resolver)?;
1814    let mut c = Compiler::new();
1815    for file in &files {
1816        c.add_resolved(file)?;
1817    }
1818    c.validate_closed_world()?;
1819    let placeholders = c.resolve_ports(inputs)?;
1820    let mut compiled = c.finalize();
1821    compiled.unused_imports = unused_imports;
1822    compiled.placeholders = placeholders;
1823    Ok(compiled)
1824}
1825
1826/// One `IMPORT` edge: the optional local alias, the resolved child path, and the
1827/// `IMPORT` line (for the unused-import advisory).
1828struct ImportEdge {
1829    alias: Option<String>,
1830    child_path: String,
1831    line: u32,
1832}
1833
1834/// A discovered source during graph resolution: its first-seen path, raw text,
1835/// declared domain, import edges, and the set of domain prefixes its atoms use
1836/// (`None` = its own domain; `Some(p)` = a `p.` prefix) — used to flag imports
1837/// that the file never references.
1838struct DiscoveredFile {
1839    path: String,
1840    content: String,
1841    domain: String,
1842    edges: Vec<ImportEdge>,
1843    used_prefixes: BTreeSet<Option<String>>,
1844}
1845
1846/// Resolve the whole import graph reachable from `root` into a flat list of
1847/// [`ResolvedFile`]s, each distinct source appearing once.
1848///
1849/// Iterative depth-first traversal with an explicit work stack (`Enter`/`Exit`
1850/// frames) — no native recursion, so depth is unbounded without risking a stack
1851/// overflow. Memoized by content hash (a diamond/repeat is visited once); a hash
1852/// re-encountered while still on the active path is a [`CompileError::CircularImport`].
1853fn resolve_graph<R: Resolver>(
1854    root: &str,
1855    resolver: &R,
1856) -> Result<(Vec<ResolvedFile>, Vec<UnusedImport>), CompileError> {
1857    /// One unit of pending work on the traversal stack.
1858    enum Step {
1859        /// Visit a file at this resolved path (load, parse, enqueue its imports).
1860        Enter(String),
1861        /// Mark this content hash finished (pop it off the active path).
1862        Exit(String),
1863    }
1864
1865    let mut discovered: BTreeMap<String, DiscoveredFile> = BTreeMap::new(); // by hash
1866    let mut path_hash: BTreeMap<String, String> = BTreeMap::new(); // resolved path → hash
1867    let mut order: Vec<String> = Vec::new(); // finish order, by hash
1868    let mut active: BTreeSet<String> = BTreeSet::new(); // hashes on the current DFS path
1869    let mut work: Vec<Step> = vec![Step::Enter(root.to_string())];
1870
1871    while let Some(step) = work.pop() {
1872        match step {
1873            Step::Exit(hash) => {
1874                active.remove(&hash);
1875                order.push(hash);
1876            }
1877            Step::Enter(path) => {
1878                let content = resolver.load(&path)?;
1879                let hash = hash_hex(content.as_bytes());
1880                path_hash.insert(path.clone(), hash.clone());
1881                if active.contains(&hash) {
1882                    return Err(CompileError::CircularImport(path)); // back-edge to an ancestor
1883                }
1884                if discovered.contains_key(&hash) {
1885                    continue; // already fully resolved by another path — dedup
1886                }
1887                let program = parse_tagged(&path, &content)?;
1888                let domain = extract_domain(&program, &path)?;
1889                let mut edges = Vec::new();
1890                let mut used_prefixes = BTreeSet::new();
1891                for stmt in &program.statements {
1892                    if let Statement::Import { path: p, alias } = stmt {
1893                        edges.push(ImportEdge {
1894                            alias: alias.as_ref().map(|a| a.data.to_string()),
1895                            child_path: resolver.resolve(&path, p.data),
1896                            line: p.span.location_line(),
1897                        });
1898                    } else {
1899                        collect_prefixes(stmt, &mut used_prefixes);
1900                    }
1901                }
1902                drop(program); // release the borrow on `content` before moving it
1903                active.insert(hash.clone());
1904                work.push(Step::Exit(hash.clone()));
1905                for e in edges.iter().rev() {
1906                    work.push(Step::Enter(e.child_path.clone()));
1907                }
1908                discovered.insert(
1909                    hash,
1910                    DiscoveredFile {
1911                        path,
1912                        content,
1913                        domain,
1914                        edges,
1915                        used_prefixes,
1916                    },
1917                );
1918            }
1919        }
1920    }
1921
1922    // Build each file's domain context now that every domain is known.
1923    // Look up every file's domain (small strings) so we can then *move* each
1924    // file's (potentially large) content out of `discovered` instead of cloning.
1925    let domain_of: BTreeMap<&str, &str> = discovered
1926        .iter()
1927        .map(|(h, f)| (h.as_str(), f.domain.as_str()))
1928        .collect();
1929
1930    let mut out = Vec::with_capacity(order.len());
1931    let mut unused: Vec<UnusedImport> = Vec::new();
1932    for hash in &order {
1933        let file = &discovered[hash];
1934        let mut aliases = BTreeMap::new();
1935        aliases.insert(file.domain.clone(), file.domain.clone());
1936        for edge in &file.edges {
1937            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
1938            let bind = edge
1939                .alias
1940                .clone()
1941                .unwrap_or_else(|| child_domain.to_string());
1942            match aliases.get(&bind) {
1943                Some(existing) if existing != child_domain => {
1944                    return Err(CompileError::DomainAliasClash { alias: bind });
1945                }
1946                _ => {
1947                    aliases.insert(bind, child_domain.to_string());
1948                }
1949            }
1950        }
1951
1952        // The domains this file actually references (each used prefix resolved
1953        // against its own domain / imports). An imported domain absent from this
1954        // set is an unused import.
1955        let referenced: BTreeSet<&str> = file
1956            .used_prefixes
1957            .iter()
1958            .filter_map(|p| match p {
1959                None => Some(file.domain.as_str()),
1960                Some(name) => aliases.get(name).map(|d| d.as_str()),
1961            })
1962            .collect();
1963        for edge in &file.edges {
1964            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
1965            if !referenced.contains(child_domain) {
1966                unused.push(UnusedImport {
1967                    file: file.path.clone(),
1968                    domain: child_domain.to_string(),
1969                    alias: edge.alias.clone(),
1970                    line: edge.line,
1971                });
1972            }
1973        }
1974
1975        let ctx = DomainCtx {
1976            current: file.domain.clone(),
1977            aliases,
1978        };
1979        out.push((hash.clone(), ctx));
1980    }
1981    unused.sort();
1982
1983    // Now move content/path out of `discovered` (no large clones) and pair with
1984    // the contexts built above.
1985    let files = out
1986        .into_iter()
1987        .map(|(hash, ctx)| {
1988            let file = discovered.remove(&hash).expect("hash was discovered");
1989            ResolvedFile {
1990                path: file.path,
1991                content: file.content,
1992                ctx,
1993            }
1994        })
1995        .collect();
1996    Ok((files, unused))
1997}
1998
1999/// A canonical signature of a `FOR EACH` quantifier, appended to the body hash so
2000/// two same-named premises that differ only in their quantifier still count as a
2001/// redefinition.
2002fn quant_sig(q: &Quant) -> String {
2003    match q {
2004        Quant::InSet { binder, set } => alloc::format!("|FOREACH {} IN {}", binder.data, set.data),
2005        Quant::Relation {
2006            left,
2007            predicate,
2008            right,
2009        } => alloc::format!("|FOREACH {} {} {}", left.data, predicate.data, right.data),
2010    }
2011}
2012
2013/// Parse one source, tagging any syntax [`Diagnostics`] with its file label so a
2014/// `CompileError::Parse` names the right file. The single spelling of "parse, and
2015/// on failure attach the file" — shared by the inline, resolved, and import paths.
2016fn parse_tagged<'a>(
2017    file: &str,
2018    content: &'a str,
2019) -> Result<elenchus_parser::Program<'a>, CompileError> {
2020    elenchus_parser::parse(content).map_err(|mut diag| {
2021        diag.set_file(file);
2022        CompileError::Parse(diag)
2023    })
2024}
2025
2026/// `" — did you mean \`x\`?"` for an undeclared set name, or empty when no
2027/// declared set name is close enough.
2028fn nearest_set_suggestion(set: &str, sets: &BTreeMap<String, Vec<String>>) -> String {
2029    let names: Vec<&str> = sets.keys().map(String::as_str).collect();
2030    did_you_mean(set, &names)
2031}
2032
2033/// A list of binder substitutions `(name, value)` applied during grounding: one
2034/// entry for an `IN <set>` quantifier, two for a `<a> <rel> <b>` relation.
2035type Subs<'s> = [(&'s str, &'s str)];
2036
2037/// Replace any binder with its value in one identifier; non-matching pass through.
2038fn subst_ident<'s>(s: &'s str, subs: &Subs<'s>) -> &'s str {
2039    subs.iter()
2040        .find_map(|&(b, v)| (s == b).then_some(v))
2041        .unwrap_or(s)
2042}
2043
2044/// Replace the binders in an atom (subject, predicate, and object positions).
2045fn subst_atom<'s>(a: &Atom<'s>, subs: &Subs<'s>) -> Atom<'s> {
2046    Atom {
2047        domain: a.domain,
2048        subject: subst_ident(a.subject, subs),
2049        predicate: a.predicate.map(|p| subst_ident(p, subs)),
2050        object: a.object.map(|o| subst_ident(o, subs)),
2051    }
2052}
2053
2054/// Replace the binders in one located literal (preserving its span and `NOT`).
2055fn subst_lit<'s>(ll: &Located<'s, Literal<'s>>, subs: &Subs<'s>) -> Located<'s, Literal<'s>> {
2056    Located {
2057        data: Literal {
2058            negated: ll.data.negated,
2059            atom: subst_atom(&ll.data.atom, subs),
2060        },
2061        span: ll.span,
2062    }
2063}
2064
2065/// Build a grounded copy of a body with the `FOR EACH` binders substituted.
2066/// Spans are preserved so any error still points at the original source line. The
2067/// result borrows from the original body and from the substitution values, so it
2068/// is consumed immediately (its keys interned) and never stored.
2069fn subst_body<'s>(body: &Body<'s>, subs: &Subs<'s>) -> Body<'s> {
2070    match body {
2071        Body::List { op, atoms } => Body::List {
2072            op: *op,
2073            atoms: atoms
2074                .iter()
2075                .map(|la| Located {
2076                    data: subst_atom(&la.data, subs),
2077                    span: la.span,
2078                })
2079                .collect(),
2080        },
2081        Body::Impl {
2082            antecedent,
2083            ante_conn,
2084            consequent,
2085            cons_conn,
2086        } => Body::Impl {
2087            antecedent: antecedent.iter().map(|l| subst_lit(l, subs)).collect(),
2088            ante_conn: *ante_conn,
2089            consequent: consequent.iter().map(|l| subst_lit(l, subs)).collect(),
2090            cons_conn: *cons_conn,
2091        },
2092    }
2093}
2094
2095/// Collect the domain prefixes used by a statement's atoms into `out` (`None` for
2096/// a bare atom, `Some(p)` for a `p.`-qualified one) — feeds the unused-import lint.
2097fn collect_prefixes(stmt: &Statement, out: &mut BTreeSet<Option<String>>) {
2098    let mut add = |a: &Atom| {
2099        out.insert(a.domain.map(|d| d.to_string()));
2100    };
2101    match stmt {
2102        Statement::Fact(a) | Statement::Negation(a) => add(&a.data),
2103        Statement::Assume(l) => add(&l.data.atom),
2104        Statement::Premise { body, .. } | Statement::Rule { body, .. } => match body {
2105            Body::List { atoms, .. } => atoms.iter().for_each(|a| add(&a.data)),
2106            Body::Impl {
2107                antecedent,
2108                consequent,
2109                ..
2110            } => antecedent
2111                .iter()
2112                .chain(consequent)
2113                .for_each(|l| add(&l.data.atom)),
2114        },
2115        Statement::Domain(_)
2116        | Statement::Import { .. }
2117        | Statement::Check { .. }
2118        | Statement::Set { .. }
2119        | Statement::Close { .. }
2120        | Statement::Var { .. }
2121        | Statement::Provide { .. } => {}
2122    }
2123}
2124
2125/// The transitive closure of a relation's `(from, to)` pairs: add `(a, c)`
2126/// whenever `(a, b)` and `(b, c)` are present, to a fixpoint. A self-pair
2127/// `(x, x)` in the result marks a cycle. A small compile-time graph op.
2128fn transitive_closure(pairs: Vec<(String, String)>) -> Vec<(String, String)> {
2129    let mut set: BTreeSet<(String, String)> = pairs.into_iter().collect();
2130    loop {
2131        let mut added: Vec<(String, String)> = Vec::new();
2132        for (a, b) in &set {
2133            for (c, d) in &set {
2134                if b == c {
2135                    let p = (a.clone(), d.clone());
2136                    if !set.contains(&p) {
2137                        added.push(p);
2138                    }
2139                }
2140            }
2141        }
2142        if added.is_empty() {
2143            break;
2144        }
2145        set.extend(added);
2146    }
2147    set.into_iter().collect()
2148}
2149
2150/// The single `DOMAIN` a source declares, or an error if it has none or several.
2151fn extract_domain(
2152    program: &elenchus_parser::Program,
2153    source: &str,
2154) -> Result<String, CompileError> {
2155    let mut found: Option<String> = None;
2156    for stmt in &program.statements {
2157        if let Statement::Domain(name) = stmt {
2158            if found.is_some() {
2159                return Err(CompileError::DuplicateDomain {
2160                    file: source.to_string(),
2161                });
2162            }
2163            found = Some(name.data.to_string());
2164        }
2165    }
2166    found.ok_or_else(|| CompileError::MissingDomain {
2167        file: source.to_string(),
2168    })
2169}
2170
2171// --- helpers ---------------------------------------------------------------
2172
2173/// Levenshtein edit distance over Unicode scalars (rolling two-row DP). Small
2174/// inputs (atom/value names), so the simple DP is plenty. The one edit-distance
2175/// implementation in the workspace: the compiler's "did you mean" suggestions
2176/// (via [`nearest`]) and the solver's typo-hint lint both build on it.
2177pub fn levenshtein(a: &[char], b: &[char]) -> usize {
2178    let mut prev: Vec<usize> = (0..=b.len()).collect();
2179    let mut cur = vec![0usize; b.len() + 1];
2180    for (i, &ca) in a.iter().enumerate() {
2181        cur[0] = i + 1;
2182        for (j, &cb) in b.iter().enumerate() {
2183            let cost = usize::from(ca != cb);
2184            cur[j + 1] = (prev[j + 1] + 1).min(cur[j] + 1).min(prev[j] + cost);
2185        }
2186        core::mem::swap(&mut prev, &mut cur);
2187    }
2188    prev[b.len()]
2189}
2190
2191/// The closest candidate to `word` within an edit-distance threshold, or `None`
2192/// when nothing is close enough to be a useful "did you mean".
2193///
2194/// The threshold scales with length (`len / 3`, in Unicode scalars) and is **not**
2195/// floored at 1: a value of 1–2 characters yields a budget of 0, so no suggestion
2196/// is offered. This is deliberate — for very short tokens (a single CJK character,
2197/// where one symbol is a whole word, or a two-letter code) every other short value
2198/// sits at distance 1, so a "did you mean" there is pure noise, not a typo cue. The
2199/// rejection itself is exact (set membership), so suppressing the guess never hides
2200/// a real error; it only withholds a meaningless one. Longer names tolerate a slip
2201/// or two, mirroring the spirit of the solver's typo-hint lint.
2202fn nearest<'a>(word: &str, candidates: &[&'a str]) -> Option<&'a str> {
2203    let budget = word.chars().count() / 3;
2204    if budget == 0 {
2205        return None;
2206    }
2207    let w: Vec<char> = word.chars().collect();
2208    candidates
2209        .iter()
2210        .map(|&c| (levenshtein(&w, &c.chars().collect::<Vec<char>>()), c))
2211        .filter(|&(d, _)| d <= budget)
2212        .min_by_key(|&(d, _)| d)
2213        .map(|(_, c)| c)
2214}
2215
2216/// `" — did you mean `x`?"` for the nearest candidate to `word`, or empty when
2217/// none is close enough. The single spelling of the suggestion suffix, shared by
2218/// every "unknown name" diagnostic (values, sets, …).
2219fn did_you_mean(word: &str, candidates: &[&str]) -> String {
2220    match nearest(word, candidates) {
2221        Some(s) => alloc::format!(" — did you mean `{s}`?"),
2222        None => String::new(),
2223    }
2224}
2225
2226/// Lower parsed, located literals to key-based [`RawLit`]s (drops spans),
2227/// resolving each atom's domain through `ctx`.
2228fn raw_lits(
2229    lits: &[elenchus_parser::Located<Literal>],
2230    ctx: &DomainCtx,
2231) -> Result<Vec<RawLit>, CompileError> {
2232    lits.iter()
2233        .map(|l| {
2234            Ok(RawLit {
2235                key: ctx.key(&l.data.atom)?,
2236                negated: l.data.negated,
2237            })
2238        })
2239        .collect()
2240}
2241
2242/// The surface keyword for a list op, used as [`Origin::kind`] in the report.
2243fn list_kind(op: ListOp) -> &'static str {
2244    match op {
2245        ListOp::Exclusive => kw::EXCLUSIVE,
2246        ListOp::Forbids => kw::FORBIDS,
2247        ListOp::OneOf => kw::ONEOF,
2248        ListOp::AtLeast => kw::ATLEAST,
2249    }
2250}
2251
2252/// Stable `domain|subject|predicate|object` string for an atom key (the unit from
2253/// which clause/fact/body signatures are built). Includes the domain so atoms in
2254/// different domains never share a signature.
2255fn key_sig(k: &AtomKey) -> String {
2256    alloc::format!(
2257        "{}|{}|{}|{}",
2258        k.domain,
2259        k.subject,
2260        k.predicate.as_deref().unwrap_or(""),
2261        k.object.as_deref().unwrap_or("")
2262    )
2263}
2264
2265/// Canonical, order-independent signature of a clause's literals (for dedup).
2266fn clause_sig(lits: &[RawLit]) -> String {
2267    let mut parts: Vec<String> = lits
2268        .iter()
2269        .map(|l| alloc::format!("{}|{}", key_sig(&l.key), l.negated as u8))
2270        .collect();
2271    parts.sort();
2272    parts.dedup();
2273    parts.join(";")
2274}
2275
2276/// Canonical body string for a named construct, hashed for redefinition checks.
2277/// Resolves atom domains through `ctx` so the signature keys on resolved identity.
2278fn canonical_body(
2279    name: &str,
2280    body: &Body,
2281    is_rule: bool,
2282    ctx: &DomainCtx,
2283) -> Result<String, CompileError> {
2284    let mut s = String::new();
2285    let _ = write!(
2286        s,
2287        "{}|{}|",
2288        if is_rule { kw::RULE } else { kw::PREMISE },
2289        name
2290    );
2291    match body {
2292        Body::List { op, atoms } => {
2293            let _ = write!(s, "LIST|{}|", list_kind(*op));
2294            let mut keys: Vec<String> = atoms
2295                .iter()
2296                .map(|a| Ok(key_sig(&ctx.key(&a.data)?)))
2297                .collect::<Result<_, CompileError>>()?;
2298            keys.sort();
2299            s.push_str(&keys.join(";"));
2300        }
2301        Body::Impl {
2302            antecedent,
2303            ante_conn,
2304            consequent,
2305            cons_conn,
2306        } => {
2307            let conn = |c: &Conn| if *c == Conn::Or { "OR" } else { "AND" };
2308            s.push_str("IMPL|ANTE|");
2309            s.push_str(conn(ante_conn));
2310            s.push('|');
2311            s.push_str(&lit_sigs(antecedent, ctx)?);
2312            s.push_str("|CONS|");
2313            s.push_str(conn(cons_conn));
2314            s.push('|');
2315            s.push_str(&lit_sigs(consequent, ctx)?);
2316        }
2317    }
2318    Ok(s)
2319}
2320
2321/// Sorted `key|negated` signature of a literal list (order-independent), used
2322/// inside [`canonical_body`] so reordering a body does not look like a redefinition.
2323fn lit_sigs(
2324    lits: &[elenchus_parser::Located<Literal>],
2325    ctx: &DomainCtx,
2326) -> Result<String, CompileError> {
2327    let mut parts: Vec<String> = lits
2328        .iter()
2329        .map(|l| {
2330            Ok(alloc::format!(
2331                "{}|{}",
2332                key_sig(&ctx.key(&l.data.atom)?),
2333                l.data.negated as u8
2334            ))
2335        })
2336        .collect::<Result<_, CompileError>>()?;
2337    parts.sort();
2338    Ok(parts.join(";"))
2339}
2340
2341#[cfg(test)]
2342mod tests {
2343    use super::*;
2344
2345    /// Compile a single inline source under a default `DOMAIN t`, so test
2346    /// programs need not repeat the declaration. Atoms land in domain `t`.
2347    fn cs(src: &str) -> Result<Compiled, CompileError> {
2348        compile_source("<t>", &alloc::format!("DOMAIN t\n{src}"))
2349    }
2350
2351    /// An atom key in the default test domain `t`.
2352    fn key(subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
2353        key_in("t", subject, predicate, object)
2354    }
2355
2356    /// An atom key in an explicit domain.
2357    fn key_in(domain: &str, subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
2358        AtomKey {
2359            domain: domain.to_string(),
2360            subject: subject.to_string(),
2361            predicate: Some(predicate.to_string()),
2362            object: object.map(|o| o.to_string()),
2363        }
2364    }
2365
2366    fn id(c: &Compiled, k: &AtomKey) -> AtomId {
2367        c.atoms.iter().position(|a| a == k).unwrap() as AtomId
2368    }
2369
2370    #[test]
2371    fn exclusive_unfolds_pairwise() {
2372        let src = r#"
2373        PREMISE e:
2374            EXCLUSIVE
2375                x a
2376                x b
2377                x c
2378        "#;
2379        let c = cs(src).unwrap();
2380        // C(3,2) = 3 clauses, each of 2 positive literals.
2381        assert_eq!(c.clauses.len(), 3);
2382        for cl in &c.clauses {
2383            assert_eq!(cl.lits.len(), 2);
2384            assert!(cl.lits.iter().all(|l| !l.negated));
2385        }
2386    }
2387
2388    #[test]
2389    fn implication_negates_consequent() {
2390        // WHEN x a THEN x b  ==  Impossible([x a, NOT x b])
2391        let src = r#"
2392        PREMISE r:
2393            WHEN x a
2394            THEN x b
2395        "#;
2396        let c = cs(src).unwrap();
2397        assert_eq!(c.clauses.len(), 1);
2398        let cl = &c.clauses[0];
2399        assert_eq!(cl.lits.len(), 2);
2400        let a = id(&c, &key("x", "a", None));
2401        let b = id(&c, &key("x", "b", None));
2402        assert!(cl.lits.contains(&Lit {
2403            atom: a,
2404            negated: false
2405        }));
2406        assert!(cl.lits.contains(&Lit {
2407            atom: b,
2408            negated: true
2409        }));
2410    }
2411
2412    #[test]
2413    fn negated_consequent_flips_to_positive() {
2414        // THEN NOT x b  →  NOT(NOT x b) = x b positive inside Impossible
2415        let src = r#"
2416        PREMISE r:
2417            WHEN x a
2418            THEN NOT x b
2419        "#;
2420        let c = cs(src).unwrap();
2421        let b = id(&c, &key("x", "b", None));
2422        assert!(c.clauses[0].lits.contains(&Lit {
2423            atom: b,
2424            negated: false
2425        }));
2426    }
2427
2428    #[test]
2429    fn consequent_or_is_one_clause_with_all_negated() {
2430        // WHEN x p THEN x a OR x b  ==  Impossible([x p, NOT x a, NOT x b])
2431        let src = r#"
2432        PREMISE r:
2433            WHEN x p
2434            THEN x a
2435            OR x b
2436        "#;
2437        let c = cs(src).unwrap();
2438        assert_eq!(c.clauses.len(), 1);
2439        let cl = &c.clauses[0];
2440        assert_eq!(cl.lits.len(), 3);
2441        let p = id(&c, &key("x", "p", None));
2442        let a = id(&c, &key("x", "a", None));
2443        let b = id(&c, &key("x", "b", None));
2444        assert!(cl.lits.contains(&Lit {
2445            atom: p,
2446            negated: false
2447        }));
2448        assert!(cl.lits.contains(&Lit {
2449            atom: a,
2450            negated: true
2451        }));
2452        assert!(cl.lits.contains(&Lit {
2453            atom: b,
2454            negated: true
2455        }));
2456    }
2457
2458    #[test]
2459    fn antecedent_or_is_one_clause_per_disjunct() {
2460        // WHEN x a OR x b THEN x c
2461        //   == Impossible([x a, NOT x c]) ∧ Impossible([x b, NOT x c])
2462        let src = r#"
2463        PREMISE r:
2464            WHEN x a
2465            OR x b
2466            THEN x c
2467        "#;
2468        let c = cs(src).unwrap();
2469        assert_eq!(c.clauses.len(), 2);
2470        let a = id(&c, &key("x", "a", None));
2471        let b = id(&c, &key("x", "b", None));
2472        let cc = id(&c, &key("x", "c", None));
2473        // every clause has exactly two lits and carries NOT c
2474        for cl in &c.clauses {
2475            assert_eq!(cl.lits.len(), 2);
2476            assert!(cl.lits.contains(&Lit {
2477                atom: cc,
2478                negated: true
2479            }));
2480        }
2481        let has = |atom| {
2482            c.clauses.iter().any(|cl| {
2483                cl.lits.contains(&Lit {
2484                    atom,
2485                    negated: false,
2486                })
2487            })
2488        };
2489        assert!(has(a) && has(b));
2490    }
2491
2492    #[test]
2493    fn antecedent_or_with_consequent_or_distributes() {
2494        // (a ∨ b) → (c ∨ d): Impossible([a,¬c,¬d]) ∧ Impossible([b,¬c,¬d])
2495        let src = r#"
2496        PREMISE r:
2497            WHEN x a
2498            OR x b
2499            THEN x c
2500            OR x d
2501        "#;
2502        let c = cs(src).unwrap();
2503        assert_eq!(c.clauses.len(), 2);
2504        for cl in &c.clauses {
2505            assert_eq!(cl.lits.len(), 3);
2506        }
2507    }
2508
2509    #[test]
2510    fn rule_with_or_antecedent_splits_into_two_rules() {
2511        // (a ∨ b) → c derives c whenever either fires: two single-antecedent rules.
2512        let src = r#"
2513        RULE r:
2514            WHEN x a
2515            OR x b
2516            THEN x c
2517        "#;
2518        let c = cs(src).unwrap();
2519        assert_eq!(c.rules.len(), 2);
2520        assert!(
2521            c.rules
2522                .iter()
2523                .all(|r| r.antecedent.len() == 1 && r.consequent.len() == 1)
2524        );
2525    }
2526
2527    #[test]
2528    fn rule_with_or_consequent_is_rejected() {
2529        // A rule cannot derive a disjunction — must be a PREMISE.
2530        let src = r#"
2531        RULE r:
2532            WHEN x a
2533            THEN x b
2534            OR x c
2535        "#;
2536        let err = cs(src).unwrap_err();
2537        assert!(matches!(
2538            err,
2539            CompileError::RuleDisjunctiveConsequent { .. }
2540        ));
2541    }
2542
2543    #[test]
2544    fn oneof_is_pairwise_plus_at_least_one() {
2545        let src = r#"
2546        PREMISE o:
2547            ONEOF
2548                x a
2549                x b
2550        "#;
2551        let c = cs(src).unwrap();
2552        // pairwise C(2,2)=1 + 1 at-least-one = 2 clauses
2553        assert_eq!(c.clauses.len(), 2);
2554        // the at-least-one clause is the all-negated one
2555        assert!(c.clauses.iter().any(|cl| cl.lits.iter().all(|l| l.negated)));
2556    }
2557
2558    #[test]
2559    fn atleast_is_one_negated_clause() {
2560        let src = r#"
2561        PREMISE a:
2562            ATLEAST
2563                x a
2564                x b
2565                x c
2566        "#;
2567        let c = cs(src).unwrap();
2568        assert_eq!(c.clauses.len(), 1);
2569        assert_eq!(c.clauses[0].lits.len(), 3);
2570        assert!(c.clauses[0].lits.iter().all(|l| l.negated));
2571    }
2572
2573    #[test]
2574    fn rules_are_separate_from_clauses() {
2575        let src = r#"
2576        RULE needs:
2577            WHEN x a
2578            THEN x b
2579        "#;
2580        let c = cs(src).unwrap();
2581        assert_eq!(c.clauses.len(), 0);
2582        assert_eq!(c.rules.len(), 1);
2583        assert_eq!(c.rules[0].antecedent.len(), 1);
2584        assert_eq!(c.rules[0].consequent.len(), 1);
2585    }
2586
2587    #[test]
2588    fn atoms_are_canonically_sorted() {
2589        let src = r#"
2590        FACT z z
2591        FACT a a
2592        FACT m m
2593        "#;
2594        let c = cs(src).unwrap();
2595        let mut sorted = c.atoms.clone();
2596        sorted.sort();
2597        assert_eq!(c.atoms, sorted);
2598    }
2599
2600    #[test]
2601    fn duplicate_premise_is_idempotent() {
2602        let src = r#"
2603        PREMISE e:
2604            EXCLUSIVE
2605                x a
2606                x b
2607        PREMISE e:
2608            EXCLUSIVE
2609                x a
2610                x b
2611        "#;
2612        let c = cs(src).unwrap();
2613        assert_eq!(c.clauses.len(), 1);
2614    }
2615
2616    #[test]
2617    fn redefinition_with_different_body_errors() {
2618        let src = r#"
2619        PREMISE e:
2620            EXCLUSIVE
2621                x a
2622                x b
2623        PREMISE e:
2624            EXCLUSIVE
2625                x a
2626                x c
2627        "#;
2628        let err = cs(src).unwrap_err();
2629        assert_eq!(
2630            err,
2631            CompileError::PremiseRedefinition {
2632                name: "e".to_string()
2633            }
2634        );
2635    }
2636
2637    #[test]
2638    fn duplicate_fact_is_idempotent() {
2639        let c = cs("FACT x a\nFACT x a\n").unwrap();
2640        assert_eq!(c.facts.len(), 1);
2641    }
2642
2643    #[test]
2644    fn conflicting_facts_are_both_kept() {
2645        // FACT X + NOT X is a CONFLICT for the solver, not a compile error.
2646        let c = cs("FACT x a\nNOT x a\n").unwrap();
2647        assert_eq!(c.facts.len(), 2);
2648    }
2649
2650    #[test]
2651    fn import_is_recorded_pending() {
2652        let c = cs("IMPORT \"physics.vrf\"\nFACT x a\n").unwrap();
2653        assert_eq!(c.pending_imports, vec!["physics.vrf".to_string()]);
2654    }
2655
2656    #[test]
2657    fn qualified_fact_lands_in_the_imported_domain() {
2658        // The library's premise is about `physics.Engine_X has fuel`; the main file
2659        // asserts a fact qualified INTO that domain, so the two share one atom id.
2660        let mut r = MemoryResolver::new();
2661        r.add(
2662            "lib.vrf",
2663            r#"
2664        DOMAIN physics
2665        PREMISE needs_fuel:
2666            WHEN Engine_X has engine
2667            THEN Engine_X has fuel
2668        "#,
2669        );
2670        r.add(
2671            "main.vrf",
2672            r#"
2673        DOMAIN main
2674        IMPORT "lib.vrf"
2675        FACT physics.Engine_X has engine
2676        FACT physics.Engine_X has fuel
2677        "#,
2678        );
2679        let c = compile("main.vrf", &r).unwrap();
2680        assert!(c.pending_imports.is_empty());
2681        assert_eq!(c.clauses.len(), 1); // the imported premise
2682        assert_eq!(c.facts.len(), 2);
2683
2684        // `physics.Engine_X has fuel` from the FACT and the imported premise share an id.
2685        let fuel = key_in("physics", "Engine_X", "has", Some("fuel"));
2686        let fuel_id = id(&c, &fuel);
2687        assert!(c.facts.iter().any(|f| f.atom == fuel_id));
2688        assert!(c.clauses[0].lits.iter().any(|l| l.atom == fuel_id));
2689    }
2690
2691    #[test]
2692    fn same_triple_in_different_domains_does_not_unify() {
2693        // Without a domain prefix the fact lands in `main`, NOT `physics`, so it is
2694        // a distinct atom from the library's `physics.Engine_X has fuel`.
2695        let mut r = MemoryResolver::new();
2696        r.add("lib.vrf", "DOMAIN physics\nFACT Engine_X has fuel\n");
2697        r.add(
2698            "main.vrf",
2699            "DOMAIN main\nIMPORT \"lib.vrf\"\nFACT Engine_X has fuel\n",
2700        );
2701        let c = compile("main.vrf", &r).unwrap();
2702        // Two distinct atoms: physics.Engine_X has fuel and main.Engine_X has fuel.
2703        assert!(c.atoms.iter().any(|a| a.domain == "physics"));
2704        assert!(c.atoms.iter().any(|a| a.domain == "main"));
2705        assert_eq!(
2706            c.atoms
2707                .iter()
2708                .filter(|a| a.subject == "Engine_X" && a.predicate.as_deref() == Some("has"))
2709                .count(),
2710            2
2711        );
2712    }
2713
2714    #[test]
2715    fn import_alias_binds_a_local_domain_name() {
2716        // `AS phys` lets the consumer reference the imported domain by a local name.
2717        let mut r = MemoryResolver::new();
2718        r.add("lib.vrf", "DOMAIN physics\nFACT Motor over_200\n");
2719        r.add(
2720            "main.vrf",
2721            "DOMAIN main\nIMPORT \"lib.vrf\" AS phys\nFACT phys.Motor over_100\n",
2722        );
2723        let c = compile("main.vrf", &r).unwrap();
2724        // Both facts live in the physics domain (one via its own name, one via alias).
2725        assert_eq!(c.atoms.iter().filter(|a| a.domain == "physics").count(), 2);
2726    }
2727
2728    #[test]
2729    fn unknown_domain_reference_errors() {
2730        // Referencing a domain that is neither this file's nor imported here fails.
2731        let err = cs("FACT ghost.x a\n").unwrap_err();
2732        assert!(matches!(err, CompileError::UnknownDomain { .. }));
2733    }
2734
2735    #[test]
2736    fn imports_are_not_transitive_for_naming() {
2737        // main imports physics; physics imports math. main may NOT name math.
2738        let mut r = MemoryResolver::new();
2739        r.add("math.vrf", "DOMAIN math\nFACT foo bar\n");
2740        r.add(
2741            "physics.vrf",
2742            "DOMAIN physics\nIMPORT \"math.vrf\"\nFACT Motor over_100\n",
2743        );
2744        r.add(
2745            "main.vrf",
2746            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT math.foo bar\n",
2747        );
2748        let err = compile("main.vrf", &r).unwrap_err();
2749        assert!(matches!(err, CompileError::UnknownDomain { .. }));
2750    }
2751
2752    #[test]
2753    fn transitive_dependency_clauses_still_load() {
2754        // Even though main can't *name* math, math's clauses still participate.
2755        let mut r = MemoryResolver::new();
2756        r.add(
2757            "math.vrf",
2758            r"
2759        DOMAIN math
2760        PREMISE e:
2761            EXCLUSIVE
2762                x a
2763                x b
2764        ",
2765        );
2766        r.add("physics.vrf", "DOMAIN physics\nIMPORT \"math.vrf\"\n");
2767        r.add("main.vrf", "DOMAIN main\nIMPORT \"physics.vrf\"\n");
2768        let c = compile("main.vrf", &r).unwrap();
2769        assert_eq!(c.clauses.len(), 1); // math's clause loaded transitively
2770        assert!(c.clauses.iter().all(|cl| cl.origin.source == "math.vrf"));
2771    }
2772
2773    #[test]
2774    fn missing_domain_errors() {
2775        let err = compile_source("nodomain.vrf", "FACT x a\n").unwrap_err();
2776        assert!(matches!(err, CompileError::MissingDomain { .. }));
2777    }
2778
2779    #[test]
2780    fn duplicate_domain_errors() {
2781        let err = compile_source("dup.vrf", "DOMAIN a\nDOMAIN b\nFACT x a\n").unwrap_err();
2782        assert!(matches!(err, CompileError::DuplicateDomain { .. }));
2783    }
2784
2785    #[test]
2786    fn alias_clash_when_one_local_name_binds_two_domains() {
2787        // The same local alias `x` bound to two genuinely different domains is a
2788        // clash: disambiguate with distinct aliases.
2789        let mut r = MemoryResolver::new();
2790        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2791        r.add("b.vrf", "DOMAIN chemistry\nFACT atom reacts\n");
2792        r.add(
2793            "main.vrf",
2794            "DOMAIN main\nIMPORT \"a.vrf\" AS x\nIMPORT \"b.vrf\" AS x\n",
2795        );
2796        let err = compile("main.vrf", &r).unwrap_err();
2797        assert!(matches!(err, CompileError::DomainAliasClash { .. }));
2798    }
2799
2800    #[test]
2801    fn two_files_with_the_same_domain_name_merge() {
2802        // Nominal domains: two files both declaring DOMAIN physics share it (the
2803        // value of importing a premise library is exactly this unification).
2804        let mut r = MemoryResolver::new();
2805        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2806        r.add(
2807            "main.vrf",
2808            "DOMAIN physics\nIMPORT \"a.vrf\"\nFACT Motor over_200\n",
2809        );
2810        let c = compile("main.vrf", &r).unwrap();
2811        // Both motors live in the single shared `physics` domain.
2812        assert!(c.atoms.iter().all(|a| a.domain == "physics"));
2813        assert_eq!(c.atoms.len(), 2);
2814    }
2815
2816    #[test]
2817    fn diamond_import_is_deduped() {
2818        // main → a, c ; a → base ; c → base. base merged once.
2819        let mut r = MemoryResolver::new();
2820        r.add(
2821            "base.vrf",
2822            r#"
2823        DOMAIN base
2824        PREMISE b:
2825            EXCLUSIVE
2826                x a
2827                x b
2828        "#,
2829        );
2830        r.add("a.vrf", "DOMAIN a\nIMPORT \"base.vrf\"\n");
2831        r.add("c.vrf", "DOMAIN c\nIMPORT \"base.vrf\"\n");
2832        r.add(
2833            "main.vrf",
2834            "DOMAIN main\nIMPORT \"a.vrf\"\nIMPORT \"c.vrf\"\n",
2835        );
2836        let c = compile("main.vrf", &r).unwrap();
2837        assert_eq!(c.clauses.len(), 1); // base's single clause, not two
2838    }
2839
2840    #[test]
2841    fn circular_import_errors() {
2842        let mut r = MemoryResolver::new();
2843        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
2844        r.add("b.vrf", "DOMAIN b\nIMPORT \"a.vrf\"\n");
2845        let err = compile("a.vrf", &r).unwrap_err();
2846        assert!(matches!(err, CompileError::CircularImport(_)));
2847    }
2848
2849    #[test]
2850    fn three_node_cycle_errors() {
2851        // a → b → c → a. The back-edge to the on-path ancestor is detected.
2852        let mut r = MemoryResolver::new();
2853        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
2854        r.add("b.vrf", "DOMAIN b\nIMPORT \"c.vrf\"\n");
2855        r.add("c.vrf", "DOMAIN c\nIMPORT \"a.vrf\"\n");
2856        let err = compile("a.vrf", &r).unwrap_err();
2857        assert!(matches!(err, CompileError::CircularImport(_)));
2858    }
2859
2860    #[test]
2861    fn shared_grandchild_diamond_loads_once() {
2862        // The user's case: a imports B and C; C ALSO imports B. B must be compiled
2863        // exactly once (its single clause is not duplicated by the two paths to it).
2864        let mut r = MemoryResolver::new();
2865        r.add(
2866            "b.vrf",
2867            r"
2868        DOMAIN b
2869        PREMISE e:
2870            EXCLUSIVE
2871                x a
2872                x b
2873        ",
2874        );
2875        r.add("c.vrf", "DOMAIN c\nIMPORT \"b.vrf\"\n");
2876        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\nIMPORT \"c.vrf\"\n");
2877        let c = compile("a.vrf", &r).unwrap();
2878        assert_eq!(
2879            c.clauses.len(),
2880            1,
2881            "b.vrf's clause must appear exactly once"
2882        );
2883    }
2884
2885    #[test]
2886    fn exponential_fan_out_is_memoized_not_blown_up() {
2887        // f_k imports f_{k-1} TWICE. Without content-hash memoization the visit
2888        // count is 2^k (2^40 ≈ a trillion); with it, the work is linear, so this
2889        // finishes instantly. A guard against any combinatorial blow-up / DoS.
2890        let mut r = MemoryResolver::new();
2891        r.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
2892        let n = 40;
2893        for k in 1..=n {
2894            r.add(
2895                &alloc::format!("f{k}.vrf"),
2896                &alloc::format!(
2897                    "DOMAIN d{k}\nIMPORT \"f{}.vrf\"\nIMPORT \"f{}.vrf\"\n",
2898                    k - 1,
2899                    k - 1
2900                ),
2901            );
2902        }
2903        let c = compile(&alloc::format!("f{n}.vrf"), &r).unwrap();
2904        assert_eq!(c.facts.len(), 1); // the single fact from f0, reached once
2905    }
2906
2907    #[test]
2908    fn very_deep_linear_chain_does_not_overflow() {
2909        // A long non-cyclic chain. Resolution is iterative (explicit work stack),
2910        // so a depth that would overflow a recursive loader compiles cleanly.
2911        let mut r = MemoryResolver::new();
2912        r.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
2913        let n = 10_000;
2914        for k in 1..=n {
2915            r.add(
2916                &alloc::format!("f{k}.vrf"),
2917                &alloc::format!("DOMAIN d{k}\nIMPORT \"f{}.vrf\"\n", k - 1),
2918            );
2919        }
2920        let c = compile(&alloc::format!("f{n}.vrf"), &r).unwrap();
2921        assert_eq!(c.facts.len(), 1);
2922    }
2923
2924    #[test]
2925    fn missing_import_errors() {
2926        let mut r = MemoryResolver::new();
2927        r.add("main.vrf", "DOMAIN main\nIMPORT \"ghost.vrf\"\n");
2928        let err = compile("main.vrf", &r).unwrap_err();
2929        assert!(matches!(err, CompileError::ImportNotFound(_)));
2930    }
2931
2932    #[test]
2933    fn unused_import_is_flagged() {
2934        // main imports physics but never writes a `physics.` atom → unused.
2935        let mut r = MemoryResolver::new();
2936        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2937        r.add(
2938            "main.vrf",
2939            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT x a\n",
2940        );
2941        let c = compile("main.vrf", &r).unwrap();
2942        assert_eq!(c.unused_imports.len(), 1);
2943        assert_eq!(c.unused_imports[0].domain, "physics");
2944        assert_eq!(c.unused_imports[0].file, "main.vrf");
2945        assert_eq!(c.unused_imports[0].alias, None);
2946    }
2947
2948    #[test]
2949    fn referenced_import_is_not_unused() {
2950        // The same import, but now a `physics.` atom uses it → not flagged.
2951        let mut r = MemoryResolver::new();
2952        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2953        r.add(
2954            "main.vrf",
2955            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT physics.Motor over_200\n",
2956        );
2957        let c = compile("main.vrf", &r).unwrap();
2958        assert!(c.unused_imports.is_empty(), "{:?}", c.unused_imports);
2959    }
2960
2961    #[test]
2962    fn unused_import_records_its_alias() {
2963        let mut r = MemoryResolver::new();
2964        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2965        r.add(
2966            "main.vrf",
2967            "DOMAIN main\nIMPORT \"physics.vrf\" AS phys\nFACT x a\n",
2968        );
2969        let c = compile("main.vrf", &r).unwrap();
2970        assert_eq!(c.unused_imports.len(), 1);
2971        assert_eq!(c.unused_imports[0].alias.as_deref(), Some("phys"));
2972    }
2973
2974    #[test]
2975    fn import_referenced_only_inside_a_premise_is_used() {
2976        // The reference can be anywhere — here inside a premise body, not a fact.
2977        let mut r = MemoryResolver::new();
2978        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2979        r.add(
2980            "main.vrf",
2981            r#"
2982        DOMAIN main
2983        IMPORT "physics.vrf"
2984        PREMISE p:
2985            WHEN physics.Motor over_100
2986            THEN x ok
2987        "#,
2988        );
2989        let c = compile("main.vrf", &r).unwrap();
2990        assert!(c.unused_imports.is_empty(), "{:?}", c.unused_imports);
2991    }
2992
2993    #[test]
2994    fn same_premise_name_across_files_coexists() {
2995        // Two files may legitimately reuse a premise NAME with different bodies.
2996        // Names are per-source labels — both premises apply, qualified by source.
2997        // NOT a redefinition error. (Atoms stay apart too: different domains.)
2998        let mut r = MemoryResolver::new();
2999        r.add(
3000            "physics.vrf",
3001            r#"
3002        DOMAIN physics
3003        PREMISE safety:
3004            EXCLUSIVE
3005                x a
3006                x b
3007        "#,
3008        );
3009        r.add(
3010            "main.vrf",
3011            r#"
3012        DOMAIN main
3013        IMPORT "physics.vrf"
3014        PREMISE safety:
3015            EXCLUSIVE
3016                x a
3017                x c
3018        "#,
3019        );
3020        let c = compile("main.vrf", &r).unwrap();
3021        assert_eq!(c.clauses.len(), 2); // a-b from physics, a-c from main
3022        assert!(c.clauses.iter().any(|cl| cl.origin.source == "physics.vrf"));
3023        assert!(c.clauses.iter().any(|cl| cl.origin.source == "main.vrf"));
3024    }
3025
3026    #[test]
3027    fn redefinition_within_one_source_still_errors() {
3028        // But reusing a name with a different body *inside one source* is a mistake.
3029        let src = r#"
3030        DOMAIN m
3031        PREMISE e:
3032            EXCLUSIVE
3033                x a
3034                x b
3035        PREMISE e:
3036            EXCLUSIVE
3037                x a
3038                x c
3039        "#;
3040        let err = compile_source("main.vrf", src).unwrap_err();
3041        assert_eq!(
3042            err,
3043            CompileError::PremiseRedefinition {
3044                name: "e".to_string()
3045            }
3046        );
3047    }
3048
3049    #[test]
3050    fn import_demo_examples_resolve() {
3051        let mut r = MemoryResolver::new();
3052        r.add(
3053            "physics.vrf",
3054            include_str!("../../../docs/examples/physics.vrf"),
3055        );
3056        r.add(
3057            "import-demo.vrf",
3058            include_str!("../../../docs/examples/import-demo.vrf"),
3059        );
3060        let c = compile("import-demo.vrf", &r).unwrap();
3061        assert!(c.pending_imports.is_empty());
3062        // physics.vrf: one_path (EXCLUSIVE, 1 clause) + speed_order (impl, 1 clause)
3063        assert_eq!(c.clauses.len(), 2);
3064        // The qualified facts (`physics.Motor …`) share ids with the imported premise.
3065        let over_100 = id(&c, &key_in("physics", "Motor", "over_100", None));
3066        assert!(c.facts.iter().any(|f| f.atom == over_100));
3067        assert!(
3068            c.clauses
3069                .iter()
3070                .any(|cl| cl.lits.iter().any(|l| l.atom == over_100))
3071        );
3072    }
3073
3074    #[test]
3075    fn creature_example_compiles() {
3076        let src = include_str!("../../../docs/examples/creature.vrf");
3077        let c = compile_source("creature.vrf", src).unwrap();
3078        assert_eq!(c.facts.len(), 2); // flying, warm_blood
3079        assert_eq!(c.rules.len(), 1); // needs_oxygen
3080        assert_eq!(c.checks.len(), 1);
3081        // fly_xor_swim (1) + wings_need_bone (THEN wing AND bone → 2) + no_dual_temp (1) = 4
3082        assert_eq!(c.clauses.len(), 4);
3083        assert_eq!(c.atoms.len(), 7);
3084    }
3085
3086    #[test]
3087    fn forbids_unfolds_pairwise() {
3088        let src = r#"
3089        PREMISE f:
3090            FORBIDS
3091                x a
3092                x b
3093                x c
3094        "#;
3095        let c = cs(src).unwrap();
3096        assert_eq!(c.clauses.len(), 3); // C(3,2), like EXCLUSIVE
3097        assert!(
3098            c.clauses
3099                .iter()
3100                .all(|cl| cl.lits.len() == 2 && cl.lits.iter().all(|l| !l.negated))
3101        );
3102    }
3103
3104    #[test]
3105    fn rule_with_multiple_consequents() {
3106        let src = r#"
3107        RULE r:
3108            WHEN x a
3109            THEN x b
3110            AND  x c
3111        "#;
3112        let c = cs(src).unwrap();
3113        assert_eq!(c.rules.len(), 1);
3114        assert_eq!(c.rules[0].consequent.len(), 2);
3115    }
3116
3117    #[test]
3118    fn negated_antecedent_literal_keeps_polarity() {
3119        // WHEN NOT x a THEN x b  ==  Impossible([NOT x a, NOT x b])
3120        let src = r#"
3121        PREMISE a:
3122            WHEN NOT x a
3123            THEN x b
3124        "#;
3125        let c = cs(src).unwrap();
3126        let xa = id(&c, &key("x", "a", None));
3127        assert!(c.clauses[0].lits.contains(&Lit {
3128            atom: xa,
3129            negated: true
3130        }));
3131    }
3132
3133    #[test]
3134    fn rule_keeps_consequent_negation() {
3135        let src = r#"
3136        RULE r:
3137            WHEN x a
3138            THEN NOT x b
3139        "#;
3140        let c = cs(src).unwrap();
3141        assert!(c.rules[0].consequent[0].negated);
3142    }
3143
3144    #[test]
3145    fn compilation_is_deterministic() {
3146        let src = r#"
3147        PREMISE e:
3148            EXCLUSIVE
3149                z z
3150                a a
3151                m m
3152        FACT q q
3153        "#;
3154        assert_eq!(cs(src).unwrap(), cs(src).unwrap());
3155    }
3156
3157    #[test]
3158    fn empty_program_compiles_to_empty_ir() {
3159        let c = cs("// nothing here\n").unwrap();
3160        assert!(c.atoms.is_empty() && c.clauses.is_empty() && c.facts.is_empty());
3161    }
3162
3163    #[test]
3164    fn same_clause_from_two_named_premises_is_deduped() {
3165        // Different names, identical logical content → one clause, no redefinition.
3166        let src = r#"
3167        PREMISE e1:
3168            EXCLUSIVE
3169                x a
3170                x b
3171        PREMISE e2:
3172            EXCLUSIVE
3173                x a
3174                x b
3175        "#;
3176        let c = cs(src).unwrap();
3177        assert_eq!(c.clauses.len(), 1);
3178    }
3179
3180    #[test]
3181    fn object_distinguishes_atom_identity() {
3182        // `x p a` and `x p b` differ only by object → two distinct atoms.
3183        let c = cs("FACT x p a\nFACT x p b\n").unwrap();
3184        assert_eq!(c.atoms.len(), 2);
3185    }
3186
3187    // --- closed-world: ONEOF closes its variable's value set -----------------
3188
3189    /// A `ONEOF` body declaring three values of `resolved is …`. Flush-left so it
3190    /// concatenates cleanly in front of an appended line (CAPSTONE-style const).
3191    const ONEOF_RESOLVED: &str = r"PREMISE pick:
3192    ONEOF
3193        resolved is censored
3194        resolved is censored_mtp
3195        resolved is uncensored
3196";
3197
3198    #[test]
3199    fn value_outside_oneof_is_rejected() {
3200        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censoredmtp\n");
3201        let err = cs(&src).unwrap_err();
3202        let CompileError::UnknownValue(e) = err else {
3203            panic!("expected UnknownValue, got {err:?}");
3204        };
3205        assert_eq!(e.value, "censoredmtp");
3206        assert_eq!(e.subject, "resolved");
3207        assert_eq!(e.predicate, "is");
3208        assert_eq!(e.declared, "censored, censored_mtp, uncensored");
3209    }
3210
3211    #[test]
3212    fn near_miss_value_suggests_the_intended_one() {
3213        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censoredmtp\n");
3214        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
3215            panic!("expected UnknownValue");
3216        };
3217        assert_eq!(e.suggestion, " — did you mean `censored_mtp`?");
3218    }
3219
3220    #[test]
3221    fn far_off_value_offers_no_suggestion() {
3222        // `wildly_different` is past the edit-distance budget of every declared
3223        // value, so we reject it but do not guess.
3224        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is wildly_different\n");
3225        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
3226            panic!("expected UnknownValue");
3227        };
3228        assert_eq!(e.suggestion, "");
3229    }
3230
3231    #[test]
3232    fn declared_value_compiles_cleanly() {
3233        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censored_mtp\n");
3234        assert!(cs(&src).is_ok());
3235    }
3236
3237    #[test]
3238    fn oneof_declared_after_the_reference_still_catches_it() {
3239        // The check runs once every source is accumulated, so order is irrelevant.
3240        let src = alloc::format!("FACT resolved is censoredmtp\n{ONEOF_RESOLVED}");
3241        assert!(matches!(
3242            cs(&src).unwrap_err(),
3243            CompileError::UnknownValue(_)
3244        ));
3245    }
3246
3247    #[test]
3248    fn out_of_set_value_inside_a_premise_is_caught() {
3249        // Closed-world covers references anywhere — not just FACTs.
3250        let src = alloc::format!(
3251            r"{ONEOF_RESOLVED}
3252            PREMISE p:
3253                WHEN resolved is censoredmtp
3254                THEN x done
3255        "
3256        );
3257        assert!(matches!(
3258            cs(&src).unwrap_err(),
3259            CompileError::UnknownValue(_)
3260        ));
3261    }
3262
3263    #[test]
3264    fn out_of_set_value_inside_a_rule_is_caught() {
3265        let src = alloc::format!(
3266            r"{ONEOF_RESOLVED}
3267            RULE r:
3268                WHEN x go
3269                THEN resolved is censoredmtp
3270        "
3271        );
3272        assert!(matches!(
3273            cs(&src).unwrap_err(),
3274            CompileError::UnknownValue(_)
3275        ));
3276    }
3277
3278    #[test]
3279    fn binary_atoms_in_a_oneof_do_not_close_anything() {
3280        // `alice cooks` / `alice cleans` have no object slot, so there is no value
3281        // set to violate — a later `alice bakes` is just another atom, not an error.
3282        let src = r"
3283        PREMISE chores:
3284            ONEOF
3285                alice cooks
3286                alice cleans
3287        FACT alice bakes
3288        ";
3289        assert!(cs(src).is_ok());
3290    }
3291
3292    #[test]
3293    fn a_subject_without_a_oneof_stays_open() {
3294        // No ONEOF over `mood is …` → open world, any value is a fresh atom.
3295        let src = alloc::format!("{ONEOF_RESOLVED}FACT mood is anything_goes\n");
3296        assert!(cs(&src).is_ok());
3297    }
3298
3299    #[test]
3300    fn two_oneofs_union_their_declared_values() {
3301        // A value declared by either ONEOF for the same variable is legal.
3302        let src = r"
3303        PREMISE a:
3304            ONEOF
3305                v is one
3306                v is two
3307        PREMISE b:
3308            ONEOF
3309                v is two
3310                v is three
3311        FACT v is three
3312        ";
3313        assert!(cs(src).is_ok());
3314    }
3315
3316    #[test]
3317    fn earliest_offender_is_reported() {
3318        // Two violations; the diagnostic points at the first by line.
3319        let src = alloc::format!(
3320            "{ONEOF_RESOLVED}FACT resolved is firstbad\nFACT resolved is secondbad\n"
3321        );
3322        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
3323            panic!("expected UnknownValue");
3324        };
3325        assert_eq!(e.value, "firstbad");
3326    }
3327
3328    #[test]
3329    fn closed_world_spans_imported_domains() {
3330        // physics closes `Motor speed …`; main, referencing it via the prefix with
3331        // a typo, is rejected — the value set is shared across the domain boundary.
3332        let mut r = MemoryResolver::new();
3333        r.add(
3334            "physics.vrf",
3335            r"
3336        DOMAIN physics
3337        PREMISE g:
3338            ONEOF
3339                Motor speed slow
3340                Motor speed fast
3341        ",
3342        );
3343        r.add(
3344            "main.vrf",
3345            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT physics.Motor speed faast\n",
3346        );
3347        let CompileError::UnknownValue(e) = compile("main.vrf", &r).unwrap_err() else {
3348            panic!("expected UnknownValue");
3349        };
3350        assert_eq!(e.value, "faast");
3351        assert_eq!(e.suggestion, " — did you mean `fast`?");
3352    }
3353
3354    #[test]
3355    fn same_value_in_a_different_domain_does_not_clash() {
3356        // `state is open` is closed in domain a; domain b's own `state is shut`
3357        // (never declared in a) is fine — value sets are per-domain.
3358        let mut r = MemoryResolver::new();
3359        r.add(
3360            "a.vrf",
3361            r"
3362        DOMAIN a
3363        PREMISE s:
3364            ONEOF
3365                state is open
3366                state is closed
3367        ",
3368        );
3369        r.add("b.vrf", "DOMAIN b\nIMPORT \"a.vrf\"\nFACT state is shut\n");
3370        // `state is shut` is in domain b, which has no ONEOF → open, so it compiles.
3371        assert!(compile("b.vrf", &r).is_ok());
3372    }
3373
3374    #[test]
3375    fn levenshtein_basics() {
3376        // The canonical distance works on char slices; spell the string cases
3377        // through a tiny adapter so the table below reads as before.
3378        fn lev(a: &str, b: &str) -> usize {
3379            levenshtein(
3380                &a.chars().collect::<Vec<char>>(),
3381                &b.chars().collect::<Vec<char>>(),
3382            )
3383        }
3384        assert_eq!(lev("", ""), 0);
3385        assert_eq!(lev("abc", "abc"), 0);
3386        assert_eq!(lev("censoredmtp", "censored_mtp"), 1);
3387        assert_eq!(lev("norml", "normal"), 1);
3388        assert_eq!(lev("kitten", "sitting"), 3);
3389    }
3390
3391    #[test]
3392    fn nearest_respects_the_length_budget() {
3393        let cands = ["censored", "censored_mtp", "uncensored"];
3394        assert_eq!(nearest("censoredmtp", &cands), Some("censored_mtp"));
3395        // "zzz" is far from all; no suggestion.
3396        assert_eq!(nearest("zzz", &cands), None);
3397    }
3398
3399    #[test]
3400    fn nearest_offers_nothing_for_very_short_values() {
3401        // 1–2 character values get a budget of 0: every other short token is at
3402        // distance 1, so a "did you mean" carries no signal. True for single CJK
3403        // characters (one symbol = a whole word) and for two-letter codes alike.
3404        assert_eq!(nearest("七", &["一", "二", "三"]), None);
3405        assert_eq!(nearest("us", &["uk", "eu", "jp"]), None);
3406        // A multi-character CJK word still gets a sensible nearest (one wrong
3407        // character = distance 1, budget = 3/3 = 1).
3408        assert_eq!(nearest("中文字", &["中文学", "日本語"]), Some("中文学"));
3409    }
3410
3411    #[test]
3412    fn short_value_is_still_rejected_just_without_a_guess() {
3413        // The closed-world error does not depend on the suggestion: an out-of-set
3414        // single-character value is rejected exactly, only the `did you mean` is
3415        // suppressed.
3416        let src = r"
3417        PREMISE pick:
3418            ONEOF
3419                roll is 一
3420                roll is 二
3421        FACT roll is 七
3422        ";
3423        let CompileError::UnknownValue(e) = cs(src).unwrap_err() else {
3424            panic!("expected UnknownValue");
3425        };
3426        assert_eq!(e.value, "七");
3427        assert_eq!(e.suggestion, "");
3428    }
3429
3430    // --- FOR EACH / SET (bounded quantification, Phase 1) ------------------
3431
3432    #[test]
3433    fn for_each_grounds_once_per_element() {
3434        // A ONEOF body over a 2-element set: each element yields one pairwise
3435        // clause + one at-least-one clause = 2 clauses; 2 elements → 4 clauses,
3436        // and 4 distinct grounded atoms (a/b × slot m/n).
3437        let src = r"
3438        SET xs
3439            a
3440            b
3441        PREMISE slot FOR EACH t IN xs:
3442            ONEOF
3443                t slot m
3444                t slot n
3445        ";
3446        let c = cs(src).unwrap();
3447        assert_eq!(c.clauses.len(), 4);
3448        for s in ["a", "b"] {
3449            for o in ["m", "n"] {
3450                assert!(c.atoms.contains(&key(s, "slot", Some(o))));
3451            }
3452        }
3453    }
3454
3455    #[test]
3456    fn for_each_in_a_rule_derives_per_element() {
3457        // A quantified RULE grounds to one rule per element.
3458        let src = r"
3459        SET xs
3460            a
3461            b
3462        RULE r FOR EACH t IN xs:
3463            WHEN t on
3464            THEN t hot
3465        ";
3466        let c = cs(src).unwrap();
3467        assert_eq!(c.rules.len(), 2);
3468    }
3469
3470    #[test]
3471    fn for_each_over_an_undeclared_set_is_rejected() {
3472        let src = r"
3473        SET tasks
3474            a
3475        PREMISE p FOR EACH t IN taske:
3476            ONEOF
3477                t s x
3478                t s y
3479        ";
3480        let CompileError::UnknownSet {
3481            set, suggestion, ..
3482        } = cs(src).unwrap_err()
3483        else {
3484            panic!("expected UnknownSet");
3485        };
3486        assert_eq!(set, "taske");
3487        assert_eq!(suggestion, " — did you mean `tasks`?");
3488    }
3489
3490    #[test]
3491    fn for_each_closes_each_grounded_variable() {
3492        // ONEOF inside FOR EACH closes the variable per element, so an out-of-set
3493        // value on a grounded subject is a hard error (closed-world after subst).
3494        let src = r"
3495        SET xs
3496            a
3497            b
3498        PREMISE c FOR EACH t IN xs:
3499            ONEOF
3500                t color red
3501                t color blue
3502        FACT a color gren
3503        ";
3504        let CompileError::UnknownValue(e) = cs(src).unwrap_err() else {
3505            panic!("expected UnknownValue from the grounded ONEOF");
3506        };
3507        assert_eq!(e.value, "gren");
3508        assert_eq!(e.subject, "a");
3509    }
3510
3511    #[test]
3512    fn nested_for_each_is_a_parse_error() {
3513        // The structural guarantee: a second FOR EACH is unrepresentable — the
3514        // header carries exactly one, so nesting fails to parse (no domain
3515        // product can ever be written).
3516        let src = r"
3517        SET xs
3518            a
3519        PREMISE p FOR EACH x IN xs FOR EACH y IN xs:
3520            ONEOF
3521                x r y
3522                x s y
3523        ";
3524        assert!(matches!(cs(src), Err(CompileError::Parse(_))));
3525    }
3526
3527    #[test]
3528    fn relation_for_each_grounds_per_fact_pair() {
3529        // Two declared edges → the body is instantiated once per edge (two
3530        // pairwise clauses), and both edge atoms are recorded as consumed so the
3531        // ORPHAN lint will not flag them.
3532        let src = r"
3533        FACT a linked b
3534        FACT b linked c
3535        PREMISE p FOR EACH x linked y:
3536            FORBIDS
3537                x hot on
3538                y hot on
3539        ";
3540        let c = cs(src).unwrap();
3541        assert_eq!(c.clauses.len(), 2);
3542        assert_eq!(c.consumed.len(), 2);
3543        assert!(c.consumed.contains(&id(&c, &key("a", "linked", Some("b")))));
3544    }
3545
3546    #[test]
3547    fn relation_for_each_over_no_edges_is_inert() {
3548        // A relation with no matching facts grounds to nothing (vacuous), not an
3549        // error — unlike an undeclared SET.
3550        let src = r"
3551        PREMISE p FOR EACH x linked y:
3552            FORBIDS
3553                x hot on
3554                y hot on
3555        ";
3556        let c = cs(src).unwrap();
3557        assert_eq!(c.clauses.len(), 0);
3558        assert!(c.consumed.is_empty());
3559    }
3560
3561    #[test]
3562    fn close_transitive_extends_the_relation() {
3563        // a->b, b->c; CLOSE adds a->c, so a relation FOR EACH grounds over all
3564        // three pairs (without CLOSE it would be two).
3565        let src = r"
3566        FACT a r b
3567        FACT b r c
3568        CLOSE r TRANSITIVE
3569        PREMISE p FOR EACH x r y:
3570            FORBIDS
3571                x hot on
3572                y hot on
3573        ";
3574        let c = cs(src).unwrap();
3575        assert_eq!(c.clauses.len(), 3);
3576    }
3577
3578    #[test]
3579    fn close_transitive_rejects_a_cycle() {
3580        let src = r"
3581        FACT a r b
3582        FACT b r a
3583        CLOSE r TRANSITIVE
3584        ";
3585        let CompileError::CyclicRelation { relation, .. } = cs(src).unwrap_err() else {
3586            panic!("expected CyclicRelation");
3587        };
3588        assert_eq!(relation, "r");
3589    }
3590
3591    #[test]
3592    fn grounding_count_is_linear_in_the_set() {
3593        // No domain product: N elements → exactly N groundings (here N clauses,
3594        // one at-least-one per element), never N².
3595        let elems: alloc::string::String = (0..20).map(|i| alloc::format!("    e{i}\n")).collect();
3596        let src = alloc::format!(
3597            "SET xs\n{elems}PREMISE p FOR EACH t IN xs:\n    ATLEAST\n        t a\n        t b\n"
3598        );
3599        let c = cs(&src).unwrap();
3600        assert_eq!(c.clauses.len(), 20);
3601    }
3602}