Skip to main content

elenchus_compiler/
lib.rs

1//! elenchus-compiler — compiles parsed elenchus DSL into a canonical clause IR.
2//!
3//! This crate is **preparation, not solving**. It takes the AST (from
4//! `elenchus-parser`) and produces a deterministic, solver-ready intermediate
5//! representation:
6//!
7//! - **atom interner**: `(subject, predicate, object?)` → dense `u32` id,
8//!   canonically sorted so ids (and any later enumeration) are deterministic;
9//! - **desugaring**: surface CAPS sugar → `Impossible` clauses
10//!   (`EXCLUSIVE` pairwise, `WHEN…THEN` → `Impossible([A, …, NOT C])`, etc.);
11//! - **content-addressing** (sha256, mirroring vsm-guard's CAS): identical
12//!   clauses are deduped (idempotent — `P ∧ P ≡ P`), and a named construct
13//!   redefined with a different body is a `PremiseRedefinition` error.
14//!
15//! The actual reasoning (3-valued forward chaining, SAT, all-SAT, the WARNING
16//! pool, the four results) lives in `elenchus-solver`. `IMPORT` resolution is a
17//! source-agnostic [`Resolver`] that flat-merges another source into the shared
18//! atom universe ([`compile`] resolves imports; [`compile_source`] leaves them
19//! pending).
20//!
21//! # Example
22//!
23//! ```
24//! use elenchus_compiler::compile_source;
25//!
26//! // `ASSUME` lowers to a *soft* fact: the same atom universe as a `FACT`, but
27//! // one the solver may retract. Here `x a` is asserted both ways (hard + soft).
28//! let ir = compile_source("demo.vrf", "DOMAIN d\nFACT x a\nASSUME NOT x a\nCHECK x\n").unwrap();
29//! assert_eq!(ir.facts.len(), 2);
30//! assert!(ir.facts.iter().any(|f| f.soft)); // the ASSUME is the soft one
31//! ```
32#![no_std]
33// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
34#![warn(missing_docs)]
35
36extern crate alloc;
37
38#[cfg(feature = "std")]
39extern crate std;
40
41use alloc::boxed::Box;
42use alloc::collections::{BTreeMap, BTreeSet};
43use alloc::string::{String, ToString};
44use alloc::vec;
45use alloc::vec::Vec;
46use core::fmt::Write as _;
47
48/// Re-exported so downstream crates can name the syntax diagnostics carried by
49/// [`CompileError::Parse`] (and render them with a custom error limit).
50pub use elenchus_parser::Diagnostics;
51use elenchus_parser::{Atom, Body, Conn, ListOp, Literal, Statement, kw};
52use sha2::{Digest, Sha256};
53use thiserror::Error;
54
55// --- content-addressing (mirrors vsm-guard::hashing) -----------------------
56
57/// SHA-256 content addressing. Used only for dedup / redefinition / provenance,
58/// never for namespacing atoms.
59pub fn hash_hex(data: &[u8]) -> String {
60    let mut hasher = Sha256::new();
61    hasher.update(data);
62    let out = hasher.finalize();
63    let mut s = String::with_capacity(out.len() * 2);
64    for b in out {
65        let _ = write!(s, "{:02x}", b);
66    }
67    s
68}
69
70// --- IR types --------------------------------------------------------------
71
72/// Dense atom identifier (also the SAT variable number).
73pub type AtomId = u32;
74
75/// The identity of an atom: the `domain` plus the triple
76/// `(subject, predicate, object?)`, owned so it survives across merged sources.
77/// The domain is the leading sort key, so atoms group by domain; ordering is
78/// otherwise lexicographic → canonical. Two atoms with the same triple in
79/// *different* domains are distinct (no cross-domain unification).
80#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
81pub struct AtomKey {
82    /// The domain this atom belongs to (the resolved namespace, never a raw
83    /// alias). `physics.engine` and `plan.engine` are different atoms.
84    pub domain: String,
85    /// The entity the claim is about (owned copy of the parser's `subject`).
86    pub subject: String,
87    /// The relation or property asserted.
88    pub predicate: String,
89    /// Optional object; part of identity, so `has flying` ≠ `has swimming`.
90    pub object: Option<String>,
91}
92
93/// The domain context of one file being compiled: its own declared domain (where
94/// bare atoms fall) and the local names — aliases or imported domain names — it
95/// may reference other domains by. Resolving an atom's optional `domain.` prefix
96/// against this context yields its canonical [`AtomKey`] domain.
97struct DomainCtx {
98    /// The file's own declared domain (the target for unqualified atoms).
99    current: String,
100    /// `local name -> canonical domain` for every name visible in this file
101    /// (always includes `current -> current`, plus one entry per `IMPORT`).
102    aliases: BTreeMap<String, String>,
103}
104
105impl DomainCtx {
106    /// Resolve an atom's optional `domain.` prefix to a canonical domain name.
107    /// `None` → the file's own domain; a prefix not imported here is an error.
108    fn resolve(&self, prefix: Option<&str>) -> Result<String, CompileError> {
109        match prefix {
110            None => Ok(self.current.clone()),
111            Some(p) => self
112                .aliases
113                .get(p)
114                .cloned()
115                .ok_or_else(|| CompileError::UnknownDomain {
116                    domain: p.to_string(),
117                }),
118        }
119    }
120
121    /// Build the owned [`AtomKey`] for a borrowed parser [`Atom`], resolving its
122    /// domain prefix against this file's context.
123    fn key(&self, a: &Atom) -> Result<AtomKey, CompileError> {
124        Ok(AtomKey {
125            domain: self.resolve(a.domain)?,
126            subject: a.subject.to_string(),
127            predicate: a.predicate.to_string(),
128            object: a.object.map(|o| o.to_string()),
129        })
130    }
131}
132
133/// A literal as it appears *inside* an `Impossible` clause: an atom, optionally
134/// negated. `negated = true` means the literal is `NOT atom`.
135#[derive(Debug, Clone, Copy, PartialEq, Eq)]
136pub struct Lit {
137    /// Interned id of the atom (also its SAT variable number).
138    pub atom: AtomId,
139    /// `true` means this literal is `NOT atom` inside the clause.
140    pub negated: bool,
141}
142
143/// A confident truth value. UNKNOWN is the *absence* of a fact, never stored.
144#[derive(Debug, Clone, Copy, PartialEq, Eq)]
145pub enum Value {
146    /// The atom is asserted TRUE (from `FACT`).
147    True,
148    /// The atom is asserted FALSE (from `NOT`).
149    False,
150}
151
152/// Where a piece of IR came from — for readable conflict/warning pools.
153#[derive(Debug, Clone, PartialEq, Eq)]
154pub struct Origin {
155    /// The source label this came from (file name or `"<root>"`/`"<text>"`).
156    pub source: String,
157    /// 1-based line number of the originating statement.
158    pub line: u32,
159    /// The premise/rule name, if it came from a named construct.
160    pub premise: Option<String>,
161    /// Surface kind for the report, e.g. `"FACT"`, `"EXCLUSIVE"`, `"PREMISE"`.
162    pub kind: &'static str,
163}
164
165/// A confident fact (from `FACT` / `NOT`). Conflicting facts on the same atom
166/// are preserved (both kept) — the solver reports that as a CONFLICT.
167#[derive(Debug, Clone, PartialEq, Eq)]
168pub struct Fact {
169    /// The atom this fact pins down.
170    pub atom: AtomId,
171    /// The asserted truth value.
172    pub value: Value,
173    /// Where it came from (for the report).
174    pub origin: Origin,
175    /// `true` for an `ASSUME` (a *soft*, retractable hypothesis). A soft fact
176    /// behaves like a normal fact in the forward pass, but when the assumptions
177    /// cannot all hold the solver may drop it (and only it) to explain the
178    /// contradiction — a `FACT`/`NOT` is never retractable.
179    pub soft: bool,
180}
181
182/// An `Impossible` clause: the listed literals cannot all hold simultaneously.
183#[derive(Debug, Clone, PartialEq, Eq)]
184pub struct Clause {
185    /// The literals that cannot all hold at once (an `Impossible([...])`).
186    pub lits: Vec<Lit>,
187    /// Where it came from (for the report).
188    pub origin: Origin,
189}
190
191/// A forward-chaining rule (from `RULE`): if all antecedent literals hold, derive
192/// the consequent literals.
193#[derive(Debug, Clone, PartialEq, Eq)]
194pub struct Rule {
195    /// Literals that must all hold for the rule to fire.
196    pub antecedent: Vec<Lit>,
197    /// Literals derived (asserted) when the antecedent holds.
198    pub consequent: Vec<Lit>,
199    /// Where it came from (for the report).
200    pub origin: Origin,
201}
202
203/// A `CHECK` query.
204#[derive(Debug, Clone, PartialEq, Eq)]
205pub struct Check {
206    /// Restrict the report to this subject; `None` means check everything.
207    pub subject: Option<String>,
208    /// `true` runs the backward (all-SAT) pass to detect UNDERDETERMINED.
209    pub bidirectional: bool,
210}
211
212/// The compiled IR: the solver's input.
213#[derive(Debug, Clone, PartialEq, Eq)]
214pub struct Compiled {
215    /// Indexed by [`AtomId`]; canonically sorted.
216    pub atoms: Vec<AtomKey>,
217    /// Confident assertions from `FACT`/`NOT`.
218    pub facts: Vec<Fact>,
219    /// `Impossible` clauses (desugared premises + the built-in non-contradiction).
220    pub clauses: Vec<Clause>,
221    /// Forward-chaining rules from `RULE`.
222    pub rules: Vec<Rule>,
223    /// `CHECK` queries.
224    pub checks: Vec<Check>,
225    /// Imports seen but not yet resolved (only populated by [`compile_source`];
226    /// [`compile`] resolves them, leaving this empty).
227    pub pending_imports: Vec<String>,
228    /// Advisory: imports that a file makes but never references (no `domain.atom`
229    /// from that file uses the imported domain). Structural, per-file, and inert —
230    /// it never affects the solve. Only populated by [`compile`] (an unresolved
231    /// import in [`compile_source`] cannot be classified). See [`UnusedImport`].
232    pub unused_imports: Vec<UnusedImport>,
233}
234
235/// An advisory record: a file `IMPORT`s a domain it never references. Such an
236/// import is inert — no `domain.atom` in that file mentions it, so removing it
237/// would not change the result. It is almost always a leftover or a forgotten
238/// `domain.` prefix. **Purely informational** — it never changes the verdict.
239#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
240pub struct UnusedImport {
241    /// The source that declared the unused `IMPORT`.
242    pub file: String,
243    /// The imported domain that is never referenced from `file`.
244    pub domain: String,
245    /// The local alias, if the import used `AS <alias>`.
246    pub alias: Option<String>,
247    /// 1-based line of the `IMPORT` statement in `file`.
248    pub line: u32,
249}
250
251/// Anything that can go wrong while compiling (and resolving imports).
252#[derive(Debug, Error, PartialEq, Eq)]
253pub enum CompileError {
254    /// A source failed to parse; carries the full syntax diagnostics (every
255    /// error, each as a caret block with the keyword's correct syntax). The
256    /// source label is already inside the [`Diagnostics`] header.
257    #[error("{0}")]
258    Parse(elenchus_parser::Diagnostics),
259    /// A name was reused with a different body *within the same source*.
260    #[error("'{name}' redefined with a different body")]
261    PremiseRedefinition {
262        /// The clashing premise/rule name.
263        name: String,
264    },
265    /// A source did not declare its `DOMAIN` (required, once, as the first
266    /// statement).
267    #[error("{file}: missing a DOMAIN declaration (every file must start with `DOMAIN <name>`)")]
268    MissingDomain {
269        /// The source label that lacked a `DOMAIN`.
270        file: String,
271    },
272    /// A source declared `DOMAIN` more than once (a file has exactly one domain).
273    #[error("{file}: more than one DOMAIN declaration (a file has exactly one domain)")]
274    DuplicateDomain {
275        /// The source label with the duplicate `DOMAIN`.
276        file: String,
277    },
278    /// An atom referenced a `domain.` prefix that is not the file's own domain and
279    /// was not imported in this file.
280    #[error("unknown domain '{domain}' — declare it with DOMAIN, or IMPORT it in this file")]
281    UnknownDomain {
282        /// The unresolved domain prefix.
283        domain: String,
284    },
285    /// Two imports bound the same local domain name to different domains (use a
286    /// distinct `AS <alias>` to tell them apart).
287    #[error("domain name '{alias}' is bound to two different imports (disambiguate with AS)")]
288    DomainAliasClash {
289        /// The clashing local domain name.
290        alias: String,
291    },
292    /// An `IMPORT` target could not be loaded by the [`Resolver`].
293    #[error("import not found: {0}")]
294    ImportNotFound(String),
295    /// Imports form a cycle (a source transitively imports itself).
296    #[error("circular import: {0}")]
297    CircularImport(String),
298    /// A `RULE` used `OR` in its `THEN`: forward chaining cannot derive a
299    /// disjunction (it would not know which literal to assert). Model it as a
300    /// `PREMISE` constraint instead.
301    #[error("rule '{name}' cannot derive a disjunction (OR in THEN); use a PREMISE instead")]
302    RuleDisjunctiveConsequent {
303        /// The offending rule name.
304        name: String,
305    },
306    /// A reference used a value outside the closed set an `ONEOF` declared for that
307    /// variable. Almost always a typo: the misspelling would otherwise mint a new
308    /// atom that hangs in the air as UNKNOWN. Closed-world is opt-in — it only
309    /// applies to a `(subject, predicate)` whose values an `ONEOF` enumerated.
310    /// Boxed so this comparatively large payload does not bloat every `Result`.
311    #[error(transparent)]
312    UnknownValue(Box<UnknownValue>),
313}
314
315/// Details of a closed-world violation (see [`CompileError::UnknownValue`]). Kept
316/// in its own (boxed) struct so the common error path stays small.
317#[derive(Debug, Error, PartialEq, Eq)]
318#[error(
319    "{file}:{line}: '{value}' is not a declared value of '{subject} {predicate}' \
320     — ONEOF declares {{ {declared} }}{suggestion}"
321)]
322pub struct UnknownValue {
323    /// The source the offending reference is in.
324    pub file: String,
325    /// 1-based line of the offending reference.
326    pub line: u32,
327    /// The variable's subject.
328    pub subject: String,
329    /// The variable's predicate.
330    pub predicate: String,
331    /// The out-of-set value that was used.
332    pub value: String,
333    /// The declared legal values, comma-joined (sorted).
334    pub declared: String,
335    /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
336    pub suggestion: String,
337}
338
339// --- raw (key-based) intermediate, before interning ------------------------
340// While accumulating we key everything by `AtomKey` (the owned triple) rather
341// than by `AtomId`, because ids only become stable once *all* sources are merged
342// and the atom set is sorted in `finalize`. These mirror the public IR types but
343// hold keys instead of ids.
344
345/// A literal keyed by atom identity (pre-interning counterpart of [`Lit`]).
346#[derive(Clone)]
347struct RawLit {
348    key: AtomKey,
349    negated: bool,
350}
351
352/// A fact keyed by atom identity (pre-interning counterpart of [`Fact`]).
353struct RawFact {
354    key: AtomKey,
355    value: Value,
356    origin: Origin,
357    soft: bool,
358}
359
360/// A clause keyed by atom identity (pre-interning counterpart of [`Clause`]).
361struct RawClause {
362    lits: Vec<RawLit>,
363    origin: Origin,
364}
365
366/// A rule keyed by atom identity (pre-interning counterpart of [`Rule`]).
367struct RawRule {
368    antecedent: Vec<RawLit>,
369    consequent: Vec<RawLit>,
370    origin: Origin,
371}
372
373// --- compiler --------------------------------------------------------------
374
375/// Accumulates statements from one or more sources, then interns + emits the IR.
376#[derive(Default)]
377pub struct Compiler {
378    keys: BTreeSet<AtomKey>,
379    facts: Vec<RawFact>,
380    clauses: Vec<RawClause>,
381    rules: Vec<RawRule>,
382    checks: Vec<Check>,
383    pending_imports: Vec<String>,
384    /// (source, name) → content hash of its body, for redefinition detection.
385    /// Scoped per source: premise/rule names are labels, not global identifiers,
386    /// so different files (domains) may reuse a name. A clash is only an error
387    /// *within the same source*.
388    defined: BTreeMap<(String, String), String>,
389    /// dedup of identical clauses by canonical content hash.
390    clause_sigs: BTreeSet<String>,
391    /// dedup of identical facts by (key, value).
392    fact_sigs: BTreeSet<String>,
393    /// Closed-world value sets declared by `ONEOF`: `(domain, subject, predicate)`
394    /// → the set of legal objects. Once a variable's values are enumerated by a
395    /// `ONEOF`, a reference to that variable with an object *outside* the set is a
396    /// compile error (a likely typo), not a silent new atom. Only `ONEOF` members
397    /// that carry an object register here (binary atoms have no value slot to
398    /// close). See [`Compiler::validate_closed_world`].
399    oneof_values: BTreeMap<(String, String, String), BTreeSet<String>>,
400}
401
402impl Compiler {
403    /// A fresh, empty compiler.
404    pub fn new() -> Self {
405        Self::default()
406    }
407
408    /// Parse one source and accumulate its statements. `source` is a label used
409    /// in provenance (e.g. a file name or `"<root>"`). The source must declare its
410    /// `DOMAIN`; `IMPORT`s are recorded as pending (their domains cannot be bound
411    /// without a [`Resolver`]), so a single source may only reference its own
412    /// domain. Use [`compile`] for cross-domain references.
413    pub fn add_source(&mut self, source: &str, src: &str) -> Result<(), CompileError> {
414        let program = elenchus_parser::parse(src).map_err(|mut diag| {
415            diag.set_file(source);
416            CompileError::Parse(diag)
417        })?;
418        let domain = extract_domain(&program, source)?;
419        let mut aliases = BTreeMap::new();
420        aliases.insert(domain.clone(), domain.clone());
421        let ctx = DomainCtx {
422            current: domain,
423            aliases,
424        };
425        for stmt in &program.statements {
426            match stmt {
427                Statement::Domain(_) => {}
428                Statement::Import { path, .. } => {
429                    self.pending_imports.push(path.data.to_string());
430                }
431                other => self.add_statement(source, other, &ctx)?,
432            }
433        }
434        Ok(())
435    }
436
437    /// Compile one already-resolved file's statements under its domain context.
438    fn add_resolved(&mut self, file: &ResolvedFile) -> Result<(), CompileError> {
439        let program = elenchus_parser::parse(&file.content).map_err(|mut diag| {
440            diag.set_file(&file.path);
441            CompileError::Parse(diag)
442        })?;
443        for stmt in &program.statements {
444            match stmt {
445                Statement::Import { .. } | Statement::Domain(_) => {}
446                other => self.add_statement(&file.path, other, &file.ctx)?,
447            }
448        }
449        Ok(())
450    }
451
452    /// Route one statement (never `IMPORT`/`DOMAIN` — handled by the loaders) to
453    /// the right accumulator, resolving atom domains through `ctx`.
454    fn add_statement(
455        &mut self,
456        source: &str,
457        stmt: &Statement,
458        ctx: &DomainCtx,
459    ) -> Result<(), CompileError> {
460        match stmt {
461            // Handled by `add_source` / `load_recursive`, never reach here.
462            Statement::Import { .. } | Statement::Domain(_) => {}
463            Statement::Fact(a) => self.add_fact(source, a, Value::True, kw::FACT, false, ctx)?,
464            Statement::Negation(a) => {
465                self.add_fact(source, a, Value::False, kw::NOT, false, ctx)?
466            }
467            Statement::Assume(l) => {
468                let value = if l.data.negated {
469                    Value::False
470                } else {
471                    Value::True
472                };
473                // A soft assertion shares the FACT accumulator; the atom is the
474                // literal's atom, the polarity its `NOT`, and `soft` marks it
475                // retractable. The span is the whole `ASSUME` line.
476                let located = elenchus_parser::Located {
477                    data: l.data.atom.clone(),
478                    span: l.span,
479                };
480                self.add_fact(source, &located, value, kw::ASSUME, true, ctx)?;
481            }
482            Statement::Check {
483                subject,
484                bidirectional,
485            } => self.checks.push(Check {
486                subject: subject.as_ref().map(|s| s.data.to_string()),
487                bidirectional: *bidirectional,
488            }),
489            Statement::Premise { name, body } => {
490                let line = name.span.location_line();
491                self.add_named(source, name.data, line, body, false, ctx)?;
492            }
493            Statement::Rule { name, body } => {
494                let line = name.span.location_line();
495                self.add_named(source, name.data, line, body, true, ctx)?;
496            }
497        }
498        Ok(())
499    }
500
501    /// Record an atom identity in the shared universe (deduped by the `BTreeSet`).
502    fn intern(&mut self, key: &AtomKey) {
503        if !self.keys.contains(key) {
504            self.keys.insert(key.clone());
505        }
506    }
507
508    /// Accumulate a `FACT`/`NOT`; exact duplicates (same key+value+kind) are
509    /// dropped as idempotent, while a `FACT` and a `NOT` on the same atom are
510    /// both kept so the solver can report the CONFLICT.
511    fn add_fact(
512        &mut self,
513        source: &str,
514        a: &elenchus_parser::Located<Atom>,
515        value: Value,
516        kind: &'static str,
517        soft: bool,
518        ctx: &DomainCtx,
519    ) -> Result<(), CompileError> {
520        let key = ctx.key(&a.data)?;
521        self.intern(&key);
522        let sig = alloc::format!(
523            "{}|{}|{}|{}",
524            key_sig(&key),
525            matches!(value, Value::True) as u8,
526            kind,
527            "" // facts dedup ignores line; identical FACT twice is idempotent
528        );
529        if !self.fact_sigs.insert(sig) {
530            return Ok(()); // exact duplicate fact — idempotent
531        }
532        self.facts.push(RawFact {
533            key,
534            value,
535            origin: Origin {
536                source: source.to_string(),
537                line: a.span.location_line(),
538                premise: None,
539                kind,
540            },
541            soft,
542        });
543        Ok(())
544    }
545
546    /// Handle a named construct (`PREMISE` or `RULE`). `is_rule` selects derivation
547    /// vs checking. Returns an error on redefinition with a different body.
548    fn add_named(
549        &mut self,
550        source: &str,
551        name: &str,
552        line: u32,
553        body: &Body,
554        is_rule: bool,
555        ctx: &DomainCtx,
556    ) -> Result<(), CompileError> {
557        let body_hash = hash_hex(canonical_body(name, body, is_rule, ctx)?.as_bytes());
558        let key = (source.to_string(), name.to_string());
559        match self.defined.get(&key) {
560            Some(prev) if *prev == body_hash => return Ok(()), // identical → idempotent
561            Some(_) => {
562                // Same name + different body *in the same source* — a real mistake.
563                return Err(CompileError::PremiseRedefinition {
564                    name: name.to_string(),
565                });
566            }
567            None => {
568                self.defined.insert(key, body_hash);
569            }
570        }
571
572        if is_rule {
573            // RULE always has an implication body (guaranteed by the grammar).
574            if let Body::Impl {
575                antecedent,
576                ante_conn,
577                consequent,
578                cons_conn,
579            } = body
580            {
581                // A rule *derives* its consequent; an `OR` consequent is not a
582                // single fact to assert, so reject it (use a PREMISE instead).
583                if *cons_conn == Conn::Or {
584                    return Err(CompileError::RuleDisjunctiveConsequent {
585                        name: name.to_string(),
586                    });
587                }
588                let (ante, cons) = (raw_lits(antecedent, ctx)?, raw_lits(consequent, ctx)?);
589                for l in ante.iter().chain(cons.iter()) {
590                    self.intern(&l.key);
591                }
592                let origin = self.origin(source, line, Some(name), kw::RULE);
593                match ante_conn {
594                    // a ∧ b → C : one rule firing on the whole antecedent.
595                    Conn::And => self.rules.push(RawRule {
596                        antecedent: ante,
597                        consequent: cons,
598                        origin,
599                    }),
600                    // (a ∨ b) → C == (a → C) ∧ (b → C): one rule per antecedent.
601                    Conn::Or => {
602                        for a in &ante {
603                            self.rules.push(RawRule {
604                                antecedent: vec![a.clone()],
605                                consequent: cons.clone(),
606                                origin: origin.clone(),
607                            });
608                        }
609                    }
610                }
611            }
612            return Ok(());
613        }
614
615        match body {
616            Body::List { op, atoms } => {
617                let keys: Vec<AtomKey> = atoms
618                    .iter()
619                    .map(|a| ctx.key(&a.data))
620                    .collect::<Result<_, _>>()?;
621                for k in &keys {
622                    self.intern(k);
623                }
624                let kind = list_kind(*op);
625                let origin = self.origin(source, line, Some(name), kind);
626                match op {
627                    // EXCLUSIVE / FORBIDS: "at most one" → pairwise Impossible([a_i, a_j]).
628                    ListOp::Exclusive | ListOp::Forbids => {
629                        self.emit_pairwise(&keys, &origin);
630                    }
631                    // ONEOF: pairwise (at most one) + at-least-one. A ONEOF also
632                    // *closes* each of its variables: record every member's object
633                    // as a legal value of `(domain, subject, predicate)` so a later
634                    // out-of-set reference is caught as a typo (closed-world).
635                    ListOp::OneOf => {
636                        self.emit_pairwise(&keys, &origin);
637                        self.emit_at_least_one(&keys, &origin);
638                        for k in &keys {
639                            if let Some(obj) = &k.object {
640                                self.oneof_values
641                                    .entry((
642                                        k.domain.clone(),
643                                        k.subject.clone(),
644                                        k.predicate.clone(),
645                                    ))
646                                    .or_default()
647                                    .insert(obj.clone());
648                            }
649                        }
650                    }
651                    // ATLEAST: Impossible([NOT a_1, …, NOT a_n]).
652                    ListOp::AtLeast => {
653                        self.emit_at_least_one(&keys, &origin);
654                    }
655                }
656            }
657            Body::Impl {
658                antecedent,
659                ante_conn,
660                consequent,
661                cons_conn,
662            } => {
663                // Implication A → C as `Impossible(A_true ∧ ¬C)`. We group each
664                // side by its connective and emit one clause per (ante × cons)
665                // group pair — a uniform rule covering all AND/OR combinations:
666                //   AND-ante → all its literals share every clause;
667                //   OR-ante  → one clause per literal;
668                //   AND-cons → one clause per (negated) literal;
669                //   OR-cons  → all its (negated) literals share every clause.
670                let ante = raw_lits(antecedent, ctx)?;
671                let cons = raw_lits(consequent, ctx)?;
672                for l in ante.iter().chain(cons.iter()) {
673                    self.intern(&l.key);
674                }
675                let origin = self.origin(source, line, Some(name), kw::PREMISE);
676
677                let ante_groups: Vec<Vec<RawLit>> = match ante_conn {
678                    Conn::And => vec![ante.clone()],
679                    Conn::Or => ante.iter().map(|l| vec![l.clone()]).collect(),
680                };
681                let cons_groups: Vec<Vec<RawLit>> = match cons_conn {
682                    Conn::And => cons.iter().map(|l| vec![l.clone()]).collect(),
683                    Conn::Or => vec![cons.clone()],
684                };
685                for ag in &ante_groups {
686                    for cg in &cons_groups {
687                        let mut lits = ag.clone();
688                        for c in cg {
689                            lits.push(RawLit {
690                                key: c.key.clone(),
691                                negated: !c.negated,
692                            });
693                        }
694                        self.push_clause(lits, origin.clone());
695                    }
696                }
697            }
698        }
699        Ok(())
700    }
701
702    /// Emit "at most one TRUE" as one `Impossible([a_i, a_j])` per unordered
703    /// pair. Pairwise (not a single big clause) because `Impossible([a,b,c])`
704    /// only forbids *all three* together — it would still allow two.
705    fn emit_pairwise(&mut self, keys: &[AtomKey], origin: &Origin) {
706        for i in 0..keys.len() {
707            for j in (i + 1)..keys.len() {
708                let lits = vec![
709                    RawLit {
710                        key: keys[i].clone(),
711                        negated: false,
712                    },
713                    RawLit {
714                        key: keys[j].clone(),
715                        negated: false,
716                    },
717                ];
718                self.push_clause(lits, origin.clone());
719            }
720        }
721    }
722
723    /// Emit "at least one TRUE" as a single `Impossible([NOT a_1, …, NOT a_n])`
724    /// — it is impossible for all of them to be false at once.
725    fn emit_at_least_one(&mut self, keys: &[AtomKey], origin: &Origin) {
726        let lits = keys
727            .iter()
728            .map(|k| RawLit {
729                key: k.clone(),
730                negated: true,
731            })
732            .collect();
733        self.push_clause(lits, origin.clone());
734    }
735
736    /// Append a clause unless an identical one (by canonical [`clause_sig`]) is
737    /// already present — `P ∧ P ≡ P`, so dedup keeps the IR minimal.
738    fn push_clause(&mut self, lits: Vec<RawLit>, origin: Origin) {
739        let sig = clause_sig(&lits);
740        if self.clause_sigs.insert(sig) {
741            self.clauses.push(RawClause { lits, origin });
742        }
743        // else: identical clause already present — idempotent.
744    }
745
746    /// Build an [`Origin`] for provenance from the current source/line/name.
747    fn origin(&self, source: &str, line: u32, premise: Option<&str>, kind: &'static str) -> Origin {
748        Origin {
749            source: source.to_string(),
750            line,
751            premise: premise.map(|s| s.to_string()),
752            kind,
753        }
754    }
755
756    /// Closed-world check: once an `ONEOF` enumerates a variable's values, any
757    /// reference to that `(domain, subject, predicate)` with an object outside the
758    /// declared set is rejected as a likely typo — instead of silently minting a
759    /// new atom that then "hangs in the air" as an UNKNOWN. Reports the earliest
760    /// (by source, line) offender, with a `did you mean` suggestion when a declared
761    /// value is within edit distance. Must run after all sources are accumulated
762    /// (a `ONEOF` may follow the reference) and before [`finalize`].
763    fn validate_closed_world(&self) -> Result<(), CompileError> {
764        if self.oneof_values.is_empty() {
765            return Ok(());
766        }
767        // Every atom reference reachable from a fact, clause, or rule, with the
768        // line it came from. ONEOF members appear here too (as clause literals) but
769        // are in-set by construction, so they never trip the check. `out_of_set`
770        // tests one key against its variable's declared values.
771        let out_of_set = |key: &AtomKey| -> bool {
772            key.object.as_ref().is_some_and(|obj| {
773                self.oneof_values
774                    .get(&(
775                        key.domain.clone(),
776                        key.subject.clone(),
777                        key.predicate.clone(),
778                    ))
779                    .is_some_and(|set| !set.contains(obj))
780            })
781        };
782        let mut offenders: Vec<(&str, u32, &AtomKey)> = Vec::new();
783        for f in &self.facts {
784            if out_of_set(&f.key) {
785                offenders.push((&f.origin.source, f.origin.line, &f.key));
786            }
787        }
788        for c in &self.clauses {
789            for l in &c.lits {
790                if out_of_set(&l.key) {
791                    offenders.push((&c.origin.source, c.origin.line, &l.key));
792                }
793            }
794        }
795        for r in &self.rules {
796            for l in r.antecedent.iter().chain(r.consequent.iter()) {
797                if out_of_set(&l.key) {
798                    offenders.push((&r.origin.source, r.origin.line, &l.key));
799                }
800            }
801        }
802        // Earliest offender wins, for a stable, source-ordered diagnostic.
803        let Some(&(source, line, key)) = offenders.iter().min_by(|a, b| {
804            (a.0, a.1, &a.2.subject, &a.2.object).cmp(&(b.0, b.1, &b.2.subject, &b.2.object))
805        }) else {
806            return Ok(());
807        };
808        let set = &self.oneof_values[&(
809            key.domain.clone(),
810            key.subject.clone(),
811            key.predicate.clone(),
812        )];
813        let declared: Vec<&str> = set.iter().map(|s| s.as_str()).collect(); // BTreeSet → sorted
814        let value = key.object.clone().unwrap_or_default();
815        let suggestion = nearest(&value, &declared)
816            .map(|s| alloc::format!(" — did you mean `{s}`?"))
817            .unwrap_or_default();
818        Err(CompileError::UnknownValue(Box::new(UnknownValue {
819            file: source.to_string(),
820            line,
821            subject: key.subject.clone(),
822            predicate: key.predicate.clone(),
823            value,
824            declared: declared.join(", "),
825            suggestion,
826        })))
827    }
828
829    /// Intern all atoms (canonical sort), then lower the raw IR to ids.
830    pub fn finalize(self) -> Compiled {
831        let atoms: Vec<AtomKey> = self.keys.into_iter().collect(); // BTreeSet → sorted
832        let mut id_of: BTreeMap<AtomKey, AtomId> = BTreeMap::new();
833        for (i, k) in atoms.iter().enumerate() {
834            id_of.insert(k.clone(), i as AtomId);
835        }
836        let lower = |l: &RawLit| Lit {
837            atom: id_of[&l.key],
838            negated: l.negated,
839        };
840
841        let facts = self
842            .facts
843            .into_iter()
844            .map(|f| Fact {
845                atom: id_of[&f.key],
846                value: f.value,
847                origin: f.origin,
848                soft: f.soft,
849            })
850            .collect();
851        let clauses = self
852            .clauses
853            .into_iter()
854            .map(|c| Clause {
855                lits: c.lits.iter().map(lower).collect(),
856                origin: c.origin,
857            })
858            .collect();
859        let rules = self
860            .rules
861            .into_iter()
862            .map(|r| Rule {
863                antecedent: r.antecedent.iter().map(lower).collect(),
864                consequent: r.consequent.iter().map(lower).collect(),
865                origin: r.origin,
866            })
867            .collect();
868
869        Compiled {
870            atoms,
871            facts,
872            clauses,
873            rules,
874            checks: self.checks,
875            pending_imports: self.pending_imports,
876            unused_imports: Vec::new(), // filled by `compile` (advisory, post-resolution)
877        }
878    }
879}
880
881/// Convenience: compile a single source into the IR. `IMPORT`s are recorded as
882/// pending, not resolved (use [`compile`] with a [`Resolver`] to resolve them).
883pub fn compile_source(source: &str, src: &str) -> Result<Compiled, CompileError> {
884    let mut c = Compiler::new();
885    c.add_source(source, src)?;
886    c.validate_closed_world()?;
887    Ok(c.finalize())
888}
889
890// --- import resolution (source-agnostic) -----------------------------------
891
892/// Resolves `IMPORT "path"` to source text. The engine is source-agnostic: it
893/// consumes strings, so a file is merely one backing store. Mirrors
894/// vsm-grammar's `SourceResolver`.
895pub trait Resolver {
896    /// Load the raw source text for a resolved path.
897    fn load(&self, path: &str) -> Result<String, CompileError>;
898
899    /// Normalize `relative` against the importing source `base`.
900    /// Default: paths are absolute names, returned unchanged.
901    fn resolve(&self, _base: &str, relative: &str) -> String {
902        relative.to_string()
903    }
904}
905
906/// An in-memory resolver: serves sources from a name → content map. Pure
907/// `no_std`. Mirrors vsm-grammar's `MemoryResolver`.
908#[derive(Default)]
909pub struct MemoryResolver {
910    sources: BTreeMap<String, String>,
911}
912
913impl MemoryResolver {
914    /// An empty resolver with no sources.
915    pub fn new() -> Self {
916        Self::default()
917    }
918
919    /// Register `content` under `path`; returns `&mut self` for chaining.
920    pub fn add(&mut self, path: &str, content: &str) -> &mut Self {
921        self.sources.insert(path.to_string(), content.to_string());
922        self
923    }
924}
925
926impl Resolver for MemoryResolver {
927    fn load(&self, path: &str) -> Result<String, CompileError> {
928        self.sources
929            .get(path)
930            .cloned()
931            .ok_or_else(|| CompileError::ImportNotFound(path.to_string()))
932    }
933}
934
935/// A filesystem-backed resolver. Mirrors vsm-grammar's `FileResolver`:
936/// relative imports resolve against the importing file's directory, with manual
937/// `..` normalization (no canonicalization, to keep a virtual layout).
938#[cfg(feature = "std")]
939pub struct FileResolver;
940
941#[cfg(feature = "std")]
942impl Resolver for FileResolver {
943    fn load(&self, path: &str) -> Result<String, CompileError> {
944        std::fs::read_to_string(path)
945            .map_err(|e| CompileError::ImportNotFound(alloc::format!("{}: {}", path, e)))
946    }
947
948    fn resolve(&self, base: &str, relative: &str) -> String {
949        use std::path::{Component, Path, PathBuf};
950        let parent = Path::new(base).parent().unwrap_or_else(|| Path::new("."));
951        let joined = parent.join(relative);
952        let mut out = PathBuf::new();
953        for component in joined.components() {
954            match component {
955                Component::ParentDir => {
956                    out.pop();
957                }
958                Component::CurDir => {}
959                c => out.push(c),
960            }
961        }
962        // Normalize to forward slashes so resolved paths (and therefore the
963        // provenance recorded in the IR) are identical on Windows and Unix.
964        // Windows `std::fs` accepts `/` just fine.
965        out.to_string_lossy().replace('\\', "/")
966    }
967}
968
969/// One resolved source ready to compile: its provenance path, raw text, and the
970/// domain context (own domain + import-alias bindings) its atoms resolve against.
971struct ResolvedFile {
972    path: String,
973    content: String,
974    ctx: DomainCtx,
975}
976
977/// Compile a root source and all its transitive `IMPORT`s into one IR.
978///
979/// Each file is keyed by `DOMAIN`; atoms unify only within a domain. Imports are
980/// referenced by `<domain>.<atom>` and visibility is file-local (naming is not
981/// transitive, though a dependency's clauses still participate). Sources are
982/// content-addressed (sha256): a source reached by several paths is compiled once
983/// (so a diamond — or an exponential fan-out — stays linear, never blowing up),
984/// and an import cycle is an error.
985///
986/// Resolution is **iterative** (an explicit work stack, not native recursion), so
987/// an arbitrarily deep import chain cannot overflow the call stack.
988///
989/// Premise/rule names are per-source labels, not global identifiers: different
990/// files may reuse a name, and the report qualifies them by source. A name reused
991/// with a different body is an error only *within the same source*.
992pub fn compile<R: Resolver>(root: &str, resolver: &R) -> Result<Compiled, CompileError> {
993    let (files, unused_imports) = resolve_graph(root, resolver)?;
994    let mut c = Compiler::new();
995    for file in &files {
996        c.add_resolved(file)?;
997    }
998    c.validate_closed_world()?;
999    let mut compiled = c.finalize();
1000    compiled.unused_imports = unused_imports;
1001    Ok(compiled)
1002}
1003
1004/// One `IMPORT` edge: the optional local alias, the resolved child path, and the
1005/// `IMPORT` line (for the unused-import advisory).
1006struct ImportEdge {
1007    alias: Option<String>,
1008    child_path: String,
1009    line: u32,
1010}
1011
1012/// A discovered source during graph resolution: its first-seen path, raw text,
1013/// declared domain, import edges, and the set of domain prefixes its atoms use
1014/// (`None` = its own domain; `Some(p)` = a `p.` prefix) — used to flag imports
1015/// that the file never references.
1016struct DiscoveredFile {
1017    path: String,
1018    content: String,
1019    domain: String,
1020    edges: Vec<ImportEdge>,
1021    used_prefixes: BTreeSet<Option<String>>,
1022}
1023
1024/// Resolve the whole import graph reachable from `root` into a flat list of
1025/// [`ResolvedFile`]s, each distinct source appearing once.
1026///
1027/// Iterative depth-first traversal with an explicit work stack (`Enter`/`Exit`
1028/// frames) — no native recursion, so depth is unbounded without risking a stack
1029/// overflow. Memoized by content hash (a diamond/repeat is visited once); a hash
1030/// re-encountered while still on the active path is a [`CompileError::CircularImport`].
1031fn resolve_graph<R: Resolver>(
1032    root: &str,
1033    resolver: &R,
1034) -> Result<(Vec<ResolvedFile>, Vec<UnusedImport>), CompileError> {
1035    /// One unit of pending work on the traversal stack.
1036    enum Step {
1037        /// Visit a file at this resolved path (load, parse, enqueue its imports).
1038        Enter(String),
1039        /// Mark this content hash finished (pop it off the active path).
1040        Exit(String),
1041    }
1042
1043    let mut discovered: BTreeMap<String, DiscoveredFile> = BTreeMap::new(); // by hash
1044    let mut path_hash: BTreeMap<String, String> = BTreeMap::new(); // resolved path → hash
1045    let mut order: Vec<String> = Vec::new(); // finish order, by hash
1046    let mut active: BTreeSet<String> = BTreeSet::new(); // hashes on the current DFS path
1047    let mut work: Vec<Step> = vec![Step::Enter(root.to_string())];
1048
1049    while let Some(step) = work.pop() {
1050        match step {
1051            Step::Exit(hash) => {
1052                active.remove(&hash);
1053                order.push(hash);
1054            }
1055            Step::Enter(path) => {
1056                let content = resolver.load(&path)?;
1057                let hash = hash_hex(content.as_bytes());
1058                path_hash.insert(path.clone(), hash.clone());
1059                if active.contains(&hash) {
1060                    return Err(CompileError::CircularImport(path)); // back-edge to an ancestor
1061                }
1062                if discovered.contains_key(&hash) {
1063                    continue; // already fully resolved by another path — dedup
1064                }
1065                let program = elenchus_parser::parse(&content).map_err(|mut diag| {
1066                    diag.set_file(&path);
1067                    CompileError::Parse(diag)
1068                })?;
1069                let domain = extract_domain(&program, &path)?;
1070                let mut edges = Vec::new();
1071                let mut used_prefixes = BTreeSet::new();
1072                for stmt in &program.statements {
1073                    if let Statement::Import { path: p, alias } = stmt {
1074                        edges.push(ImportEdge {
1075                            alias: alias.as_ref().map(|a| a.data.to_string()),
1076                            child_path: resolver.resolve(&path, p.data),
1077                            line: p.span.location_line(),
1078                        });
1079                    } else {
1080                        collect_prefixes(stmt, &mut used_prefixes);
1081                    }
1082                }
1083                drop(program); // release the borrow on `content` before moving it
1084                active.insert(hash.clone());
1085                work.push(Step::Exit(hash.clone()));
1086                for e in edges.iter().rev() {
1087                    work.push(Step::Enter(e.child_path.clone()));
1088                }
1089                discovered.insert(
1090                    hash,
1091                    DiscoveredFile {
1092                        path,
1093                        content,
1094                        domain,
1095                        edges,
1096                        used_prefixes,
1097                    },
1098                );
1099            }
1100        }
1101    }
1102
1103    // Build each file's domain context now that every domain is known.
1104    // Look up every file's domain (small strings) so we can then *move* each
1105    // file's (potentially large) content out of `discovered` instead of cloning.
1106    let domain_of: BTreeMap<&str, &str> = discovered
1107        .iter()
1108        .map(|(h, f)| (h.as_str(), f.domain.as_str()))
1109        .collect();
1110
1111    let mut out = Vec::with_capacity(order.len());
1112    let mut unused: Vec<UnusedImport> = Vec::new();
1113    for hash in &order {
1114        let file = &discovered[hash];
1115        let mut aliases = BTreeMap::new();
1116        aliases.insert(file.domain.clone(), file.domain.clone());
1117        for edge in &file.edges {
1118            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
1119            let bind = edge
1120                .alias
1121                .clone()
1122                .unwrap_or_else(|| child_domain.to_string());
1123            match aliases.get(&bind) {
1124                Some(existing) if existing != child_domain => {
1125                    return Err(CompileError::DomainAliasClash { alias: bind });
1126                }
1127                _ => {
1128                    aliases.insert(bind, child_domain.to_string());
1129                }
1130            }
1131        }
1132
1133        // The domains this file actually references (each used prefix resolved
1134        // against its own domain / imports). An imported domain absent from this
1135        // set is an unused import.
1136        let referenced: BTreeSet<&str> = file
1137            .used_prefixes
1138            .iter()
1139            .filter_map(|p| match p {
1140                None => Some(file.domain.as_str()),
1141                Some(name) => aliases.get(name).map(|d| d.as_str()),
1142            })
1143            .collect();
1144        for edge in &file.edges {
1145            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
1146            if !referenced.contains(child_domain) {
1147                unused.push(UnusedImport {
1148                    file: file.path.clone(),
1149                    domain: child_domain.to_string(),
1150                    alias: edge.alias.clone(),
1151                    line: edge.line,
1152                });
1153            }
1154        }
1155
1156        let ctx = DomainCtx {
1157            current: file.domain.clone(),
1158            aliases,
1159        };
1160        out.push((hash.clone(), ctx));
1161    }
1162    unused.sort();
1163
1164    // Now move content/path out of `discovered` (no large clones) and pair with
1165    // the contexts built above.
1166    let files = out
1167        .into_iter()
1168        .map(|(hash, ctx)| {
1169            let file = discovered.remove(&hash).expect("hash was discovered");
1170            ResolvedFile {
1171                path: file.path,
1172                content: file.content,
1173                ctx,
1174            }
1175        })
1176        .collect();
1177    Ok((files, unused))
1178}
1179
1180/// Collect the domain prefixes used by a statement's atoms into `out` (`None` for
1181/// a bare atom, `Some(p)` for a `p.`-qualified one) — feeds the unused-import lint.
1182fn collect_prefixes(stmt: &Statement, out: &mut BTreeSet<Option<String>>) {
1183    let mut add = |a: &Atom| {
1184        out.insert(a.domain.map(|d| d.to_string()));
1185    };
1186    match stmt {
1187        Statement::Fact(a) | Statement::Negation(a) => add(&a.data),
1188        Statement::Assume(l) => add(&l.data.atom),
1189        Statement::Premise { body, .. } | Statement::Rule { body, .. } => match body {
1190            Body::List { atoms, .. } => atoms.iter().for_each(|a| add(&a.data)),
1191            Body::Impl {
1192                antecedent,
1193                consequent,
1194                ..
1195            } => antecedent
1196                .iter()
1197                .chain(consequent)
1198                .for_each(|l| add(&l.data.atom)),
1199        },
1200        Statement::Domain(_) | Statement::Import { .. } | Statement::Check { .. } => {}
1201    }
1202}
1203
1204/// The single `DOMAIN` a source declares, or an error if it has none or several.
1205fn extract_domain(
1206    program: &elenchus_parser::Program,
1207    source: &str,
1208) -> Result<String, CompileError> {
1209    let mut found: Option<String> = None;
1210    for stmt in &program.statements {
1211        if let Statement::Domain(name) = stmt {
1212            if found.is_some() {
1213                return Err(CompileError::DuplicateDomain {
1214                    file: source.to_string(),
1215                });
1216            }
1217            found = Some(name.data.to_string());
1218        }
1219    }
1220    found.ok_or_else(|| CompileError::MissingDomain {
1221        file: source.to_string(),
1222    })
1223}
1224
1225// --- helpers ---------------------------------------------------------------
1226
1227/// Levenshtein edit distance over Unicode scalar values. Small inputs (atom
1228/// names), so the simple two-row DP is plenty.
1229fn levenshtein(a: &str, b: &str) -> usize {
1230    let b: Vec<char> = b.chars().collect();
1231    let mut prev: Vec<usize> = (0..=b.len()).collect();
1232    let mut cur = vec![0usize; b.len() + 1];
1233    for (i, ca) in a.chars().enumerate() {
1234        cur[0] = i + 1;
1235        for (j, &cb) in b.iter().enumerate() {
1236            let cost = if ca == cb { 0 } else { 1 };
1237            cur[j + 1] = (prev[j] + cost).min(prev[j + 1] + 1).min(cur[j] + 1);
1238        }
1239        core::mem::swap(&mut prev, &mut cur);
1240    }
1241    prev[b.len()]
1242}
1243
1244/// The closest candidate to `word` within an edit-distance threshold, or `None`
1245/// when nothing is close enough to be a useful "did you mean".
1246///
1247/// The threshold scales with length (`len / 3`, in Unicode scalars) and is **not**
1248/// floored at 1: a value of 1–2 characters yields a budget of 0, so no suggestion
1249/// is offered. This is deliberate — for very short tokens (a single CJK character,
1250/// where one symbol is a whole word, or a two-letter code) every other short value
1251/// sits at distance 1, so a "did you mean" there is pure noise, not a typo cue. The
1252/// rejection itself is exact (set membership), so suppressing the guess never hides
1253/// a real error; it only withholds a meaningless one. Longer names tolerate a slip
1254/// or two, mirroring the spirit of the solver's typo-hint lint.
1255fn nearest<'a>(word: &str, candidates: &[&'a str]) -> Option<&'a str> {
1256    let budget = word.chars().count() / 3;
1257    if budget == 0 {
1258        return None;
1259    }
1260    candidates
1261        .iter()
1262        .map(|&c| (levenshtein(word, c), c))
1263        .filter(|&(d, _)| d <= budget)
1264        .min_by_key(|&(d, _)| d)
1265        .map(|(_, c)| c)
1266}
1267
1268/// Lower parsed, located literals to key-based [`RawLit`]s (drops spans),
1269/// resolving each atom's domain through `ctx`.
1270fn raw_lits(
1271    lits: &[elenchus_parser::Located<Literal>],
1272    ctx: &DomainCtx,
1273) -> Result<Vec<RawLit>, CompileError> {
1274    lits.iter()
1275        .map(|l| {
1276            Ok(RawLit {
1277                key: ctx.key(&l.data.atom)?,
1278                negated: l.data.negated,
1279            })
1280        })
1281        .collect()
1282}
1283
1284/// The surface keyword for a list op, used as [`Origin::kind`] in the report.
1285fn list_kind(op: ListOp) -> &'static str {
1286    match op {
1287        ListOp::Exclusive => kw::EXCLUSIVE,
1288        ListOp::Forbids => kw::FORBIDS,
1289        ListOp::OneOf => kw::ONEOF,
1290        ListOp::AtLeast => kw::ATLEAST,
1291    }
1292}
1293
1294/// Stable `domain|subject|predicate|object` string for an atom key (the unit from
1295/// which clause/fact/body signatures are built). Includes the domain so atoms in
1296/// different domains never share a signature.
1297fn key_sig(k: &AtomKey) -> String {
1298    alloc::format!(
1299        "{}|{}|{}|{}",
1300        k.domain,
1301        k.subject,
1302        k.predicate,
1303        k.object.as_deref().unwrap_or("")
1304    )
1305}
1306
1307/// Canonical, order-independent signature of a clause's literals (for dedup).
1308fn clause_sig(lits: &[RawLit]) -> String {
1309    let mut parts: Vec<String> = lits
1310        .iter()
1311        .map(|l| alloc::format!("{}|{}", key_sig(&l.key), l.negated as u8))
1312        .collect();
1313    parts.sort();
1314    parts.dedup();
1315    parts.join(";")
1316}
1317
1318/// Canonical body string for a named construct, hashed for redefinition checks.
1319/// Resolves atom domains through `ctx` so the signature keys on resolved identity.
1320fn canonical_body(
1321    name: &str,
1322    body: &Body,
1323    is_rule: bool,
1324    ctx: &DomainCtx,
1325) -> Result<String, CompileError> {
1326    let mut s = String::new();
1327    let _ = write!(s, "{}|{}|", if is_rule { "RULE" } else { "PREMISE" }, name);
1328    match body {
1329        Body::List { op, atoms } => {
1330            let _ = write!(s, "LIST|{}|", list_kind(*op));
1331            let mut keys: Vec<String> = atoms
1332                .iter()
1333                .map(|a| Ok(key_sig(&ctx.key(&a.data)?)))
1334                .collect::<Result<_, CompileError>>()?;
1335            keys.sort();
1336            s.push_str(&keys.join(";"));
1337        }
1338        Body::Impl {
1339            antecedent,
1340            ante_conn,
1341            consequent,
1342            cons_conn,
1343        } => {
1344            let conn = |c: &Conn| if *c == Conn::Or { "OR" } else { "AND" };
1345            s.push_str("IMPL|ANTE|");
1346            s.push_str(conn(ante_conn));
1347            s.push('|');
1348            s.push_str(&lit_sigs(antecedent, ctx)?);
1349            s.push_str("|CONS|");
1350            s.push_str(conn(cons_conn));
1351            s.push('|');
1352            s.push_str(&lit_sigs(consequent, ctx)?);
1353        }
1354    }
1355    Ok(s)
1356}
1357
1358/// Sorted `key|negated` signature of a literal list (order-independent), used
1359/// inside [`canonical_body`] so reordering a body does not look like a redefinition.
1360fn lit_sigs(
1361    lits: &[elenchus_parser::Located<Literal>],
1362    ctx: &DomainCtx,
1363) -> Result<String, CompileError> {
1364    let mut parts: Vec<String> = lits
1365        .iter()
1366        .map(|l| {
1367            Ok(alloc::format!(
1368                "{}|{}",
1369                key_sig(&ctx.key(&l.data.atom)?),
1370                l.data.negated as u8
1371            ))
1372        })
1373        .collect::<Result<_, CompileError>>()?;
1374    parts.sort();
1375    Ok(parts.join(";"))
1376}
1377
1378#[cfg(test)]
1379mod tests {
1380    use super::*;
1381
1382    /// Compile a single inline source under a default `DOMAIN t`, so test
1383    /// programs need not repeat the declaration. Atoms land in domain `t`.
1384    fn cs(src: &str) -> Result<Compiled, CompileError> {
1385        compile_source("<t>", &alloc::format!("DOMAIN t\n{src}"))
1386    }
1387
1388    /// An atom key in the default test domain `t`.
1389    fn key(subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
1390        key_in("t", subject, predicate, object)
1391    }
1392
1393    /// An atom key in an explicit domain.
1394    fn key_in(domain: &str, subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
1395        AtomKey {
1396            domain: domain.to_string(),
1397            subject: subject.to_string(),
1398            predicate: predicate.to_string(),
1399            object: object.map(|o| o.to_string()),
1400        }
1401    }
1402
1403    fn id(c: &Compiled, k: &AtomKey) -> AtomId {
1404        c.atoms.iter().position(|a| a == k).unwrap() as AtomId
1405    }
1406
1407    #[test]
1408    fn exclusive_unfolds_pairwise() {
1409        let src = r#"
1410        PREMISE e:
1411            EXCLUSIVE
1412                x a
1413                x b
1414                x c
1415        "#;
1416        let c = cs(src).unwrap();
1417        // C(3,2) = 3 clauses, each of 2 positive literals.
1418        assert_eq!(c.clauses.len(), 3);
1419        for cl in &c.clauses {
1420            assert_eq!(cl.lits.len(), 2);
1421            assert!(cl.lits.iter().all(|l| !l.negated));
1422        }
1423    }
1424
1425    #[test]
1426    fn implication_negates_consequent() {
1427        // WHEN x a THEN x b  ==  Impossible([x a, NOT x b])
1428        let src = r#"
1429        PREMISE r:
1430            WHEN x a
1431            THEN x b
1432        "#;
1433        let c = cs(src).unwrap();
1434        assert_eq!(c.clauses.len(), 1);
1435        let cl = &c.clauses[0];
1436        assert_eq!(cl.lits.len(), 2);
1437        let a = id(&c, &key("x", "a", None));
1438        let b = id(&c, &key("x", "b", None));
1439        assert!(cl.lits.contains(&Lit {
1440            atom: a,
1441            negated: false
1442        }));
1443        assert!(cl.lits.contains(&Lit {
1444            atom: b,
1445            negated: true
1446        }));
1447    }
1448
1449    #[test]
1450    fn negated_consequent_flips_to_positive() {
1451        // THEN NOT x b  →  NOT(NOT x b) = x b positive inside Impossible
1452        let src = r#"
1453        PREMISE r:
1454            WHEN x a
1455            THEN NOT x b
1456        "#;
1457        let c = cs(src).unwrap();
1458        let b = id(&c, &key("x", "b", None));
1459        assert!(c.clauses[0].lits.contains(&Lit {
1460            atom: b,
1461            negated: false
1462        }));
1463    }
1464
1465    #[test]
1466    fn consequent_or_is_one_clause_with_all_negated() {
1467        // WHEN x p THEN x a OR x b  ==  Impossible([x p, NOT x a, NOT x b])
1468        let src = r#"
1469        PREMISE r:
1470            WHEN x p
1471            THEN x a
1472            OR x b
1473        "#;
1474        let c = cs(src).unwrap();
1475        assert_eq!(c.clauses.len(), 1);
1476        let cl = &c.clauses[0];
1477        assert_eq!(cl.lits.len(), 3);
1478        let p = id(&c, &key("x", "p", None));
1479        let a = id(&c, &key("x", "a", None));
1480        let b = id(&c, &key("x", "b", None));
1481        assert!(cl.lits.contains(&Lit {
1482            atom: p,
1483            negated: false
1484        }));
1485        assert!(cl.lits.contains(&Lit {
1486            atom: a,
1487            negated: true
1488        }));
1489        assert!(cl.lits.contains(&Lit {
1490            atom: b,
1491            negated: true
1492        }));
1493    }
1494
1495    #[test]
1496    fn antecedent_or_is_one_clause_per_disjunct() {
1497        // WHEN x a OR x b THEN x c
1498        //   == Impossible([x a, NOT x c]) ∧ Impossible([x b, NOT x c])
1499        let src = r#"
1500        PREMISE r:
1501            WHEN x a
1502            OR x b
1503            THEN x c
1504        "#;
1505        let c = cs(src).unwrap();
1506        assert_eq!(c.clauses.len(), 2);
1507        let a = id(&c, &key("x", "a", None));
1508        let b = id(&c, &key("x", "b", None));
1509        let cc = id(&c, &key("x", "c", None));
1510        // every clause has exactly two lits and carries NOT c
1511        for cl in &c.clauses {
1512            assert_eq!(cl.lits.len(), 2);
1513            assert!(cl.lits.contains(&Lit {
1514                atom: cc,
1515                negated: true
1516            }));
1517        }
1518        let has = |atom| {
1519            c.clauses.iter().any(|cl| {
1520                cl.lits.contains(&Lit {
1521                    atom,
1522                    negated: false,
1523                })
1524            })
1525        };
1526        assert!(has(a) && has(b));
1527    }
1528
1529    #[test]
1530    fn antecedent_or_with_consequent_or_distributes() {
1531        // (a ∨ b) → (c ∨ d): Impossible([a,¬c,¬d]) ∧ Impossible([b,¬c,¬d])
1532        let src = r#"
1533        PREMISE r:
1534            WHEN x a
1535            OR x b
1536            THEN x c
1537            OR x d
1538        "#;
1539        let c = cs(src).unwrap();
1540        assert_eq!(c.clauses.len(), 2);
1541        for cl in &c.clauses {
1542            assert_eq!(cl.lits.len(), 3);
1543        }
1544    }
1545
1546    #[test]
1547    fn rule_with_or_antecedent_splits_into_two_rules() {
1548        // (a ∨ b) → c derives c whenever either fires: two single-antecedent rules.
1549        let src = r#"
1550        RULE r:
1551            WHEN x a
1552            OR x b
1553            THEN x c
1554        "#;
1555        let c = cs(src).unwrap();
1556        assert_eq!(c.rules.len(), 2);
1557        assert!(
1558            c.rules
1559                .iter()
1560                .all(|r| r.antecedent.len() == 1 && r.consequent.len() == 1)
1561        );
1562    }
1563
1564    #[test]
1565    fn rule_with_or_consequent_is_rejected() {
1566        // A rule cannot derive a disjunction — must be a PREMISE.
1567        let src = r#"
1568        RULE r:
1569            WHEN x a
1570            THEN x b
1571            OR x c
1572        "#;
1573        let err = cs(src).unwrap_err();
1574        assert!(matches!(
1575            err,
1576            CompileError::RuleDisjunctiveConsequent { .. }
1577        ));
1578    }
1579
1580    #[test]
1581    fn oneof_is_pairwise_plus_at_least_one() {
1582        let src = r#"
1583        PREMISE o:
1584            ONEOF
1585                x a
1586                x b
1587        "#;
1588        let c = cs(src).unwrap();
1589        // pairwise C(2,2)=1 + 1 at-least-one = 2 clauses
1590        assert_eq!(c.clauses.len(), 2);
1591        // the at-least-one clause is the all-negated one
1592        assert!(c.clauses.iter().any(|cl| cl.lits.iter().all(|l| l.negated)));
1593    }
1594
1595    #[test]
1596    fn atleast_is_one_negated_clause() {
1597        let src = r#"
1598        PREMISE a:
1599            ATLEAST
1600                x a
1601                x b
1602                x c
1603        "#;
1604        let c = cs(src).unwrap();
1605        assert_eq!(c.clauses.len(), 1);
1606        assert_eq!(c.clauses[0].lits.len(), 3);
1607        assert!(c.clauses[0].lits.iter().all(|l| l.negated));
1608    }
1609
1610    #[test]
1611    fn rules_are_separate_from_clauses() {
1612        let src = r#"
1613        RULE needs:
1614            WHEN x a
1615            THEN x b
1616        "#;
1617        let c = cs(src).unwrap();
1618        assert_eq!(c.clauses.len(), 0);
1619        assert_eq!(c.rules.len(), 1);
1620        assert_eq!(c.rules[0].antecedent.len(), 1);
1621        assert_eq!(c.rules[0].consequent.len(), 1);
1622    }
1623
1624    #[test]
1625    fn atoms_are_canonically_sorted() {
1626        let src = r#"
1627        FACT z z
1628        FACT a a
1629        FACT m m
1630        "#;
1631        let c = cs(src).unwrap();
1632        let mut sorted = c.atoms.clone();
1633        sorted.sort();
1634        assert_eq!(c.atoms, sorted);
1635    }
1636
1637    #[test]
1638    fn duplicate_premise_is_idempotent() {
1639        let src = r#"
1640        PREMISE e:
1641            EXCLUSIVE
1642                x a
1643                x b
1644        PREMISE e:
1645            EXCLUSIVE
1646                x a
1647                x b
1648        "#;
1649        let c = cs(src).unwrap();
1650        assert_eq!(c.clauses.len(), 1);
1651    }
1652
1653    #[test]
1654    fn redefinition_with_different_body_errors() {
1655        let src = r#"
1656        PREMISE e:
1657            EXCLUSIVE
1658                x a
1659                x b
1660        PREMISE e:
1661            EXCLUSIVE
1662                x a
1663                x c
1664        "#;
1665        let err = cs(src).unwrap_err();
1666        assert_eq!(
1667            err,
1668            CompileError::PremiseRedefinition {
1669                name: "e".to_string()
1670            }
1671        );
1672    }
1673
1674    #[test]
1675    fn duplicate_fact_is_idempotent() {
1676        let c = cs("FACT x a\nFACT x a\n").unwrap();
1677        assert_eq!(c.facts.len(), 1);
1678    }
1679
1680    #[test]
1681    fn conflicting_facts_are_both_kept() {
1682        // FACT X + NOT X is a CONFLICT for the solver, not a compile error.
1683        let c = cs("FACT x a\nNOT x a\n").unwrap();
1684        assert_eq!(c.facts.len(), 2);
1685    }
1686
1687    #[test]
1688    fn import_is_recorded_pending() {
1689        let c = cs("IMPORT \"physics.vrf\"\nFACT x a\n").unwrap();
1690        assert_eq!(c.pending_imports, vec!["physics.vrf".to_string()]);
1691    }
1692
1693    #[test]
1694    fn qualified_fact_lands_in_the_imported_domain() {
1695        // The library's premise is about `physics.Engine_X has fuel`; the main file
1696        // asserts a fact qualified INTO that domain, so the two share one atom id.
1697        let mut r = MemoryResolver::new();
1698        r.add(
1699            "lib.vrf",
1700            r#"
1701        DOMAIN physics
1702        PREMISE needs_fuel:
1703            WHEN Engine_X has engine
1704            THEN Engine_X has fuel
1705        "#,
1706        );
1707        r.add(
1708            "main.vrf",
1709            r#"
1710        DOMAIN main
1711        IMPORT "lib.vrf"
1712        FACT physics.Engine_X has engine
1713        FACT physics.Engine_X has fuel
1714        "#,
1715        );
1716        let c = compile("main.vrf", &r).unwrap();
1717        assert!(c.pending_imports.is_empty());
1718        assert_eq!(c.clauses.len(), 1); // the imported premise
1719        assert_eq!(c.facts.len(), 2);
1720
1721        // `physics.Engine_X has fuel` from the FACT and the imported premise share an id.
1722        let fuel = key_in("physics", "Engine_X", "has", Some("fuel"));
1723        let fuel_id = id(&c, &fuel);
1724        assert!(c.facts.iter().any(|f| f.atom == fuel_id));
1725        assert!(c.clauses[0].lits.iter().any(|l| l.atom == fuel_id));
1726    }
1727
1728    #[test]
1729    fn same_triple_in_different_domains_does_not_unify() {
1730        // Without a domain prefix the fact lands in `main`, NOT `physics`, so it is
1731        // a distinct atom from the library's `physics.Engine_X has fuel`.
1732        let mut r = MemoryResolver::new();
1733        r.add("lib.vrf", "DOMAIN physics\nFACT Engine_X has fuel\n");
1734        r.add(
1735            "main.vrf",
1736            "DOMAIN main\nIMPORT \"lib.vrf\"\nFACT Engine_X has fuel\n",
1737        );
1738        let c = compile("main.vrf", &r).unwrap();
1739        // Two distinct atoms: physics.Engine_X has fuel and main.Engine_X has fuel.
1740        assert!(c.atoms.iter().any(|a| a.domain == "physics"));
1741        assert!(c.atoms.iter().any(|a| a.domain == "main"));
1742        assert_eq!(
1743            c.atoms
1744                .iter()
1745                .filter(|a| a.subject == "Engine_X" && a.predicate == "has")
1746                .count(),
1747            2
1748        );
1749    }
1750
1751    #[test]
1752    fn import_alias_binds_a_local_domain_name() {
1753        // `AS phys` lets the consumer reference the imported domain by a local name.
1754        let mut r = MemoryResolver::new();
1755        r.add("lib.vrf", "DOMAIN physics\nFACT Motor over_200\n");
1756        r.add(
1757            "main.vrf",
1758            "DOMAIN main\nIMPORT \"lib.vrf\" AS phys\nFACT phys.Motor over_100\n",
1759        );
1760        let c = compile("main.vrf", &r).unwrap();
1761        // Both facts live in the physics domain (one via its own name, one via alias).
1762        assert_eq!(c.atoms.iter().filter(|a| a.domain == "physics").count(), 2);
1763    }
1764
1765    #[test]
1766    fn unknown_domain_reference_errors() {
1767        // Referencing a domain that is neither this file's nor imported here fails.
1768        let err = cs("FACT ghost.x a\n").unwrap_err();
1769        assert!(matches!(err, CompileError::UnknownDomain { .. }));
1770    }
1771
1772    #[test]
1773    fn imports_are_not_transitive_for_naming() {
1774        // main imports physics; physics imports math. main may NOT name math.
1775        let mut r = MemoryResolver::new();
1776        r.add("math.vrf", "DOMAIN math\nFACT foo bar\n");
1777        r.add(
1778            "physics.vrf",
1779            "DOMAIN physics\nIMPORT \"math.vrf\"\nFACT Motor over_100\n",
1780        );
1781        r.add(
1782            "main.vrf",
1783            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT math.foo bar\n",
1784        );
1785        let err = compile("main.vrf", &r).unwrap_err();
1786        assert!(matches!(err, CompileError::UnknownDomain { .. }));
1787    }
1788
1789    #[test]
1790    fn transitive_dependency_clauses_still_load() {
1791        // Even though main can't *name* math, math's clauses still participate.
1792        let mut r = MemoryResolver::new();
1793        r.add(
1794            "math.vrf",
1795            "DOMAIN math\nPREMISE e:\n    EXCLUSIVE\n        x a\n        x b\n",
1796        );
1797        r.add("physics.vrf", "DOMAIN physics\nIMPORT \"math.vrf\"\n");
1798        r.add("main.vrf", "DOMAIN main\nIMPORT \"physics.vrf\"\n");
1799        let c = compile("main.vrf", &r).unwrap();
1800        assert_eq!(c.clauses.len(), 1); // math's clause loaded transitively
1801        assert!(c.clauses.iter().all(|cl| cl.origin.source == "math.vrf"));
1802    }
1803
1804    #[test]
1805    fn missing_domain_errors() {
1806        let err = compile_source("nodomain.vrf", "FACT x a\n").unwrap_err();
1807        assert!(matches!(err, CompileError::MissingDomain { .. }));
1808    }
1809
1810    #[test]
1811    fn duplicate_domain_errors() {
1812        let err = compile_source("dup.vrf", "DOMAIN a\nDOMAIN b\nFACT x a\n").unwrap_err();
1813        assert!(matches!(err, CompileError::DuplicateDomain { .. }));
1814    }
1815
1816    #[test]
1817    fn alias_clash_when_one_local_name_binds_two_domains() {
1818        // The same local alias `x` bound to two genuinely different domains is a
1819        // clash: disambiguate with distinct aliases.
1820        let mut r = MemoryResolver::new();
1821        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
1822        r.add("b.vrf", "DOMAIN chemistry\nFACT atom reacts\n");
1823        r.add(
1824            "main.vrf",
1825            "DOMAIN main\nIMPORT \"a.vrf\" AS x\nIMPORT \"b.vrf\" AS x\n",
1826        );
1827        let err = compile("main.vrf", &r).unwrap_err();
1828        assert!(matches!(err, CompileError::DomainAliasClash { .. }));
1829    }
1830
1831    #[test]
1832    fn two_files_with_the_same_domain_name_merge() {
1833        // Nominal domains: two files both declaring DOMAIN physics share it (the
1834        // value of importing a premise library is exactly this unification).
1835        let mut r = MemoryResolver::new();
1836        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
1837        r.add(
1838            "main.vrf",
1839            "DOMAIN physics\nIMPORT \"a.vrf\"\nFACT Motor over_200\n",
1840        );
1841        let c = compile("main.vrf", &r).unwrap();
1842        // Both motors live in the single shared `physics` domain.
1843        assert!(c.atoms.iter().all(|a| a.domain == "physics"));
1844        assert_eq!(c.atoms.len(), 2);
1845    }
1846
1847    #[test]
1848    fn diamond_import_is_deduped() {
1849        // main → a, c ; a → base ; c → base. base merged once.
1850        let mut r = MemoryResolver::new();
1851        r.add(
1852            "base.vrf",
1853            r#"
1854        DOMAIN base
1855        PREMISE b:
1856            EXCLUSIVE
1857                x a
1858                x b
1859        "#,
1860        );
1861        r.add("a.vrf", "DOMAIN a\nIMPORT \"base.vrf\"\n");
1862        r.add("c.vrf", "DOMAIN c\nIMPORT \"base.vrf\"\n");
1863        r.add(
1864            "main.vrf",
1865            "DOMAIN main\nIMPORT \"a.vrf\"\nIMPORT \"c.vrf\"\n",
1866        );
1867        let c = compile("main.vrf", &r).unwrap();
1868        assert_eq!(c.clauses.len(), 1); // base's single clause, not two
1869    }
1870
1871    #[test]
1872    fn circular_import_errors() {
1873        let mut r = MemoryResolver::new();
1874        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
1875        r.add("b.vrf", "DOMAIN b\nIMPORT \"a.vrf\"\n");
1876        let err = compile("a.vrf", &r).unwrap_err();
1877        assert!(matches!(err, CompileError::CircularImport(_)));
1878    }
1879
1880    #[test]
1881    fn three_node_cycle_errors() {
1882        // a → b → c → a. The back-edge to the on-path ancestor is detected.
1883        let mut r = MemoryResolver::new();
1884        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
1885        r.add("b.vrf", "DOMAIN b\nIMPORT \"c.vrf\"\n");
1886        r.add("c.vrf", "DOMAIN c\nIMPORT \"a.vrf\"\n");
1887        let err = compile("a.vrf", &r).unwrap_err();
1888        assert!(matches!(err, CompileError::CircularImport(_)));
1889    }
1890
1891    #[test]
1892    fn shared_grandchild_diamond_loads_once() {
1893        // The user's case: a imports B and C; C ALSO imports B. B must be compiled
1894        // exactly once (its single clause is not duplicated by the two paths to it).
1895        let mut r = MemoryResolver::new();
1896        r.add(
1897            "b.vrf",
1898            "DOMAIN b\nPREMISE e:\n    EXCLUSIVE\n        x a\n        x b\n",
1899        );
1900        r.add("c.vrf", "DOMAIN c\nIMPORT \"b.vrf\"\n");
1901        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\nIMPORT \"c.vrf\"\n");
1902        let c = compile("a.vrf", &r).unwrap();
1903        assert_eq!(
1904            c.clauses.len(),
1905            1,
1906            "b.vrf's clause must appear exactly once"
1907        );
1908    }
1909
1910    #[test]
1911    fn exponential_fan_out_is_memoized_not_blown_up() {
1912        // f_k imports f_{k-1} TWICE. Without content-hash memoization the visit
1913        // count is 2^k (2^40 ≈ a trillion); with it, the work is linear, so this
1914        // finishes instantly. A guard against any combinatorial blow-up / DoS.
1915        let mut r = MemoryResolver::new();
1916        r.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
1917        let n = 40;
1918        for k in 1..=n {
1919            r.add(
1920                &alloc::format!("f{k}.vrf"),
1921                &alloc::format!(
1922                    "DOMAIN d{k}\nIMPORT \"f{}.vrf\"\nIMPORT \"f{}.vrf\"\n",
1923                    k - 1,
1924                    k - 1
1925                ),
1926            );
1927        }
1928        let c = compile(&alloc::format!("f{n}.vrf"), &r).unwrap();
1929        assert_eq!(c.facts.len(), 1); // the single fact from f0, reached once
1930    }
1931
1932    #[test]
1933    fn very_deep_linear_chain_does_not_overflow() {
1934        // A long non-cyclic chain. Resolution is iterative (explicit work stack),
1935        // so a depth that would overflow a recursive loader compiles cleanly.
1936        let mut r = MemoryResolver::new();
1937        r.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
1938        let n = 10_000;
1939        for k in 1..=n {
1940            r.add(
1941                &alloc::format!("f{k}.vrf"),
1942                &alloc::format!("DOMAIN d{k}\nIMPORT \"f{}.vrf\"\n", k - 1),
1943            );
1944        }
1945        let c = compile(&alloc::format!("f{n}.vrf"), &r).unwrap();
1946        assert_eq!(c.facts.len(), 1);
1947    }
1948
1949    #[test]
1950    fn missing_import_errors() {
1951        let mut r = MemoryResolver::new();
1952        r.add("main.vrf", "DOMAIN main\nIMPORT \"ghost.vrf\"\n");
1953        let err = compile("main.vrf", &r).unwrap_err();
1954        assert!(matches!(err, CompileError::ImportNotFound(_)));
1955    }
1956
1957    #[test]
1958    fn unused_import_is_flagged() {
1959        // main imports physics but never writes a `physics.` atom → unused.
1960        let mut r = MemoryResolver::new();
1961        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
1962        r.add(
1963            "main.vrf",
1964            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT x a\n",
1965        );
1966        let c = compile("main.vrf", &r).unwrap();
1967        assert_eq!(c.unused_imports.len(), 1);
1968        assert_eq!(c.unused_imports[0].domain, "physics");
1969        assert_eq!(c.unused_imports[0].file, "main.vrf");
1970        assert_eq!(c.unused_imports[0].alias, None);
1971    }
1972
1973    #[test]
1974    fn referenced_import_is_not_unused() {
1975        // The same import, but now a `physics.` atom uses it → not flagged.
1976        let mut r = MemoryResolver::new();
1977        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
1978        r.add(
1979            "main.vrf",
1980            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT physics.Motor over_200\n",
1981        );
1982        let c = compile("main.vrf", &r).unwrap();
1983        assert!(c.unused_imports.is_empty(), "{:?}", c.unused_imports);
1984    }
1985
1986    #[test]
1987    fn unused_import_records_its_alias() {
1988        let mut r = MemoryResolver::new();
1989        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
1990        r.add(
1991            "main.vrf",
1992            "DOMAIN main\nIMPORT \"physics.vrf\" AS phys\nFACT x a\n",
1993        );
1994        let c = compile("main.vrf", &r).unwrap();
1995        assert_eq!(c.unused_imports.len(), 1);
1996        assert_eq!(c.unused_imports[0].alias.as_deref(), Some("phys"));
1997    }
1998
1999    #[test]
2000    fn import_referenced_only_inside_a_premise_is_used() {
2001        // The reference can be anywhere — here inside a premise body, not a fact.
2002        let mut r = MemoryResolver::new();
2003        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2004        r.add(
2005            "main.vrf",
2006            "DOMAIN main\nIMPORT \"physics.vrf\"\nPREMISE p:\n    WHEN physics.Motor over_100\n    THEN x ok\n",
2007        );
2008        let c = compile("main.vrf", &r).unwrap();
2009        assert!(c.unused_imports.is_empty(), "{:?}", c.unused_imports);
2010    }
2011
2012    #[test]
2013    fn same_premise_name_across_files_coexists() {
2014        // Two files may legitimately reuse a premise NAME with different bodies.
2015        // Names are per-source labels — both premises apply, qualified by source.
2016        // NOT a redefinition error. (Atoms stay apart too: different domains.)
2017        let mut r = MemoryResolver::new();
2018        r.add(
2019            "physics.vrf",
2020            r#"
2021        DOMAIN physics
2022        PREMISE safety:
2023            EXCLUSIVE
2024                x a
2025                x b
2026        "#,
2027        );
2028        r.add(
2029            "main.vrf",
2030            r#"
2031        DOMAIN main
2032        IMPORT "physics.vrf"
2033        PREMISE safety:
2034            EXCLUSIVE
2035                x a
2036                x c
2037        "#,
2038        );
2039        let c = compile("main.vrf", &r).unwrap();
2040        assert_eq!(c.clauses.len(), 2); // a-b from physics, a-c from main
2041        assert!(c.clauses.iter().any(|cl| cl.origin.source == "physics.vrf"));
2042        assert!(c.clauses.iter().any(|cl| cl.origin.source == "main.vrf"));
2043    }
2044
2045    #[test]
2046    fn redefinition_within_one_source_still_errors() {
2047        // But reusing a name with a different body *inside one source* is a mistake.
2048        let src = r#"
2049        DOMAIN m
2050        PREMISE e:
2051            EXCLUSIVE
2052                x a
2053                x b
2054        PREMISE e:
2055            EXCLUSIVE
2056                x a
2057                x c
2058        "#;
2059        let err = compile_source("main.vrf", src).unwrap_err();
2060        assert_eq!(
2061            err,
2062            CompileError::PremiseRedefinition {
2063                name: "e".to_string()
2064            }
2065        );
2066    }
2067
2068    #[test]
2069    fn import_demo_examples_resolve() {
2070        let mut r = MemoryResolver::new();
2071        r.add(
2072            "physics.vrf",
2073            include_str!("../../../docs/examples/physics.vrf"),
2074        );
2075        r.add(
2076            "import-demo.vrf",
2077            include_str!("../../../docs/examples/import-demo.vrf"),
2078        );
2079        let c = compile("import-demo.vrf", &r).unwrap();
2080        assert!(c.pending_imports.is_empty());
2081        // physics.vrf: one_path (EXCLUSIVE, 1 clause) + speed_order (impl, 1 clause)
2082        assert_eq!(c.clauses.len(), 2);
2083        // The qualified facts (`physics.Motor …`) share ids with the imported premise.
2084        let over_100 = id(&c, &key_in("physics", "Motor", "over_100", None));
2085        assert!(c.facts.iter().any(|f| f.atom == over_100));
2086        assert!(
2087            c.clauses
2088                .iter()
2089                .any(|cl| cl.lits.iter().any(|l| l.atom == over_100))
2090        );
2091    }
2092
2093    #[test]
2094    fn creature_example_compiles() {
2095        let src = include_str!("../../../docs/examples/creature.vrf");
2096        let c = compile_source("creature.vrf", src).unwrap();
2097        assert_eq!(c.facts.len(), 2); // flying, warm_blood
2098        assert_eq!(c.rules.len(), 1); // needs_oxygen
2099        assert_eq!(c.checks.len(), 1);
2100        // fly_xor_swim (1) + wings_need_bone (THEN wing AND bone → 2) + no_dual_temp (1) = 4
2101        assert_eq!(c.clauses.len(), 4);
2102        assert_eq!(c.atoms.len(), 7);
2103    }
2104
2105    #[test]
2106    fn forbids_unfolds_pairwise() {
2107        let src = r#"
2108        PREMISE f:
2109            FORBIDS
2110                x a
2111                x b
2112                x c
2113        "#;
2114        let c = cs(src).unwrap();
2115        assert_eq!(c.clauses.len(), 3); // C(3,2), like EXCLUSIVE
2116        assert!(
2117            c.clauses
2118                .iter()
2119                .all(|cl| cl.lits.len() == 2 && cl.lits.iter().all(|l| !l.negated))
2120        );
2121    }
2122
2123    #[test]
2124    fn rule_with_multiple_consequents() {
2125        let src = r#"
2126        RULE r:
2127            WHEN x a
2128            THEN x b
2129            AND  x c
2130        "#;
2131        let c = cs(src).unwrap();
2132        assert_eq!(c.rules.len(), 1);
2133        assert_eq!(c.rules[0].consequent.len(), 2);
2134    }
2135
2136    #[test]
2137    fn negated_antecedent_literal_keeps_polarity() {
2138        // WHEN NOT x a THEN x b  ==  Impossible([NOT x a, NOT x b])
2139        let src = r#"
2140        PREMISE a:
2141            WHEN NOT x a
2142            THEN x b
2143        "#;
2144        let c = cs(src).unwrap();
2145        let xa = id(&c, &key("x", "a", None));
2146        assert!(c.clauses[0].lits.contains(&Lit {
2147            atom: xa,
2148            negated: true
2149        }));
2150    }
2151
2152    #[test]
2153    fn rule_keeps_consequent_negation() {
2154        let src = r#"
2155        RULE r:
2156            WHEN x a
2157            THEN NOT x b
2158        "#;
2159        let c = cs(src).unwrap();
2160        assert!(c.rules[0].consequent[0].negated);
2161    }
2162
2163    #[test]
2164    fn compilation_is_deterministic() {
2165        let src = r#"
2166        PREMISE e:
2167            EXCLUSIVE
2168                z z
2169                a a
2170                m m
2171        FACT q q
2172        "#;
2173        assert_eq!(cs(src).unwrap(), cs(src).unwrap());
2174    }
2175
2176    #[test]
2177    fn empty_program_compiles_to_empty_ir() {
2178        let c = cs("// nothing here\n").unwrap();
2179        assert!(c.atoms.is_empty() && c.clauses.is_empty() && c.facts.is_empty());
2180    }
2181
2182    #[test]
2183    fn same_clause_from_two_named_premises_is_deduped() {
2184        // Different names, identical logical content → one clause, no redefinition.
2185        let src = r#"
2186        PREMISE e1:
2187            EXCLUSIVE
2188                x a
2189                x b
2190        PREMISE e2:
2191            EXCLUSIVE
2192                x a
2193                x b
2194        "#;
2195        let c = cs(src).unwrap();
2196        assert_eq!(c.clauses.len(), 1);
2197    }
2198
2199    #[test]
2200    fn object_distinguishes_atom_identity() {
2201        // `x p a` and `x p b` differ only by object → two distinct atoms.
2202        let c = cs("FACT x p a\nFACT x p b\n").unwrap();
2203        assert_eq!(c.atoms.len(), 2);
2204    }
2205
2206    // --- closed-world: ONEOF closes its variable's value set -----------------
2207
2208    /// A `ONEOF` body declaring three values of `resolved is …`.
2209    const ONEOF_RESOLVED: &str = "PREMISE pick:\n    ONEOF\n        resolved is censored\n        resolved is censored_mtp\n        resolved is uncensored\n";
2210
2211    #[test]
2212    fn value_outside_oneof_is_rejected() {
2213        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censoredmtp\n");
2214        let err = cs(&src).unwrap_err();
2215        let CompileError::UnknownValue(e) = err else {
2216            panic!("expected UnknownValue, got {err:?}");
2217        };
2218        assert_eq!(e.value, "censoredmtp");
2219        assert_eq!(e.subject, "resolved");
2220        assert_eq!(e.predicate, "is");
2221        assert_eq!(e.declared, "censored, censored_mtp, uncensored");
2222    }
2223
2224    #[test]
2225    fn near_miss_value_suggests_the_intended_one() {
2226        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censoredmtp\n");
2227        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
2228            panic!("expected UnknownValue");
2229        };
2230        assert_eq!(e.suggestion, " — did you mean `censored_mtp`?");
2231    }
2232
2233    #[test]
2234    fn far_off_value_offers_no_suggestion() {
2235        // `wildly_different` is past the edit-distance budget of every declared
2236        // value, so we reject it but do not guess.
2237        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is wildly_different\n");
2238        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
2239            panic!("expected UnknownValue");
2240        };
2241        assert_eq!(e.suggestion, "");
2242    }
2243
2244    #[test]
2245    fn declared_value_compiles_cleanly() {
2246        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censored_mtp\n");
2247        assert!(cs(&src).is_ok());
2248    }
2249
2250    #[test]
2251    fn oneof_declared_after_the_reference_still_catches_it() {
2252        // The check runs once every source is accumulated, so order is irrelevant.
2253        let src = alloc::format!("FACT resolved is censoredmtp\n{ONEOF_RESOLVED}");
2254        assert!(matches!(
2255            cs(&src).unwrap_err(),
2256            CompileError::UnknownValue(_)
2257        ));
2258    }
2259
2260    #[test]
2261    fn out_of_set_value_inside_a_premise_is_caught() {
2262        // Closed-world covers references anywhere — not just FACTs.
2263        let src = alloc::format!(
2264            "{ONEOF_RESOLVED}PREMISE p:\n    WHEN resolved is censoredmtp\n    THEN x done\n"
2265        );
2266        assert!(matches!(
2267            cs(&src).unwrap_err(),
2268            CompileError::UnknownValue(_)
2269        ));
2270    }
2271
2272    #[test]
2273    fn out_of_set_value_inside_a_rule_is_caught() {
2274        let src = alloc::format!(
2275            "{ONEOF_RESOLVED}RULE r:\n    WHEN x go\n    THEN resolved is censoredmtp\n"
2276        );
2277        assert!(matches!(
2278            cs(&src).unwrap_err(),
2279            CompileError::UnknownValue(_)
2280        ));
2281    }
2282
2283    #[test]
2284    fn binary_atoms_in_a_oneof_do_not_close_anything() {
2285        // `alice cooks` / `alice cleans` have no object slot, so there is no value
2286        // set to violate — a later `alice bakes` is just another atom, not an error.
2287        let src = "PREMISE chores:\n    ONEOF\n        alice cooks\n        alice cleans\nFACT alice bakes\n";
2288        assert!(cs(src).is_ok());
2289    }
2290
2291    #[test]
2292    fn a_subject_without_a_oneof_stays_open() {
2293        // No ONEOF over `mood is …` → open world, any value is a fresh atom.
2294        let src = alloc::format!("{ONEOF_RESOLVED}FACT mood is anything_goes\n");
2295        assert!(cs(&src).is_ok());
2296    }
2297
2298    #[test]
2299    fn two_oneofs_union_their_declared_values() {
2300        // A value declared by either ONEOF for the same variable is legal.
2301        let src = "PREMISE a:\n    ONEOF\n        v is one\n        v is two\nPREMISE b:\n    ONEOF\n        v is two\n        v is three\nFACT v is three\n";
2302        assert!(cs(src).is_ok());
2303    }
2304
2305    #[test]
2306    fn earliest_offender_is_reported() {
2307        // Two violations; the diagnostic points at the first by line.
2308        let src = alloc::format!(
2309            "{ONEOF_RESOLVED}FACT resolved is firstbad\nFACT resolved is secondbad\n"
2310        );
2311        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
2312            panic!("expected UnknownValue");
2313        };
2314        assert_eq!(e.value, "firstbad");
2315    }
2316
2317    #[test]
2318    fn closed_world_spans_imported_domains() {
2319        // physics closes `Motor speed …`; main, referencing it via the prefix with
2320        // a typo, is rejected — the value set is shared across the domain boundary.
2321        let mut r = MemoryResolver::new();
2322        r.add(
2323            "physics.vrf",
2324            "DOMAIN physics\nPREMISE g:\n    ONEOF\n        Motor speed slow\n        Motor speed fast\n",
2325        );
2326        r.add(
2327            "main.vrf",
2328            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT physics.Motor speed faast\n",
2329        );
2330        let CompileError::UnknownValue(e) = compile("main.vrf", &r).unwrap_err() else {
2331            panic!("expected UnknownValue");
2332        };
2333        assert_eq!(e.value, "faast");
2334        assert_eq!(e.suggestion, " — did you mean `fast`?");
2335    }
2336
2337    #[test]
2338    fn same_value_in_a_different_domain_does_not_clash() {
2339        // `state is open` is closed in domain a; domain b's own `state is shut`
2340        // (never declared in a) is fine — value sets are per-domain.
2341        let mut r = MemoryResolver::new();
2342        r.add(
2343            "a.vrf",
2344            "DOMAIN a\nPREMISE s:\n    ONEOF\n        state is open\n        state is closed\n",
2345        );
2346        r.add("b.vrf", "DOMAIN b\nIMPORT \"a.vrf\"\nFACT state is shut\n");
2347        // `state is shut` is in domain b, which has no ONEOF → open, so it compiles.
2348        assert!(compile("b.vrf", &r).is_ok());
2349    }
2350
2351    #[test]
2352    fn levenshtein_basics() {
2353        assert_eq!(levenshtein("", ""), 0);
2354        assert_eq!(levenshtein("abc", "abc"), 0);
2355        assert_eq!(levenshtein("censoredmtp", "censored_mtp"), 1);
2356        assert_eq!(levenshtein("norml", "normal"), 1);
2357        assert_eq!(levenshtein("kitten", "sitting"), 3);
2358    }
2359
2360    #[test]
2361    fn nearest_respects_the_length_budget() {
2362        let cands = ["censored", "censored_mtp", "uncensored"];
2363        assert_eq!(nearest("censoredmtp", &cands), Some("censored_mtp"));
2364        // "zzz" is far from all; no suggestion.
2365        assert_eq!(nearest("zzz", &cands), None);
2366    }
2367
2368    #[test]
2369    fn nearest_offers_nothing_for_very_short_values() {
2370        // 1–2 character values get a budget of 0: every other short token is at
2371        // distance 1, so a "did you mean" carries no signal. True for single CJK
2372        // characters (one symbol = a whole word) and for two-letter codes alike.
2373        assert_eq!(nearest("七", &["一", "二", "三"]), None);
2374        assert_eq!(nearest("us", &["uk", "eu", "jp"]), None);
2375        // A multi-character CJK word still gets a sensible nearest (one wrong
2376        // character = distance 1, budget = 3/3 = 1).
2377        assert_eq!(nearest("中文字", &["中文学", "日本語"]), Some("中文学"));
2378    }
2379
2380    #[test]
2381    fn short_value_is_still_rejected_just_without_a_guess() {
2382        // The closed-world error does not depend on the suggestion: an out-of-set
2383        // single-character value is rejected exactly, only the `did you mean` is
2384        // suppressed.
2385        let src =
2386            "PREMISE pick:\n    ONEOF\n        roll is 一\n        roll is 二\nFACT roll is 七\n";
2387        let CompileError::UnknownValue(e) = cs(src).unwrap_err() else {
2388            panic!("expected UnknownValue");
2389        };
2390        assert_eq!(e.value, "七");
2391        assert_eq!(e.suggestion, "");
2392    }
2393}