elenchus-compiler 0.7.0

//! elenchus-compiler — compiles parsed elenchus DSL into a canonical clause IR.
//!
//! This crate is **preparation, not solving**. It takes the AST (from
//! `elenchus-parser`) and produces a deterministic, solver-ready intermediate
//! representation:
//!
//! - **atom interner**: `(subject, predicate, object?)` → dense `u32` id,
//!   canonically sorted so ids (and any later enumeration) are deterministic;
//! - **desugaring**: surface CAPS sugar → `Impossible` clauses
//!   (`EXCLUSIVE` pairwise, `WHEN…THEN` → `Impossible([A, …, NOT C])`, etc.);
//! - **content-addressing** (sha256, mirroring vsm-guard's CAS): identical
//!   clauses are deduped (idempotent — `P ∧ P ≡ P`), and a named construct
//!   redefined with a different body is a `PremiseRedefinition` error.
//!
//! The actual reasoning (3-valued forward chaining, SAT, all-SAT, the WARNING
//! pool, the four results) lives in `elenchus-solver`. `IMPORT` resolution is a
//! source-agnostic [`Resolver`] that flat-merges another source into the shared
//! atom universe ([`compile`] resolves imports; [`compile_source`] leaves them
//! pending).
//!
//! # Example
//!
//! ```
//! use elenchus_compiler::compile_source;
//!
//! // `ASSUME` lowers to a *soft* fact: the same atom universe as a `FACT`, but
//! // one the solver may retract. Here `x a` is asserted both ways (hard + soft).
//! let ir = compile_source("demo.vrf", "DOMAIN d\nFACT x a\nASSUME NOT x a\nCHECK x\n").unwrap();
//! assert_eq!(ir.facts.len(), 2);
//! assert!(ir.facts.iter().any(|f| f.soft)); // the ASSUME is the soft one
//! ```
#![no_std]
// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
#![warn(missing_docs)]

extern crate alloc;

#[cfg(feature = "std")]
extern crate std;

use alloc::collections::{BTreeMap, BTreeSet};
use alloc::string::{String, ToString};
use alloc::vec;
use alloc::vec::Vec;
use core::fmt::Write as _;

/// Re-exported so downstream crates can name the syntax diagnostics carried by
/// [`CompileError::Parse`] (and render them with a custom error limit).
pub use elenchus_parser::Diagnostics;
use elenchus_parser::{Atom, Body, Conn, ListOp, Literal, Statement, kw};
use sha2::{Digest, Sha256};
use thiserror::Error;

// --- content-addressing (mirrors vsm-guard::hashing) -----------------------

/// Content-address `data`: its SHA-256 digest rendered as lowercase hex (two
/// digits per digest byte).
///
/// Used only for dedup / redefinition / provenance, never for namespacing atoms.
pub fn hash_hex(data: &[u8]) -> String {
    let mut hasher = Sha256::new();
    hasher.update(data);
    let digest = hasher.finalize();
    digest
        .iter()
        .fold(String::with_capacity(digest.len() * 2), |mut hex, byte| {
            // Formatting into a `String` cannot fail, so the result is discarded.
            let _ = write!(hex, "{:02x}", byte);
            hex
        })
}

// --- IR types --------------------------------------------------------------

/// Dense atom identifier (also the SAT variable number).
///
/// Assigned by [`Compiler::finalize`] as the atom's position in the canonically
/// sorted atom list, so it doubles as an index into [`Compiled::atoms`].
pub type AtomId = u32;

/// The identity of an atom: the `domain` plus the triple
/// `(subject, predicate, object?)`, owned so it survives across merged sources.
/// The domain is the leading sort key, so atoms group by domain; ordering is
/// otherwise lexicographic → canonical. Two atoms with the same triple in
/// *different* domains are distinct (no cross-domain unification).
///
/// Field order is load-bearing: the derived `Ord` compares fields in declaration
/// order, and that ordering decides which [`AtomId`] each atom receives in
/// [`Compiler::finalize`]. Reordering the fields would renumber every atom.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct AtomKey {
    /// The domain this atom belongs to (the resolved namespace, never a raw
    /// alias). `physics.engine` and `plan.engine` are different atoms.
    pub domain: String,
    /// The entity the claim is about (owned copy of the parser's `subject`).
    pub subject: String,
    /// The relation or property asserted.
    pub predicate: String,
    /// Optional object; part of identity, so `has flying` ≠ `has swimming`.
    /// `None` sorts before any `Some(_)` under the derived ordering.
    pub object: Option<String>,
}

/// The domain context of one file being compiled: its own declared domain (where
/// bare atoms fall) and the local names — aliases or imported domain names — it
/// may reference other domains by. Resolving an atom's optional `domain.` prefix
/// against this context yields its canonical [`AtomKey`] domain.
///
/// Note: a context built by [`Compiler::add_source`] only ever contains the
/// file's own domain, because that path leaves imports pending.
struct DomainCtx {
    /// The file's own declared domain (the target for unqualified atoms).
    current: String,
    /// `local name -> canonical domain` for every name visible in this file
    /// (always includes `current -> current`, plus one entry per `IMPORT`).
    /// A prefix missing from this map is reported as
    /// [`CompileError::UnknownDomain`].
    aliases: BTreeMap<String, String>,
}

impl DomainCtx {
    /// Map an atom's optional `domain.` prefix to the canonical domain name.
    ///
    /// An absent prefix means the file's own domain. A present prefix is looked
    /// up among the names visible in this file.
    ///
    /// # Errors
    ///
    /// [`CompileError::UnknownDomain`] when the prefix is not a visible name.
    fn resolve(&self, prefix: Option<&str>) -> Result<String, CompileError> {
        if let Some(name) = prefix {
            return match self.aliases.get(name) {
                Some(canonical) => Ok(canonical.clone()),
                None => Err(CompileError::UnknownDomain {
                    domain: name.to_string(),
                }),
            };
        }
        Ok(self.current.clone())
    }

    /// Turn a borrowed parser [`Atom`] into an owned [`AtomKey`], resolving its
    /// domain prefix through this file's context first.
    ///
    /// # Errors
    ///
    /// Propagates [`CompileError::UnknownDomain`] from [`DomainCtx::resolve`].
    fn key(&self, a: &Atom) -> Result<AtomKey, CompileError> {
        let domain = self.resolve(a.domain)?;
        let subject = a.subject.to_string();
        let predicate = a.predicate.to_string();
        let object = a.object.map(|obj| obj.to_string());
        Ok(AtomKey {
            domain,
            subject,
            predicate,
            object,
        })
    }
}

/// A literal as it appears *inside* an `Impossible` clause: an atom, optionally
/// negated. `negated = true` means the literal is `NOT atom`.
///
/// The same shape is reused for the antecedent and consequent of a [`Rule`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Lit {
    /// Interned id of the atom (also its SAT variable number).
    pub atom: AtomId,
    /// `true` means this literal is `NOT atom` inside the clause.
    pub negated: bool,
}

/// A confident truth value. UNKNOWN is the *absence* of a fact, never stored.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Value {
    /// The atom is asserted TRUE (from `FACT`, or from a positive `ASSUME`).
    True,
    /// The atom is asserted FALSE (from `NOT`, or from an `ASSUME NOT`).
    False,
}

/// Where a piece of IR came from — for readable conflict/warning pools.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Origin {
    /// The source label this came from (file name or `"<root>"`/`"<text>"`).
    pub source: String,
    /// 1-based line number of the originating statement. For a named construct
    /// (`PREMISE`/`RULE`) this is the line of its name.
    pub line: u32,
    /// The premise/rule name, if it came from a named construct; `None` for
    /// facts and assumptions.
    pub premise: Option<String>,
    /// Surface kind for the report, e.g. `"FACT"`, `"EXCLUSIVE"`, `"PREMISE"`.
    pub kind: &'static str,
}

/// An asserted fact: hard (from `FACT` / `NOT`) or soft (from `ASSUME`, see
/// [`Fact::soft`]). Conflicting facts on the same atom are preserved (both
/// kept) — the solver reports that as a CONFLICT. Exact duplicates (same atom,
/// value and surface kind) are dropped during compilation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Fact {
    /// The atom this fact pins down.
    pub atom: AtomId,
    /// The asserted truth value.
    pub value: Value,
    /// Where it came from (for the report).
    pub origin: Origin,
    /// `true` for an `ASSUME` (a *soft*, retractable hypothesis). A soft fact
    /// behaves like a normal fact in the forward pass, but when the assumptions
    /// cannot all hold the solver may drop it (and only it) to explain the
    /// contradiction — a `FACT`/`NOT` is never retractable.
    pub soft: bool,
}

/// An `Impossible` clause: the listed literals cannot all hold simultaneously.
///
/// Clauses with an identical canonical signature are emitted once; the surviving
/// clause carries the origin of its first occurrence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Clause {
    /// The literals that cannot all hold at once (an `Impossible([...])`).
    pub lits: Vec<Lit>,
    /// Where it came from (for the report).
    pub origin: Origin,
}

/// A forward-chaining rule (from `RULE`): if all antecedent literals hold, derive
/// the consequent literals.
///
/// A source `RULE` whose antecedent is joined by `OR` is lowered to one `Rule`
/// per antecedent literal (all sharing the same consequent and origin); an `OR`
/// consequent is rejected with [`CompileError::RuleDisjunctiveConsequent`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rule {
    /// Literals that must all hold for the rule to fire.
    pub antecedent: Vec<Lit>,
    /// Literals derived (asserted) when the antecedent holds.
    pub consequent: Vec<Lit>,
    /// Where it came from (for the report).
    pub origin: Origin,
}

/// A `CHECK` query. One entry is recorded per `CHECK` statement, in source
/// order; checks are not deduplicated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Check {
    /// Restrict the report to this subject; `None` means check everything.
    pub subject: Option<String>,
    /// `true` runs the backward (all-SAT) pass to detect UNDERDETERMINED.
    pub bidirectional: bool,
}

/// The compiled IR: the solver's input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Compiled {
    /// Indexed by [`AtomId`]; canonically sorted.
    pub atoms: Vec<AtomKey>,
    /// Assertions from `FACT`/`NOT` (hard) and from `ASSUME` (soft — see
    /// [`Fact::soft`]).
    pub facts: Vec<Fact>,
    /// `Impossible` clauses (desugared premises + the built-in non-contradiction).
    pub clauses: Vec<Clause>,
    /// Forward-chaining rules from `RULE`.
    pub rules: Vec<Rule>,
    /// `CHECK` queries.
    pub checks: Vec<Check>,
    /// Imports seen but not yet resolved (only populated by [`compile_source`];
    /// [`compile`] resolves them, leaving this empty).
    pub pending_imports: Vec<String>,
    /// Advisory: imports that a file makes but never references (no `domain.atom`
    /// from that file uses the imported domain). Structural, per-file, and inert —
    /// it never affects the solve. Only populated by [`compile`] (an unresolved
    /// import in [`compile_source`] cannot be classified). See [`UnusedImport`].
    pub unused_imports: Vec<UnusedImport>,
}

/// An advisory record: a file `IMPORT`s a domain it never references. Such an
/// import is inert — no `domain.atom` in that file mentions it, so removing it
/// would not change the result. It is almost always a leftover or a forgotten
/// `domain.` prefix. **Purely informational** — it never changes the verdict.
///
/// The derived ordering compares fields in declaration order (`file`, then
/// `domain`, `alias`, `line`), so keep the field order stable.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct UnusedImport {
    /// The source that declared the unused `IMPORT`.
    pub file: String,
    /// The imported domain that is never referenced from `file`.
    pub domain: String,
    /// The local alias, if the import used `AS <alias>`.
    pub alias: Option<String>,
    /// 1-based line of the `IMPORT` statement in `file`.
    pub line: u32,
}

/// Anything that can go wrong while compiling (and resolving imports).
#[derive(Debug, Error, PartialEq, Eq)]
pub enum CompileError {
    /// A source failed to parse; carries the full syntax diagnostics (every
    /// error, each as a caret block with the keyword's correct syntax). The
    /// source label is already inside the [`Diagnostics`] header.
    #[error("{0}")]
    Parse(elenchus_parser::Diagnostics),
    /// A name was reused with a different body *within the same source*.
    /// Raised for `RULE` names as well as `PREMISE` names; repeating a name
    /// with an identical body is accepted as idempotent.
    #[error("'{name}' redefined with a different body")]
    PremiseRedefinition {
        /// The clashing premise/rule name.
        name: String,
    },
    /// A source did not declare its `DOMAIN` (required, once, as the first
    /// statement).
    #[error("{file}: missing a DOMAIN declaration (every file must start with `DOMAIN <name>`)")]
    MissingDomain {
        /// The source label that lacked a `DOMAIN`.
        file: String,
    },
    /// A source declared `DOMAIN` more than once (a file has exactly one domain).
    #[error("{file}: more than one DOMAIN declaration (a file has exactly one domain)")]
    DuplicateDomain {
        /// The source label with the duplicate `DOMAIN`.
        file: String,
    },
    /// An atom referenced a `domain.` prefix that is not the file's own domain and
    /// was not imported in this file. Also raised when a single source compiled
    /// without a [`Resolver`] references an imported domain, since its imports
    /// stay pending and are never bound.
    #[error("unknown domain '{domain}' — declare it with DOMAIN, or IMPORT it in this file")]
    UnknownDomain {
        /// The unresolved domain prefix.
        domain: String,
    },
    /// Two imports bound the same local domain name to different domains (use a
    /// distinct `AS <alias>` to tell them apart).
    #[error("domain name '{alias}' is bound to two different imports (disambiguate with AS)")]
    DomainAliasClash {
        /// The clashing local domain name.
        alias: String,
    },
    /// An `IMPORT` target could not be loaded by the [`Resolver`]. The payload
    /// is the path, optionally followed by resolver-specific detail.
    #[error("import not found: {0}")]
    ImportNotFound(String),
    /// Imports form a cycle (a source transitively imports itself).
    #[error("circular import: {0}")]
    CircularImport(String),
    /// A `RULE` used `OR` in its `THEN`: forward chaining cannot derive a
    /// disjunction (it would not know which literal to assert). Model it as a
    /// `PREMISE` constraint instead.
    #[error("rule '{name}' cannot derive a disjunction (OR in THEN); use a PREMISE instead")]
    RuleDisjunctiveConsequent {
        /// The offending rule name.
        name: String,
    },
}

// --- raw (key-based) intermediate, before interning ------------------------
// While accumulating we key everything by `AtomKey` (the owned domain + triple)
// rather than by `AtomId`, because ids only become stable once *all* sources are
// merged and the atom set is sorted in `finalize`. The `Raw*` types below mirror
// the public IR types but hold keys instead of ids.

/// A literal keyed by atom identity (pre-interning counterpart of [`Lit`]).
#[derive(Clone)]
struct RawLit {
    /// Identity of the atom; replaced by its [`AtomId`] when the IR is lowered.
    key: AtomKey,
    /// `true` means the literal is `NOT atom`.
    negated: bool,
}

/// A fact keyed by atom identity (pre-interning counterpart of [`Fact`]).
struct RawFact {
    /// Identity of the atom this fact pins down.
    key: AtomKey,
    /// The asserted truth value.
    value: Value,
    /// Provenance of the originating statement.
    origin: Origin,
    /// `true` when the fact came from an `ASSUME` (retractable).
    soft: bool,
}

/// A clause keyed by atom identity (pre-interning counterpart of [`Clause`]).
struct RawClause {
    /// The literals that cannot all hold at once.
    lits: Vec<RawLit>,
    /// Provenance of the construct that produced the clause.
    origin: Origin,
}

/// A rule keyed by atom identity (pre-interning counterpart of [`Rule`]).
struct RawRule {
    /// Literals that must all hold for the rule to fire.
    antecedent: Vec<RawLit>,
    /// Literals derived when the antecedent holds.
    consequent: Vec<RawLit>,
    /// Provenance of the originating `RULE`.
    origin: Origin,
}

// --- compiler --------------------------------------------------------------

/// Accumulates statements from one or more sources, then interns + emits the IR.
#[derive(Default)]
pub struct Compiler {
    /// Every atom identity seen so far. The set's sorted order becomes the
    /// atom numbering when the IR is finalized.
    keys: BTreeSet<AtomKey>,
    /// Facts and assumptions, in first-seen order (exact duplicates dropped).
    facts: Vec<RawFact>,
    /// `Impossible` clauses, in first-seen order (identical clauses dropped).
    clauses: Vec<RawClause>,
    /// Forward-chaining rules, in source order.
    rules: Vec<RawRule>,
    /// `CHECK` queries, in source order.
    checks: Vec<Check>,
    /// Paths of `IMPORT`s recorded but not resolved (single-source compilation).
    pending_imports: Vec<String>,
    /// (source, name) → content hash of its body, for redefinition detection.
    /// Scoped per source: premise/rule names are labels, not global identifiers,
    /// so different files (domains) may reuse a name. A clash is only an error
    /// *within the same source*.
    defined: BTreeMap<(String, String), String>,
    /// dedup of identical clauses by canonical content hash.
    clause_sigs: BTreeSet<String>,
    /// dedup of identical facts by (key, value, kind) — the kind is part of the
    /// signature, so a `FACT` and an `ASSUME` of the same atom are both kept.
    fact_sigs: BTreeSet<String>,
}

impl Compiler {
    /// A fresh, empty compiler.
    pub fn new() -> Self {
        Self::default()
    }

    /// Parse a single source and fold its statements into the accumulator.
    ///
    /// `source` is the provenance label (a file name, `"<root>"`, …) and `src`
    /// is the text to parse. The source must declare its `DOMAIN`. Every
    /// `IMPORT` is only recorded as pending, since binding an imported domain
    /// needs a [`Resolver`]; atoms here may therefore only reference the
    /// source's own domain. Use [`compile`] for cross-domain references.
    ///
    /// # Errors
    ///
    /// [`CompileError::Parse`] on a syntax error (labelled with `source`), plus
    /// whatever the domain extraction or the individual statements report.
    pub fn add_source(&mut self, source: &str, src: &str) -> Result<(), CompileError> {
        let program = match elenchus_parser::parse(src) {
            Ok(parsed) => parsed,
            Err(mut diag) => {
                diag.set_file(source);
                return Err(CompileError::Parse(diag));
            }
        };

        // The only name visible in a lone source is its own domain.
        let domain = extract_domain(&program, source)?;
        let mut visible = BTreeMap::new();
        visible.insert(domain.clone(), domain.clone());
        let ctx = DomainCtx {
            current: domain,
            aliases: visible,
        };

        for stmt in &program.statements {
            if let Statement::Import { path, .. } = stmt {
                self.pending_imports.push(path.data.to_string());
            } else if !matches!(stmt, Statement::Domain(_)) {
                self.add_statement(source, stmt, &ctx)?;
            }
        }
        Ok(())
    }

    /// Parse an already-resolved file and accumulate its statements, resolving
    /// atoms through the domain context that was prepared for it.
    ///
    /// `IMPORT` and `DOMAIN` statements are skipped here — they are not routed
    /// to the statement accumulator.
    ///
    /// # Errors
    ///
    /// [`CompileError::Parse`] (labelled with the file's path) on a syntax
    /// error, or any error reported while adding a statement.
    fn add_resolved(&mut self, file: &ResolvedFile) -> Result<(), CompileError> {
        let program = match elenchus_parser::parse(&file.content) {
            Ok(parsed) => parsed,
            Err(mut diag) => {
                diag.set_file(&file.path);
                return Err(CompileError::Parse(diag));
            }
        };
        for stmt in &program.statements {
            if matches!(stmt, Statement::Import { .. } | Statement::Domain(_)) {
                continue;
            }
            self.add_statement(&file.path, stmt, &file.ctx)?;
        }
        Ok(())
    }

    /// Dispatch a single statement to the accumulator that owns it, resolving
    /// atom domains through `ctx`.
    ///
    /// `IMPORT` and `DOMAIN` are the loaders' business; if one shows up here it
    /// is accepted and ignored.
    ///
    /// # Errors
    ///
    /// Whatever the fact or named-construct accumulators report.
    fn add_statement(
        &mut self,
        source: &str,
        stmt: &Statement,
        ctx: &DomainCtx,
    ) -> Result<(), CompileError> {
        match stmt {
            // Consumed by the loaders; nothing to accumulate.
            Statement::Import { .. } | Statement::Domain(_) => Ok(()),
            Statement::Fact(atom) => self.add_fact(source, atom, Value::True, kw::FACT, false, ctx),
            Statement::Negation(atom) => {
                self.add_fact(source, atom, Value::False, kw::NOT, false, ctx)
            }
            Statement::Assume(lit) => {
                // An assumption is a fact marked `soft`: its atom is the
                // literal's atom, its polarity comes from the literal's `NOT`,
                // and its span is the span of the whole `ASSUME` literal.
                let polarity = if lit.data.negated {
                    Value::False
                } else {
                    Value::True
                };
                let located = elenchus_parser::Located {
                    data: lit.data.atom.clone(),
                    span: lit.span,
                };
                self.add_fact(source, &located, polarity, kw::ASSUME, true, ctx)
            }
            Statement::Check {
                subject,
                bidirectional,
            } => {
                let subject = subject.as_ref().map(|s| s.data.to_string());
                self.checks.push(Check {
                    subject,
                    bidirectional: *bidirectional,
                });
                Ok(())
            }
            Statement::Premise { name, body } => {
                let line = name.span.location_line();
                self.add_named(source, name.data, line, body, false, ctx)
            }
            Statement::Rule { name, body } => {
                let line = name.span.location_line();
                self.add_named(source, name.data, line, body, true, ctx)
            }
        }
    }

    /// Add an atom identity to the shared universe if it is not there yet.
    /// The key is only cloned when it is actually new.
    fn intern(&mut self, key: &AtomKey) {
        if self.keys.contains(key) {
            return;
        }
        self.keys.insert(key.clone());
    }

    /// Accumulate one assertion (`FACT`, `NOT`, or `ASSUME`).
    ///
    /// The atom is always interned. The fact itself is stored only the first
    /// time its (atom, value, kind) combination is seen — a repeated identical
    /// statement is idempotent and keeps the origin of the first occurrence.
    /// Opposite values on the same atom are both kept so the solver can report
    /// the CONFLICT.
    ///
    /// # Errors
    ///
    /// [`CompileError::UnknownDomain`] if the atom's domain prefix is not visible.
    fn add_fact(
        &mut self,
        source: &str,
        a: &elenchus_parser::Located<Atom>,
        value: Value,
        kind: &'static str,
        soft: bool,
        ctx: &DomainCtx,
    ) -> Result<(), CompileError> {
        let key = ctx.key(&a.data)?;
        self.intern(&key);

        // The line is deliberately not part of the signature (the trailing
        // field stays empty), so the same fact on two lines is one fact.
        let polarity = u8::from(matches!(value, Value::True));
        let sig = alloc::format!("{}|{}|{}|{}", key_sig(&key), polarity, kind, "");

        let first_seen = self.fact_sigs.insert(sig);
        if first_seen {
            let origin = Origin {
                source: source.to_string(),
                line: a.span.location_line(),
                premise: None,
                kind,
            };
            self.facts.push(RawFact {
                key,
                value,
                origin,
                soft,
            });
        }
        Ok(())
    }

    /// Handle a named construct (`PREMISE` or `RULE`). `is_rule` selects derivation
    /// vs checking. Returns an error on redefinition with a different body.
    fn add_named(
        &mut self,
        source: &str,
        name: &str,
        line: u32,
        body: &Body,
        is_rule: bool,
        ctx: &DomainCtx,
    ) -> Result<(), CompileError> {
        let body_hash = hash_hex(canonical_body(name, body, is_rule, ctx)?.as_bytes());
        let key = (source.to_string(), name.to_string());
        match self.defined.get(&key) {
            Some(prev) if *prev == body_hash => return Ok(()), // identical → idempotent
            Some(_) => {
                // Same name + different body *in the same source* — a real mistake.
                return Err(CompileError::PremiseRedefinition {
                    name: name.to_string(),
                });
            }
            None => {
                self.defined.insert(key, body_hash);
            }
        }

        if is_rule {
            // RULE always has an implication body (guaranteed by the grammar).
            if let Body::Impl {
                antecedent,
                ante_conn,
                consequent,
                cons_conn,
            } = body
            {
                // A rule *derives* its consequent; an `OR` consequent is not a
                // single fact to assert, so reject it (use a PREMISE instead).
                if *cons_conn == Conn::Or {
                    return Err(CompileError::RuleDisjunctiveConsequent {
                        name: name.to_string(),
                    });
                }
                let (ante, cons) = (raw_lits(antecedent, ctx)?, raw_lits(consequent, ctx)?);
                for l in ante.iter().chain(cons.iter()) {
                    self.intern(&l.key);
                }
                let origin = self.origin(source, line, Some(name), kw::RULE);
                match ante_conn {
                    // a ∧ b → C : one rule firing on the whole antecedent.
                    Conn::And => self.rules.push(RawRule {
                        antecedent: ante,
                        consequent: cons,
                        origin,
                    }),
                    // (a ∨ b) → C == (a → C) ∧ (b → C): one rule per antecedent.
                    Conn::Or => {
                        for a in &ante {
                            self.rules.push(RawRule {
                                antecedent: vec![a.clone()],
                                consequent: cons.clone(),
                                origin: origin.clone(),
                            });
                        }
                    }
                }
            }
            return Ok(());
        }

        match body {
            Body::List { op, atoms } => {
                let keys: Vec<AtomKey> = atoms
                    .iter()
                    .map(|a| ctx.key(&a.data))
                    .collect::<Result<_, _>>()?;
                for k in &keys {
                    self.intern(k);
                }
                let kind = list_kind(*op);
                let origin = self.origin(source, line, Some(name), kind);
                match op {
                    // EXCLUSIVE / FORBIDS: "at most one" → pairwise Impossible([a_i, a_j]).
                    ListOp::Exclusive | ListOp::Forbids => {
                        self.emit_pairwise(&keys, &origin);
                    }
                    // ONEOF: pairwise (at most one) + at-least-one.
                    ListOp::OneOf => {
                        self.emit_pairwise(&keys, &origin);
                        self.emit_at_least_one(&keys, &origin);
                    }
                    // ATLEAST: Impossible([NOT a_1, …, NOT a_n]).
                    ListOp::AtLeast => {
                        self.emit_at_least_one(&keys, &origin);
                    }
                }
            }
            Body::Impl {
                antecedent,
                ante_conn,
                consequent,
                cons_conn,
            } => {
                // Implication A → C as `Impossible(A_true ∧ ¬C)`. We group each
                // side by its connective and emit one clause per (ante × cons)
                // group pair — a uniform rule covering all AND/OR combinations:
                //   AND-ante → all its literals share every clause;
                //   OR-ante  → one clause per literal;
                //   AND-cons → one clause per (negated) literal;
                //   OR-cons  → all its (negated) literals share every clause.
                let ante = raw_lits(antecedent, ctx)?;
                let cons = raw_lits(consequent, ctx)?;
                for l in ante.iter().chain(cons.iter()) {
                    self.intern(&l.key);
                }
                let origin = self.origin(source, line, Some(name), kw::PREMISE);

                let ante_groups: Vec<Vec<RawLit>> = match ante_conn {
                    Conn::And => vec![ante.clone()],
                    Conn::Or => ante.iter().map(|l| vec![l.clone()]).collect(),
                };
                let cons_groups: Vec<Vec<RawLit>> = match cons_conn {
                    Conn::And => cons.iter().map(|l| vec![l.clone()]).collect(),
                    Conn::Or => vec![cons.clone()],
                };
                for ag in &ante_groups {
                    for cg in &cons_groups {
                        let mut lits = ag.clone();
                        for c in cg {
                            lits.push(RawLit {
                                key: c.key.clone(),
                                negated: !c.negated,
                            });
                        }
                        self.push_clause(lits, origin.clone());
                    }
                }
            }
        }
        Ok(())
    }

    /// Encode "at most one of `keys` is TRUE": one `Impossible([a_i, a_j])` for
    /// every unordered pair, in `(i, j)` order with `i < j`.
    ///
    /// A single `Impossible([a, b, c])` would not do — it only rules out all
    /// three holding together and would still allow any two.
    fn emit_pairwise(&mut self, keys: &[AtomKey], origin: &Origin) {
        for (i, first) in keys.iter().enumerate() {
            for second in &keys[i + 1..] {
                let pair = vec![
                    RawLit {
                        key: first.clone(),
                        negated: false,
                    },
                    RawLit {
                        key: second.clone(),
                        negated: false,
                    },
                ];
                self.push_clause(pair, origin.clone());
            }
        }
    }

    /// Emit "at least one TRUE" as a single `Impossible([NOT a_1, …, NOT a_n])`
    /// — it is impossible for all of them to be false at once.
    fn emit_at_least_one(&mut self, keys: &[AtomKey], origin: &Origin) {
        let lits = keys
            .iter()
            .map(|k| RawLit {
                key: k.clone(),
                negated: true,
            })
            .collect();
        self.push_clause(lits, origin.clone());
    }

    /// Store a clause, unless one with the same canonical [`clause_sig`] has
    /// already been stored. Since `P ∧ P ≡ P`, dropping the repeat keeps the IR
    /// minimal; the first occurrence (and its origin) wins.
    fn push_clause(&mut self, lits: Vec<RawLit>, origin: Origin) {
        let is_new = self.clause_sigs.insert(clause_sig(&lits));
        if !is_new {
            // Identical clause already present — idempotent.
            return;
        }
        self.clauses.push(RawClause { lits, origin });
    }

    /// Build an [`Origin`] for provenance from the current source/line/name.
    fn origin(&self, source: &str, line: u32, premise: Option<&str>, kind: &'static str) -> Origin {
        Origin {
            source: source.to_string(),
            line,
            premise: premise.map(|s| s.to_string()),
            kind,
        }
    }

    /// Intern all atoms (canonical sort), then lower the raw IR to ids.
    pub fn finalize(self) -> Compiled {
        let atoms: Vec<AtomKey> = self.keys.into_iter().collect(); // BTreeSet → sorted
        let mut id_of: BTreeMap<AtomKey, AtomId> = BTreeMap::new();
        for (i, k) in atoms.iter().enumerate() {
            id_of.insert(k.clone(), i as AtomId);
        }
        let lower = |l: &RawLit| Lit {
            atom: id_of[&l.key],
            negated: l.negated,
        };

        let facts = self
            .facts
            .into_iter()
            .map(|f| Fact {
                atom: id_of[&f.key],
                value: f.value,
                origin: f.origin,
                soft: f.soft,
            })
            .collect();
        let clauses = self
            .clauses
            .into_iter()
            .map(|c| Clause {
                lits: c.lits.iter().map(lower).collect(),
                origin: c.origin,
            })
            .collect();
        let rules = self
            .rules
            .into_iter()
            .map(|r| Rule {
                antecedent: r.antecedent.iter().map(lower).collect(),
                consequent: r.consequent.iter().map(lower).collect(),
                origin: r.origin,
            })
            .collect();

        Compiled {
            atoms,
            facts,
            clauses,
            rules,
            checks: self.checks,
            pending_imports: self.pending_imports,
            unused_imports: Vec::new(), // filled by `compile` (advisory, post-resolution)
        }
    }
}

/// Compile exactly one source into the IR.
///
/// `source` is the provenance label and `src` the text. `IMPORT`s are only
/// recorded in [`Compiled::pending_imports`], never resolved — use [`compile`]
/// with a [`Resolver`] to resolve them.
///
/// # Errors
///
/// Any [`CompileError`] raised while parsing or accumulating the source.
pub fn compile_source(source: &str, src: &str) -> Result<Compiled, CompileError> {
    let mut compiler = Compiler::new();
    match compiler.add_source(source, src) {
        Ok(()) => Ok(compiler.finalize()),
        Err(err) => Err(err),
    }
}

// --- import resolution (source-agnostic) -----------------------------------

/// Turns an `IMPORT "path"` into source text. The engine only ever consumes
/// strings, so it is source-agnostic: a file on disk is just one possible
/// backing store. Mirrors vsm-grammar's `SourceResolver`.
pub trait Resolver {
    /// Fetch the raw source text stored under an already-resolved `path`.
    ///
    /// # Errors
    ///
    /// A [`CompileError`] when the source cannot be loaded.
    fn load(&self, path: &str) -> Result<String, CompileError>;

    /// Normalize the import path `relative` with respect to the importing
    /// source `base`.
    ///
    /// The default treats every path as an absolute name: `base` is ignored
    /// and `relative` comes back unchanged.
    fn resolve(&self, _base: &str, relative: &str) -> String {
        String::from(relative)
    }
}

/// An in-memory resolver: serves sources from a name → content map. Pure
/// `no_std`. Mirrors vsm-grammar's `MemoryResolver`.
///
/// It keeps the default [`Resolver::resolve`], so import paths are looked up
/// exactly as written.
#[derive(Default)]
pub struct MemoryResolver {
    /// Registered sources: path → full source text.
    sources: BTreeMap<String, String>,
}

impl MemoryResolver {
    /// An empty resolver with no sources.
    pub fn new() -> Self {
        Self::default()
    }

    /// Register `content` under `path`; returns `&mut self` for chaining.
    pub fn add(&mut self, path: &str, content: &str) -> &mut Self {
        self.sources.insert(path.to_string(), content.to_string());
        self
    }
}

impl Resolver for MemoryResolver {
    /// Return a copy of the text registered under `path`.
    ///
    /// # Errors
    ///
    /// [`CompileError::ImportNotFound`] (carrying `path`) when nothing is
    /// registered under that exact name.
    fn load(&self, path: &str) -> Result<String, CompileError> {
        match self.sources.get(path) {
            Some(text) => Ok(text.clone()),
            None => Err(CompileError::ImportNotFound(path.to_string())),
        }
    }
}

/// A filesystem-backed resolver. Mirrors vsm-grammar's `FileResolver`:
/// relative imports resolve against the importing file's directory, with manual
/// `..` normalization (no canonicalization, to keep a virtual layout).
///
/// A stateless unit struct, compiled only when the `std` feature is enabled.
#[cfg(feature = "std")]
pub struct FileResolver;

#[cfg(feature = "std")]
impl Resolver for FileResolver {
    /// Read the file at `path` as UTF-8 text.
    ///
    /// # Errors
    ///
    /// [`CompileError::ImportNotFound`] carrying `"<path>: <io error>"` for any
    /// I/O failure (missing file, permissions, invalid UTF-8, …).
    fn load(&self, path: &str) -> Result<String, CompileError> {
        std::fs::read_to_string(path)
            .map_err(|e| CompileError::ImportNotFound(alloc::format!("{}: {}", path, e)))
    }

    /// Resolve `relative` against the directory of the importing file `base`
    /// and normalize the result lexically (no filesystem access, no symlink
    /// resolution).
    ///
    /// - `.` components are dropped.
    /// - `..` cancels the preceding normal component.
    /// - `..` directly under a root (or a Windows prefix) stays at the root.
    /// - `..` with nothing left to cancel is **kept**, so an import that climbs
    ///   above the importing file's directory still names the right file
    ///   (`a.vrf` importing `../lib/x.vrf` resolves to `../lib/x.vrf`, not
    ///   `lib/x.vrf`).
    ///
    /// The result always uses forward slashes.
    fn resolve(&self, base: &str, relative: &str) -> String {
        use std::path::{Component, Path, PathBuf};
        let parent = Path::new(base).parent().unwrap_or_else(|| Path::new("."));
        let joined = parent.join(relative);
        let mut out = PathBuf::new();
        for component in joined.components() {
            match component {
                Component::ParentDir => {
                    let last = out.components().next_back();
                    let cancels = matches!(last, Some(Component::Normal(_)));
                    let anchored = matches!(
                        last,
                        Some(Component::RootDir) | Some(Component::Prefix(_))
                    );
                    if cancels {
                        out.pop();
                    } else if !anchored {
                        // Empty so far, or already climbing: `PathBuf::pop`
                        // would be a no-op (or would eat an earlier `..`), so
                        // the step up has to be recorded explicitly.
                        out.push("..");
                    }
                }
                Component::CurDir => {}
                c => out.push(c),
            }
        }
        // Normalize to forward slashes so resolved paths (and therefore the
        // provenance recorded in the IR) are identical on Windows and Unix.
        // Windows `std::fs` accepts `/` just fine.
        out.to_string_lossy().replace('\\', "/")
    }
}

/// One resolved source ready to compile: its provenance path, raw text, and the
/// domain context (own domain + import-alias bindings) its atoms resolve against.
struct ResolvedFile {
    /// Path the source was first reached under during graph resolution.
    path: String,
    /// Raw, unparsed source text.
    content: String,
    /// The file's own domain plus the local-name → domain bindings of its imports.
    ctx: DomainCtx,
}

/// Compile a root source and all its transitive `IMPORT`s into one IR.
///
/// Each file is keyed by `DOMAIN`; atoms unify only within a domain. Imports are
/// referenced by `<domain>.<atom>` and visibility is file-local (naming is not
/// transitive, though a dependency's clauses still participate). Sources are
/// content-addressed (sha256): a source reached by several paths is compiled once
/// (so a diamond — or an exponential fan-out — stays linear, never blowing up),
/// and an import cycle is an error.
///
/// Resolution is **iterative** (an explicit work stack, not native recursion), so
/// an arbitrarily deep import chain cannot overflow the call stack.
///
/// Premise/rule names are per-source labels, not global identifiers: different
/// files may reuse a name, and the report qualifies them by source. A name reused
/// with a different body is an error only *within the same source*.
pub fn compile<R: Resolver>(root: &str, resolver: &R) -> Result<Compiled, CompileError> {
    let (files, unused_imports) = resolve_graph(root, resolver)?;
    let mut c = Compiler::new();
    for file in &files {
        c.add_resolved(file)?;
    }
    let mut compiled = c.finalize();
    compiled.unused_imports = unused_imports;
    Ok(compiled)
}

/// One `IMPORT` edge: the optional local alias, the resolved child path, and the
/// `IMPORT` line (for the unused-import advisory).
struct ImportEdge {
    /// Local name given with `AS`, if any; without it the child's own domain
    /// name is the one bound in the importing file.
    alias: Option<String>,
    /// The import path after `Resolver::resolve` against the importing file.
    child_path: String,
    /// Source line of the import's path token.
    line: u32,
}

/// A discovered source during graph resolution: its first-seen path, raw text,
/// declared domain, import edges, and the set of domain prefixes its atoms use
/// (`None` = its own domain; `Some(p)` = a `p.` prefix) — used to flag imports
/// that the file never references.
struct DiscoveredFile {
    /// Path under which this content was first encountered.
    path: String,
    /// Raw source text as returned by the resolver.
    content: String,
    /// The single `DOMAIN` name the source declares.
    domain: String,
    /// One entry per `IMPORT` statement, in source order.
    edges: Vec<ImportEdge>,
    /// Domain prefixes seen on the file's atoms (`None` for unqualified atoms).
    used_prefixes: BTreeSet<Option<String>>,
}

/// Resolve the whole import graph reachable from `root` into a flat list of
/// [`ResolvedFile`]s, each distinct source appearing once, together with the
/// unused-import advisories.
///
/// Iterative depth-first traversal with an explicit work stack (`Enter`/`Exit`
/// frames) — no native recursion, so depth is unbounded without risking a stack
/// overflow. Memoized at two levels:
///
/// - by **resolved path**: a path that was already loaded is never read or
///   hashed again, however many import edges point at it;
/// - by **content hash**: identical content reached through a different path
///   is still visited only once (a diamond/repeat).
///
/// A hash re-encountered while still on the active path is a
/// [`CompileError::CircularImport`].
///
/// The returned files are in finish order, so every file comes after the files
/// it imports. The unused-import list is returned sorted.
///
/// # Errors
///
/// Propagates resolver load failures, parse diagnostics (tagged with the file
/// path), the missing/duplicate `DOMAIN` errors, [`CompileError::CircularImport`],
/// and [`CompileError::DomainAliasClash`] when one local name would bind two
/// different domains within the same file.
fn resolve_graph<R: Resolver>(
    root: &str,
    resolver: &R,
) -> Result<(Vec<ResolvedFile>, Vec<UnusedImport>), CompileError> {
    /// One unit of pending work on the traversal stack.
    enum Step {
        /// Visit a file at this resolved path (load, parse, enqueue its imports).
        Enter(String),
        /// Mark this content hash finished (pop it off the active path).
        Exit(String),
    }

    let mut discovered: BTreeMap<String, DiscoveredFile> = BTreeMap::new(); // by hash
    let mut path_hash: BTreeMap<String, String> = BTreeMap::new(); // resolved path → hash
    let mut order: Vec<String> = Vec::new(); // finish order, by hash
    let mut active: BTreeSet<String> = BTreeSet::new(); // hashes on the current DFS path
    let mut work: Vec<Step> = vec![Step::Enter(root.to_string())];

    while let Some(step) = work.pop() {
        match step {
            Step::Exit(hash) => {
                active.remove(&hash);
                order.push(hash);
            }
            Step::Enter(path) => {
                // Fast path: this exact path was already loaded and hashed.
                // Every path recorded in `path_hash` either aborted resolution
                // or has its hash in `discovered`, so there is nothing left to
                // do except the cycle check — no second read, no second hash.
                if let Some(known) = path_hash.get(&path) {
                    if active.contains(known) {
                        return Err(CompileError::CircularImport(path)); // back-edge to an ancestor
                    }
                    continue; // same path seen before — dedup
                }
                let content = resolver.load(&path)?;
                let hash = hash_hex(content.as_bytes());
                path_hash.insert(path.clone(), hash.clone());
                if active.contains(&hash) {
                    return Err(CompileError::CircularImport(path)); // back-edge to an ancestor
                }
                if discovered.contains_key(&hash) {
                    continue; // already fully resolved by another path — dedup
                }
                let program = elenchus_parser::parse(&content).map_err(|mut diag| {
                    diag.set_file(&path);
                    CompileError::Parse(diag)
                })?;
                let domain = extract_domain(&program, &path)?;
                let mut edges = Vec::new();
                let mut used_prefixes = BTreeSet::new();
                for stmt in &program.statements {
                    if let Statement::Import { path: p, alias } = stmt {
                        edges.push(ImportEdge {
                            alias: alias.as_ref().map(|a| a.data.to_string()),
                            child_path: resolver.resolve(&path, p.data),
                            line: p.span.location_line(),
                        });
                    } else {
                        collect_prefixes(stmt, &mut used_prefixes);
                    }
                }
                drop(program); // release the borrow on `content` before moving it
                active.insert(hash.clone());
                // The `Exit` frame sits below the children on the stack, so it
                // is popped only after every import has been fully processed.
                work.push(Step::Exit(hash.clone()));
                // Reversed so the first import is popped (visited) first.
                for e in edges.iter().rev() {
                    work.push(Step::Enter(e.child_path.clone()));
                }
                discovered.insert(
                    hash,
                    DiscoveredFile {
                        path,
                        content,
                        domain,
                        edges,
                        used_prefixes,
                    },
                );
            }
        }
    }

    // Build each file's domain context now that every domain is known.
    // Look up every file's domain (small strings) so we can then *move* each
    // file's (potentially large) content out of `discovered` instead of cloning.
    let domain_of: BTreeMap<&str, &str> = discovered
        .iter()
        .map(|(h, f)| (h.as_str(), f.domain.as_str()))
        .collect();

    let mut out = Vec::with_capacity(order.len());
    let mut unused: Vec<UnusedImport> = Vec::new();
    for hash in &order {
        let file = &discovered[hash];
        let mut aliases = BTreeMap::new();
        // A file can always name its own domain explicitly.
        aliases.insert(file.domain.clone(), file.domain.clone());
        for edge in &file.edges {
            // Every edge was entered during traversal, so its path has a hash
            // and that hash has a discovered domain.
            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
            let bind = edge
                .alias
                .clone()
                .unwrap_or_else(|| child_domain.to_string());
            match aliases.get(&bind) {
                // The same local name already points at a different domain.
                Some(existing) if existing != child_domain => {
                    return Err(CompileError::DomainAliasClash { alias: bind });
                }
                _ => {
                    aliases.insert(bind, child_domain.to_string());
                }
            }
        }

        // The domains this file actually references (each used prefix resolved
        // against its own domain / imports). An imported domain absent from this
        // set is an unused import.
        let referenced: BTreeSet<&str> = file
            .used_prefixes
            .iter()
            .filter_map(|p| match p {
                None => Some(file.domain.as_str()),
                Some(name) => aliases.get(name).map(|d| d.as_str()),
            })
            .collect();
        for edge in &file.edges {
            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
            if !referenced.contains(child_domain) {
                unused.push(UnusedImport {
                    file: file.path.clone(),
                    domain: child_domain.to_string(),
                    alias: edge.alias.clone(),
                    line: edge.line,
                });
            }
        }

        let ctx = DomainCtx {
            current: file.domain.clone(),
            aliases,
        };
        out.push((hash.clone(), ctx));
    }
    unused.sort();

    // Now move content/path out of `discovered` (no large clones) and pair with
    // the contexts built above.
    let files = out
        .into_iter()
        .map(|(hash, ctx)| {
            let file = discovered.remove(&hash).expect("hash was discovered");
            ResolvedFile {
                path: file.path,
                content: file.content,
                ctx,
            }
        })
        .collect();
    Ok((files, unused))
}

/// Record into `out` the domain prefix of every atom a statement mentions:
/// `None` for an unqualified atom, `Some(p)` for a `p.`-qualified one. Feeds
/// the unused-import lint. `DOMAIN`, `IMPORT` and `CHECK` statements
/// contribute nothing.
fn collect_prefixes(stmt: &Statement, out: &mut BTreeSet<Option<String>>) {
    /// Insert one atom's (optional) domain prefix into the set.
    fn record(atom: &Atom, sink: &mut BTreeSet<Option<String>>) {
        sink.insert(atom.domain.map(|prefix| prefix.to_string()));
    }

    match stmt {
        Statement::Domain(_) | Statement::Import { .. } | Statement::Check { .. } => {}
        Statement::Fact(located) | Statement::Negation(located) => record(&located.data, out),
        Statement::Assume(literal) => record(&literal.data.atom, out),
        Statement::Premise { body, .. } | Statement::Rule { body, .. } => match body {
            Body::List { atoms, .. } => {
                for located in atoms.iter() {
                    record(&located.data, out);
                }
            }
            Body::Impl {
                antecedent,
                consequent,
                ..
            } => {
                // Both sides of the implication count as uses.
                for literal in antecedent.iter().chain(consequent) {
                    record(&literal.data.atom, out);
                }
            }
        },
    }
}

/// Return the name of the one `DOMAIN` statement in `program`.
///
/// # Errors
///
/// [`CompileError::MissingDomain`] when there is no `DOMAIN` statement and
/// [`CompileError::DuplicateDomain`] when there is more than one; both carry
/// `source` as the offending file.
fn extract_domain(
    program: &elenchus_parser::Program,
    source: &str,
) -> Result<String, CompileError> {
    let mut declarations = program.statements.iter().filter_map(|stmt| {
        if let Statement::Domain(name) = stmt {
            Some(name)
        } else {
            None
        }
    });

    let first = declarations
        .next()
        .ok_or_else(|| CompileError::MissingDomain {
            file: source.to_string(),
        })?;
    // Any further declaration, whatever its name, is a duplicate.
    if declarations.next().is_some() {
        return Err(CompileError::DuplicateDomain {
            file: source.to_string(),
        });
    }
    Ok(first.data.to_string())
}

// --- helpers ---------------------------------------------------------------

/// Convert parsed, located literals into key-based [`RawLit`]s: spans are
/// discarded and each atom is turned into its key through `ctx`. Stops at the
/// first atom whose key cannot be produced and returns that error.
fn raw_lits(
    lits: &[elenchus_parser::Located<Literal>],
    ctx: &DomainCtx,
) -> Result<Vec<RawLit>, CompileError> {
    let mut lowered = Vec::with_capacity(lits.len());
    for lit in lits {
        let key = ctx.key(&lit.data.atom)?;
        lowered.push(RawLit {
            key,
            negated: lit.data.negated,
        });
    }
    Ok(lowered)
}

/// Map a list operator to its surface keyword (the matching `kw::` constant),
/// which is what the report shows as [`Origin::kind`].
fn list_kind(op: ListOp) -> &'static str {
    match op {
        ListOp::AtLeast => kw::ATLEAST,
        ListOp::OneOf => kw::ONEOF,
        ListOp::Forbids => kw::FORBIDS,
        ListOp::Exclusive => kw::EXCLUSIVE,
    }
}

/// Render an atom key as the stable string `domain|subject|predicate|object`,
/// the building block of clause/fact/body signatures. The domain comes first so
/// atoms from different domains never produce the same signature; a key with
/// no object ends in an empty final segment.
fn key_sig(k: &AtomKey) -> String {
    let object = k.object.as_deref().unwrap_or("");
    [
        k.domain.as_str(),
        k.subject.as_str(),
        k.predicate.as_str(),
        object,
    ]
    .join("|")
}

/// Canonical, order-independent signature of a clause's literals (for dedup).
fn clause_sig(lits: &[RawLit]) -> String {
    let mut parts: Vec<String> = lits
        .iter()
        .map(|l| alloc::format!("{}|{}", key_sig(&l.key), l.negated as u8))
        .collect();
    parts.sort();
    parts.dedup();
    parts.join(";")
}

/// Canonical body string for a named construct, hashed for redefinition checks.
/// Resolves atom domains through `ctx` so the signature keys on resolved identity.
fn canonical_body(
    name: &str,
    body: &Body,
    is_rule: bool,
    ctx: &DomainCtx,
) -> Result<String, CompileError> {
    let mut s = String::new();
    let _ = write!(s, "{}|{}|", if is_rule { "RULE" } else { "PREMISE" }, name);
    match body {
        Body::List { op, atoms } => {
            let _ = write!(s, "LIST|{}|", list_kind(*op));
            let mut keys: Vec<String> = atoms
                .iter()
                .map(|a| Ok(key_sig(&ctx.key(&a.data)?)))
                .collect::<Result<_, CompileError>>()?;
            keys.sort();
            s.push_str(&keys.join(";"));
        }
        Body::Impl {
            antecedent,
            ante_conn,
            consequent,
            cons_conn,
        } => {
            let conn = |c: &Conn| if *c == Conn::Or { "OR" } else { "AND" };
            s.push_str("IMPL|ANTE|");
            s.push_str(conn(ante_conn));
            s.push('|');
            s.push_str(&lit_sigs(antecedent, ctx)?);
            s.push_str("|CONS|");
            s.push_str(conn(cons_conn));
            s.push('|');
            s.push_str(&lit_sigs(consequent, ctx)?);
        }
    }
    Ok(s)
}

/// Signature of a literal list: one `key|negated` part per literal, sorted and
/// joined by `;`, so the order the literals were written in does not matter.
/// [`canonical_body`] uses it so that reordering a body is not mistaken for a
/// redefinition. Fails with the first atom that `ctx` cannot resolve.
fn lit_sigs(
    lits: &[elenchus_parser::Located<Literal>],
    ctx: &DomainCtx,
) -> Result<String, CompileError> {
    let mut parts = Vec::with_capacity(lits.len());
    for lit in lits {
        let key = ctx.key(&lit.data.atom)?;
        parts.push(alloc::format!(
            "{}|{}",
            key_sig(&key),
            lit.data.negated as u8
        ));
    }
    parts.sort();
    Ok(parts.join(";"))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Compile a single inline source under a default `DOMAIN t`, so test
    /// programs need not repeat the declaration. Atoms land in domain `t`.
    fn cs(src: &str) -> Result<Compiled, CompileError> {
        compile_source("<t>", &alloc::format!("DOMAIN t\n{src}"))
    }

    /// An atom key in the default test domain `t`.
    fn key(subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
        key_in("t", subject, predicate, object)
    }

    /// An atom key in an explicit domain.
    fn key_in(domain: &str, subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
        AtomKey {
            domain: domain.to_string(),
            subject: subject.to_string(),
            predicate: predicate.to_string(),
            object: object.map(|o| o.to_string()),
        }
    }

    fn id(c: &Compiled, k: &AtomKey) -> AtomId {
        c.atoms.iter().position(|a| a == k).unwrap() as AtomId
    }

    #[test]
    fn exclusive_unfolds_pairwise() {
        let src = r#"
        PREMISE e:
            EXCLUSIVE
                x a
                x b
                x c
        "#;
        let c = cs(src).unwrap();
        // C(3,2) = 3 clauses, each of 2 positive literals.
        assert_eq!(c.clauses.len(), 3);
        for cl in &c.clauses {
            assert_eq!(cl.lits.len(), 2);
            assert!(cl.lits.iter().all(|l| !l.negated));
        }
    }

    #[test]
    fn implication_negates_consequent() {
        // WHEN x a THEN x b  ==  Impossible([x a, NOT x b])
        let src = r#"
        PREMISE r:
            WHEN x a
            THEN x b
        "#;
        let c = cs(src).unwrap();
        assert_eq!(c.clauses.len(), 1);
        let cl = &c.clauses[0];
        assert_eq!(cl.lits.len(), 2);
        let a = id(&c, &key("x", "a", None));
        let b = id(&c, &key("x", "b", None));
        assert!(cl.lits.contains(&Lit {
            atom: a,
            negated: false
        }));
        assert!(cl.lits.contains(&Lit {
            atom: b,
            negated: true
        }));
    }

    #[test]
    fn negated_consequent_flips_to_positive() {
        // THEN NOT x b  →  NOT(NOT x b) = x b positive inside Impossible
        let src = r#"
        PREMISE r:
            WHEN x a
            THEN NOT x b
        "#;
        let c = cs(src).unwrap();
        let b = id(&c, &key("x", "b", None));
        assert!(c.clauses[0].lits.contains(&Lit {
            atom: b,
            negated: false
        }));
    }

    #[test]
    fn consequent_or_is_one_clause_with_all_negated() {
        // WHEN x p THEN x a OR x b  ==  Impossible([x p, NOT x a, NOT x b])
        let src = r#"
        PREMISE r:
            WHEN x p
            THEN x a
            OR x b
        "#;
        let c = cs(src).unwrap();
        assert_eq!(c.clauses.len(), 1);
        let cl = &c.clauses[0];
        assert_eq!(cl.lits.len(), 3);
        let p = id(&c, &key("x", "p", None));
        let a = id(&c, &key("x", "a", None));
        let b = id(&c, &key("x", "b", None));
        assert!(cl.lits.contains(&Lit {
            atom: p,
            negated: false
        }));
        assert!(cl.lits.contains(&Lit {
            atom: a,
            negated: true
        }));
        assert!(cl.lits.contains(&Lit {
            atom: b,
            negated: true
        }));
    }

    #[test]
    fn antecedent_or_is_one_clause_per_disjunct() {
        // WHEN x a OR x b THEN x c
        //   == Impossible([x a, NOT x c]) ∧ Impossible([x b, NOT x c])
        let src = r#"
        PREMISE r:
            WHEN x a
            OR x b
            THEN x c
        "#;
        let c = cs(src).unwrap();
        assert_eq!(c.clauses.len(), 2);
        let a = id(&c, &key("x", "a", None));
        let b = id(&c, &key("x", "b", None));
        let cc = id(&c, &key("x", "c", None));
        // every clause has exactly two lits and carries NOT c
        for cl in &c.clauses {
            assert_eq!(cl.lits.len(), 2);
            assert!(cl.lits.contains(&Lit {
                atom: cc,
                negated: true
            }));
        }
        let has = |atom| {
            c.clauses.iter().any(|cl| {
                cl.lits.contains(&Lit {
                    atom,
                    negated: false,
                })
            })
        };
        assert!(has(a) && has(b));
    }

    #[test]
    fn antecedent_or_with_consequent_or_distributes() {
        // (a ∨ b) → (c ∨ d): Impossible([a,¬c,¬d]) ∧ Impossible([b,¬c,¬d])
        let src = r#"
        PREMISE r:
            WHEN x a
            OR x b
            THEN x c
            OR x d
        "#;
        let c = cs(src).unwrap();
        assert_eq!(c.clauses.len(), 2);
        for cl in &c.clauses {
            assert_eq!(cl.lits.len(), 3);
        }
    }

    #[test]
    fn rule_with_or_antecedent_splits_into_two_rules() {
        // (a ∨ b) → c derives c whenever either fires: two single-antecedent rules.
        let src = r#"
        RULE r:
            WHEN x a
            OR x b
            THEN x c
        "#;
        let c = cs(src).unwrap();
        assert_eq!(c.rules.len(), 2);
        assert!(
            c.rules
                .iter()
                .all(|r| r.antecedent.len() == 1 && r.consequent.len() == 1)
        );
    }

    #[test]
    fn rule_with_or_consequent_is_rejected() {
        // A rule cannot derive a disjunction — must be a PREMISE.
        let src = r#"
        RULE r:
            WHEN x a
            THEN x b
            OR x c
        "#;
        let err = cs(src).unwrap_err();
        assert!(matches!(
            err,
            CompileError::RuleDisjunctiveConsequent { .. }
        ));
    }

    #[test]
    fn oneof_is_pairwise_plus_at_least_one() {
        let src = r#"
        PREMISE o:
            ONEOF
                x a
                x b
        "#;
        let c = cs(src).unwrap();
        // pairwise C(2,2)=1 + 1 at-least-one = 2 clauses
        assert_eq!(c.clauses.len(), 2);
        // the at-least-one clause is the all-negated one
        assert!(c.clauses.iter().any(|cl| cl.lits.iter().all(|l| l.negated)));
    }

    #[test]
    fn atleast_is_one_negated_clause() {
        let src = r#"
        PREMISE a:
            ATLEAST
                x a
                x b
                x c
        "#;
        let c = cs(src).unwrap();
        assert_eq!(c.clauses.len(), 1);
        assert_eq!(c.clauses[0].lits.len(), 3);
        assert!(c.clauses[0].lits.iter().all(|l| l.negated));
    }

    #[test]
    fn rules_are_separate_from_clauses() {
        let src = r#"
        RULE needs:
            WHEN x a
            THEN x b
        "#;
        let c = cs(src).unwrap();
        assert_eq!(c.clauses.len(), 0);
        assert_eq!(c.rules.len(), 1);
        assert_eq!(c.rules[0].antecedent.len(), 1);
        assert_eq!(c.rules[0].consequent.len(), 1);
    }

    #[test]
    fn atoms_are_canonically_sorted() {
        let src = r#"
        FACT z z
        FACT a a
        FACT m m
        "#;
        let c = cs(src).unwrap();
        let mut sorted = c.atoms.clone();
        sorted.sort();
        assert_eq!(c.atoms, sorted);
    }

    #[test]
    fn duplicate_premise_is_idempotent() {
        let src = r#"
        PREMISE e:
            EXCLUSIVE
                x a
                x b
        PREMISE e:
            EXCLUSIVE
                x a
                x b
        "#;
        let c = cs(src).unwrap();
        assert_eq!(c.clauses.len(), 1);
    }

    #[test]
    fn redefinition_with_different_body_errors() {
        let src = r#"
        PREMISE e:
            EXCLUSIVE
                x a
                x b
        PREMISE e:
            EXCLUSIVE
                x a
                x c
        "#;
        let err = cs(src).unwrap_err();
        assert_eq!(
            err,
            CompileError::PremiseRedefinition {
                name: "e".to_string()
            }
        );
    }

    #[test]
    fn duplicate_fact_is_idempotent() {
        let c = cs("FACT x a\nFACT x a\n").unwrap();
        assert_eq!(c.facts.len(), 1);
    }

    #[test]
    fn conflicting_facts_are_both_kept() {
        // FACT X + NOT X is a CONFLICT for the solver, not a compile error.
        let c = cs("FACT x a\nNOT x a\n").unwrap();
        assert_eq!(c.facts.len(), 2);
    }

    #[test]
    fn import_is_recorded_pending() {
        let c = cs("IMPORT \"physics.vrf\"\nFACT x a\n").unwrap();
        assert_eq!(c.pending_imports, vec!["physics.vrf".to_string()]);
    }

    #[test]
    fn qualified_fact_lands_in_the_imported_domain() {
        // The library's premise is about `physics.Engine_X has fuel`; the main file
        // asserts a fact qualified INTO that domain, so the two share one atom id.
        let mut r = MemoryResolver::new();
        r.add(
            "lib.vrf",
            r#"
        DOMAIN physics
        PREMISE needs_fuel:
            WHEN Engine_X has engine
            THEN Engine_X has fuel
        "#,
        );
        r.add(
            "main.vrf",
            r#"
        DOMAIN main
        IMPORT "lib.vrf"
        FACT physics.Engine_X has engine
        FACT physics.Engine_X has fuel
        "#,
        );
        let c = compile("main.vrf", &r).unwrap();
        assert!(c.pending_imports.is_empty());
        assert_eq!(c.clauses.len(), 1); // the imported premise
        assert_eq!(c.facts.len(), 2);

        // `physics.Engine_X has fuel` from the FACT and the imported premise share an id.
        let fuel = key_in("physics", "Engine_X", "has", Some("fuel"));
        let fuel_id = id(&c, &fuel);
        assert!(c.facts.iter().any(|f| f.atom == fuel_id));
        assert!(c.clauses[0].lits.iter().any(|l| l.atom == fuel_id));
    }

    #[test]
    fn same_triple_in_different_domains_does_not_unify() {
        // Without a domain prefix the fact lands in `main`, NOT `physics`, so it is
        // a distinct atom from the library's `physics.Engine_X has fuel`.
        let mut r = MemoryResolver::new();
        r.add("lib.vrf", "DOMAIN physics\nFACT Engine_X has fuel\n");
        r.add(
            "main.vrf",
            "DOMAIN main\nIMPORT \"lib.vrf\"\nFACT Engine_X has fuel\n",
        );
        let c = compile("main.vrf", &r).unwrap();
        // Two distinct atoms: physics.Engine_X has fuel and main.Engine_X has fuel.
        assert!(c.atoms.iter().any(|a| a.domain == "physics"));
        assert!(c.atoms.iter().any(|a| a.domain == "main"));
        assert_eq!(
            c.atoms
                .iter()
                .filter(|a| a.subject == "Engine_X" && a.predicate == "has")
                .count(),
            2
        );
    }

    #[test]
    fn import_alias_binds_a_local_domain_name() {
        // `AS phys` lets the consumer reference the imported domain by a local name.
        let mut r = MemoryResolver::new();
        r.add("lib.vrf", "DOMAIN physics\nFACT Motor over_200\n");
        r.add(
            "main.vrf",
            "DOMAIN main\nIMPORT \"lib.vrf\" AS phys\nFACT phys.Motor over_100\n",
        );
        let c = compile("main.vrf", &r).unwrap();
        // Both facts live in the physics domain (one via its own name, one via alias).
        assert_eq!(c.atoms.iter().filter(|a| a.domain == "physics").count(), 2);
    }

    #[test]
    fn unknown_domain_reference_errors() {
        // Referencing a domain that is neither this file's nor imported here fails.
        let err = cs("FACT ghost.x a\n").unwrap_err();
        assert!(matches!(err, CompileError::UnknownDomain { .. }));
    }

    #[test]
    fn imports_are_not_transitive_for_naming() {
        // main imports physics; physics imports math. main may NOT name math.
        let mut r = MemoryResolver::new();
        r.add("math.vrf", "DOMAIN math\nFACT foo bar\n");
        r.add(
            "physics.vrf",
            "DOMAIN physics\nIMPORT \"math.vrf\"\nFACT Motor over_100\n",
        );
        r.add(
            "main.vrf",
            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT math.foo bar\n",
        );
        let err = compile("main.vrf", &r).unwrap_err();
        assert!(matches!(err, CompileError::UnknownDomain { .. }));
    }

    #[test]
    fn transitive_dependency_clauses_still_load() {
        // Even though main can't *name* math, math's clauses still participate.
        let mut r = MemoryResolver::new();
        r.add(
            "math.vrf",
            "DOMAIN math\nPREMISE e:\n    EXCLUSIVE\n        x a\n        x b\n",
        );
        r.add("physics.vrf", "DOMAIN physics\nIMPORT \"math.vrf\"\n");
        r.add("main.vrf", "DOMAIN main\nIMPORT \"physics.vrf\"\n");
        let c = compile("main.vrf", &r).unwrap();
        assert_eq!(c.clauses.len(), 1); // math's clause loaded transitively
        assert!(c.clauses.iter().all(|cl| cl.origin.source == "math.vrf"));
    }

    #[test]
    fn missing_domain_errors() {
        let err = compile_source("nodomain.vrf", "FACT x a\n").unwrap_err();
        assert!(matches!(err, CompileError::MissingDomain { .. }));
    }

    #[test]
    fn duplicate_domain_errors() {
        let err = compile_source("dup.vrf", "DOMAIN a\nDOMAIN b\nFACT x a\n").unwrap_err();
        assert!(matches!(err, CompileError::DuplicateDomain { .. }));
    }

    #[test]
    fn alias_clash_when_one_local_name_binds_two_domains() {
        // The same local alias `x` bound to two genuinely different domains is a
        // clash: disambiguate with distinct aliases.
        let mut r = MemoryResolver::new();
        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
        r.add("b.vrf", "DOMAIN chemistry\nFACT atom reacts\n");
        r.add(
            "main.vrf",
            "DOMAIN main\nIMPORT \"a.vrf\" AS x\nIMPORT \"b.vrf\" AS x\n",
        );
        let err = compile("main.vrf", &r).unwrap_err();
        assert!(matches!(err, CompileError::DomainAliasClash { .. }));
    }

    #[test]
    fn two_files_with_the_same_domain_name_merge() {
        // Nominal domains: two files both declaring DOMAIN physics share it (the
        // value of importing a premise library is exactly this unification).
        let mut r = MemoryResolver::new();
        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
        r.add(
            "main.vrf",
            "DOMAIN physics\nIMPORT \"a.vrf\"\nFACT Motor over_200\n",
        );
        let c = compile("main.vrf", &r).unwrap();
        // Both motors live in the single shared `physics` domain.
        assert!(c.atoms.iter().all(|a| a.domain == "physics"));
        assert_eq!(c.atoms.len(), 2);
    }

    #[test]
    fn diamond_import_is_deduped() {
        // main → a, c ; a → base ; c → base. base merged once.
        let mut r = MemoryResolver::new();
        r.add(
            "base.vrf",
            r#"
        DOMAIN base
        PREMISE b:
            EXCLUSIVE
                x a
                x b
        "#,
        );
        r.add("a.vrf", "DOMAIN a\nIMPORT \"base.vrf\"\n");
        r.add("c.vrf", "DOMAIN c\nIMPORT \"base.vrf\"\n");
        r.add(
            "main.vrf",
            "DOMAIN main\nIMPORT \"a.vrf\"\nIMPORT \"c.vrf\"\n",
        );
        let c = compile("main.vrf", &r).unwrap();
        assert_eq!(c.clauses.len(), 1); // base's single clause, not two
    }

    #[test]
    fn circular_import_errors() {
        let mut r = MemoryResolver::new();
        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
        r.add("b.vrf", "DOMAIN b\nIMPORT \"a.vrf\"\n");
        let err = compile("a.vrf", &r).unwrap_err();
        assert!(matches!(err, CompileError::CircularImport(_)));
    }

    #[test]
    fn three_node_cycle_errors() {
        // a → b → c → a. The back-edge to the on-path ancestor is detected.
        let mut r = MemoryResolver::new();
        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
        r.add("b.vrf", "DOMAIN b\nIMPORT \"c.vrf\"\n");
        r.add("c.vrf", "DOMAIN c\nIMPORT \"a.vrf\"\n");
        let err = compile("a.vrf", &r).unwrap_err();
        assert!(matches!(err, CompileError::CircularImport(_)));
    }

    #[test]
    fn shared_grandchild_diamond_loads_once() {
        // The user's case: a imports B and C; C ALSO imports B. B must be compiled
        // exactly once (its single clause is not duplicated by the two paths to it).
        let mut r = MemoryResolver::new();
        r.add(
            "b.vrf",
            "DOMAIN b\nPREMISE e:\n    EXCLUSIVE\n        x a\n        x b\n",
        );
        r.add("c.vrf", "DOMAIN c\nIMPORT \"b.vrf\"\n");
        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\nIMPORT \"c.vrf\"\n");
        let c = compile("a.vrf", &r).unwrap();
        assert_eq!(
            c.clauses.len(),
            1,
            "b.vrf's clause must appear exactly once"
        );
    }

    #[test]
    fn exponential_fan_out_is_memoized_not_blown_up() {
        // Each level f_k lists f_{k-1} as an import twice. A loader that revisits
        // every import would do 2^k visits (2^40 is roughly a trillion); with
        // content-hash memoization the work stays linear and the test returns
        // immediately. This guards against combinatorial blow-up / DoS.
        let n = 40;
        let mut resolver = MemoryResolver::new();
        resolver.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
        for k in 1..=n {
            let below = k - 1;
            let file = alloc::format!("f{k}.vrf");
            let body = alloc::format!(
                "DOMAIN d{k}\nIMPORT \"f{}.vrf\"\nIMPORT \"f{}.vrf\"\n",
                below,
                below
            );
            resolver.add(&file, &body);
        }

        let root = alloc::format!("f{n}.vrf");
        let compiled = compile(&root, &resolver).unwrap();
        // Only f0 declares a fact, and it must be counted once.
        assert_eq!(compiled.facts.len(), 1);
    }

    #[test]
    fn very_deep_linear_chain_does_not_overflow() {
        // A 10 000-file acyclic chain, each file importing the one below it.
        // Import resolution uses an explicit work stack rather than recursion,
        // so a depth that would overflow a recursive loader still compiles.
        let n = 10_000;
        let mut resolver = MemoryResolver::new();
        resolver.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
        for k in 1..=n {
            let file = alloc::format!("f{k}.vrf");
            let body = alloc::format!("DOMAIN d{k}\nIMPORT \"f{}.vrf\"\n", k - 1);
            resolver.add(&file, &body);
        }

        let top = alloc::format!("f{n}.vrf");
        let compiled = compile(&top, &resolver).unwrap();
        assert_eq!(compiled.facts.len(), 1);
    }

    #[test]
    fn missing_import_errors() {
        // main.vrf names ghost.vrf, which was never registered with the resolver.
        let mut resolver = MemoryResolver::new();
        resolver.add("main.vrf", "DOMAIN main\nIMPORT \"ghost.vrf\"\n");

        let failure = compile("main.vrf", &resolver).unwrap_err();
        let is_not_found = matches!(failure, CompileError::ImportNotFound(_));
        assert!(is_not_found);
    }

    #[test]
    fn unused_import_is_flagged() {
        // physics.vrf is imported, yet main.vrf never writes a `physics.`-qualified
        // atom, so the import has to show up in the unused list.
        let main_src = "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT x a\n";
        let mut resolver = MemoryResolver::new();
        resolver.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
        resolver.add("main.vrf", main_src);

        let compiled = compile("main.vrf", &resolver).unwrap();
        let unused = &compiled.unused_imports;
        assert_eq!(unused.len(), 1);

        // The single entry records the imported domain, the importing file, and
        // the absence of an alias.
        let entry = &unused[0];
        assert_eq!(entry.domain, "physics");
        assert_eq!(entry.file, "main.vrf");
        assert_eq!(entry.alias, None);
    }

    #[test]
    fn referenced_import_is_not_unused() {
        // Same import as the unused case, except main.vrf now states a fact on a
        // `physics.`-qualified atom, which counts as using the import.
        let main_src = "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT physics.Motor over_200\n";
        let mut resolver = MemoryResolver::new();
        resolver.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
        resolver.add("main.vrf", main_src);

        let compiled = compile("main.vrf", &resolver).unwrap();
        let unused = &compiled.unused_imports;
        assert!(unused.is_empty(), "{:?}", unused);
    }

    #[test]
    fn unused_import_records_its_alias() {
        // An unused import written with `AS phys` must carry that alias in its
        // unused-import entry.
        let main_src = "DOMAIN main\nIMPORT \"physics.vrf\" AS phys\nFACT x a\n";
        let mut resolver = MemoryResolver::new();
        resolver.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
        resolver.add("main.vrf", main_src);

        let compiled = compile("main.vrf", &resolver).unwrap();
        let unused = &compiled.unused_imports;
        assert_eq!(unused.len(), 1);
        let recorded_alias = unused[0].alias.as_deref();
        assert_eq!(recorded_alias, Some("phys"));
    }

    #[test]
    fn import_referenced_only_inside_a_premise_is_used() {
        // The only `physics.` reference sits in a premise's WHEN clause rather
        // than in a FACT; that still counts as a use of the import.
        let main_src = "DOMAIN main\nIMPORT \"physics.vrf\"\nPREMISE p:\n    WHEN physics.Motor over_100\n    THEN x ok\n";
        let mut resolver = MemoryResolver::new();
        resolver.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
        resolver.add("main.vrf", main_src);

        let compiled = compile("main.vrf", &resolver).unwrap();
        let unused = &compiled.unused_imports;
        assert!(unused.is_empty(), "{:?}", unused);
    }

    #[test]
    fn same_premise_name_across_files_coexists() {
        // A premise name is a label local to its source file. Two files using the
        // name `safety` for different bodies is therefore legal: both premises
        // apply, and no redefinition error is raised. (The atoms differ as well,
        // because the two files declare different domains.)
        let physics_src = r#"
        DOMAIN physics
        PREMISE safety:
            EXCLUSIVE
                x a
                x b
        "#;
        let main_src = r#"
        DOMAIN main
        IMPORT "physics.vrf"
        PREMISE safety:
            EXCLUSIVE
                x a
                x c
        "#;
        let mut resolver = MemoryResolver::new();
        resolver.add("physics.vrf", physics_src);
        resolver.add("main.vrf", main_src);

        let compiled = compile("main.vrf", &resolver).unwrap();
        // One clause per file: a-b from physics, a-c from main.
        assert_eq!(compiled.clauses.len(), 2);
        // Each file must be the recorded origin of at least one clause.
        for file in ["physics.vrf", "main.vrf"] {
            assert!(compiled.clauses.iter().any(|cl| cl.origin.source == file));
        }
    }

    #[test]
    fn redefinition_within_one_source_still_errors() {
        // Inside a single source, giving premise `e` two different bodies is
        // treated as a mistake and rejected.
        let program = r#"
        DOMAIN m
        PREMISE e:
            EXCLUSIVE
                x a
                x b
        PREMISE e:
            EXCLUSIVE
                x a
                x c
        "#;
        let expected = CompileError::PremiseRedefinition {
            name: "e".to_string(),
        };

        let failure = compile_source("main.vrf", program).unwrap_err();
        assert_eq!(failure, expected);
    }

    #[test]
    fn import_demo_examples_resolve() {
        // Compile the import demo from docs/examples together with the physics
        // source it imports.
        let physics_src = include_str!("../../../docs/examples/physics.vrf");
        let demo_src = include_str!("../../../docs/examples/import-demo.vrf");
        let mut resolver = MemoryResolver::new();
        resolver.add("physics.vrf", physics_src);
        resolver.add("import-demo.vrf", demo_src);

        let compiled = compile("import-demo.vrf", &resolver).unwrap();
        // Every import was resolved, so nothing is left pending.
        assert!(compiled.pending_imports.is_empty());
        // physics.vrf supplies one_path (EXCLUSIVE, 1 clause) and speed_order
        // (implication, 1 clause).
        assert_eq!(compiled.clauses.len(), 2);

        // A qualified fact (`physics.Motor …`) must resolve to the same atom id
        // that the imported premise uses.
        let over_100 = id(&compiled, &key_in("physics", "Motor", "over_100", None));
        let stated_as_fact = compiled.facts.iter().any(|fact| fact.atom == over_100);
        assert!(stated_as_fact);
        let used_in_clause = compiled
            .clauses
            .iter()
            .flat_map(|clause| clause.lits.iter())
            .any(|lit| lit.atom == over_100);
        assert!(used_in_clause);
    }

    #[test]
    fn creature_example_compiles() {
        // Pin the shape of the IR produced from the creature example document.
        let program = include_str!("../../../docs/examples/creature.vrf");
        let compiled = compile_source("creature.vrf", program).unwrap();

        let fact_count = compiled.facts.len();
        assert_eq!(fact_count, 2); // flying, warm_blood
        let rule_count = compiled.rules.len();
        assert_eq!(rule_count, 1); // needs_oxygen
        let check_count = compiled.checks.len();
        assert_eq!(check_count, 1);
        // Clause tally: fly_xor_swim gives 1, wings_need_bone gives 2 (its THEN
        // has two consequents, wing AND bone), no_dual_temp gives 1.
        let clause_count = compiled.clauses.len();
        assert_eq!(clause_count, 4);
        let atom_count = compiled.atoms.len();
        assert_eq!(atom_count, 7);
    }

    #[test]
    fn forbids_unfolds_pairwise() {
        // FORBIDS over three atoms expands to one clause per unordered pair.
        let program = r#"
        PREMISE f:
            FORBIDS
                x a
                x b
                x c
        "#;
        let compiled = cs(program).unwrap();
        // C(3,2) = 3 clauses, the same count EXCLUSIVE would produce.
        assert_eq!(compiled.clauses.len(), 3);

        // Every clause is binary and contains no negated literal.
        let all_positive_pairs = compiled.clauses.iter().all(|clause| {
            let is_pair = clause.lits.len() == 2;
            is_pair && !clause.lits.iter().any(|lit| lit.negated)
        });
        assert!(all_positive_pairs);
    }

    #[test]
    fn rule_with_multiple_consequents() {
        // `THEN … AND …` stays a single rule carrying both consequents.
        let program = r#"
        RULE r:
            WHEN x a
            THEN x b
            AND  x c
        "#;
        let compiled = cs(program).unwrap();
        let rules = &compiled.rules;
        assert_eq!(rules.len(), 1);
        let only_rule = &rules[0];
        assert_eq!(only_rule.consequent.len(), 2);
    }

    #[test]
    fn negated_antecedent_literal_keeps_polarity() {
        // `WHEN NOT x a THEN x b` lowers to Impossible([NOT x a, NOT x b]); the
        // antecedent literal must still be negated in the compiled clause.
        let program = r#"
        PREMISE a:
            WHEN NOT x a
            THEN x b
        "#;
        let compiled = cs(program).unwrap();
        let xa = id(&compiled, &key("x", "a", None));

        let first_clause = &compiled.clauses[0];
        let has_negated_xa = first_clause
            .lits
            .iter()
            .any(|lit| lit.atom == xa && lit.negated);
        assert!(has_negated_xa);
    }

    #[test]
    fn rule_keeps_consequent_negation() {
        // A `THEN NOT …` consequent must still be marked negated on the rule.
        let program = r#"
        RULE r:
            WHEN x a
            THEN NOT x b
        "#;
        let compiled = cs(program).unwrap();
        let first_consequent = &compiled.rules[0].consequent[0];
        assert!(first_consequent.negated);
    }

    #[test]
    fn compilation_is_deterministic() {
        // Compiling the same text twice must give equal IR. The atoms are listed
        // out of alphabetical order in the source on purpose.
        let program = r#"
        PREMISE e:
            EXCLUSIVE
                z z
                a a
                m m
        FACT q q
        "#;
        let first_run = cs(program).unwrap();
        let second_run = cs(program).unwrap();
        assert_eq!(first_run, second_run);
    }

    #[test]
    fn empty_program_compiles_to_empty_ir() {
        // A source holding only a comment yields no atoms, clauses, or facts.
        let compiled = cs("// nothing here\n").unwrap();
        assert!(compiled.atoms.is_empty());
        assert!(compiled.clauses.is_empty());
        assert!(compiled.facts.is_empty());
    }

    #[test]
    fn same_clause_from_two_named_premises_is_deduped() {
        // Premises e1 and e2 have different names but the same logical content,
        // so they collapse into one clause and raise no redefinition error.
        let program = r#"
        PREMISE e1:
            EXCLUSIVE
                x a
                x b
        PREMISE e2:
            EXCLUSIVE
                x a
                x b
        "#;
        let compiled = cs(program).unwrap();
        let clause_count = compiled.clauses.len();
        assert_eq!(clause_count, 1);
    }

    #[test]
    fn object_distinguishes_atom_identity() {
        // Two facts that share subject and predicate but differ in object
        // (`x p a` and `x p b`) must be interned as two separate atoms.
        let compiled = cs("FACT x p a\nFACT x p b\n").unwrap();
        let atom_count = compiled.atoms.len();
        assert_eq!(atom_count, 2);
    }
}