Skip to main content

elenchus_compiler/
lib.rs

1//! elenchus-compiler — compiles parsed elenchus DSL into a canonical clause IR.
2//!
3//! This crate is **preparation, not solving**. It takes the AST (from
4//! `elenchus-parser`) and produces a deterministic, solver-ready intermediate
5//! representation:
6//!
7//! - **atom interner**: `(subject, predicate, object?)` → dense `u32` id,
8//!   canonically sorted so ids (and any later enumeration) are deterministic;
9//! - **desugaring**: surface CAPS sugar → `Impossible` clauses
10//!   (`EXCLUSIVE` pairwise, `WHEN…THEN` → `Impossible([A, …, NOT C])`, etc.);
11//! - **content-addressing** (sha256, mirroring vsm-guard's CAS): identical
12//!   clauses are deduped (idempotent — `P ∧ P ≡ P`), and a named construct
13//!   redefined with a different body is a `PremiseRedefinition` error.
14//!
15//! The actual reasoning (3-valued forward chaining, SAT, all-SAT, the WARNING
16//! pool, the four results) lives in `elenchus-solver`. `IMPORT` resolution is a
17//! source-agnostic [`Resolver`] that flat-merges another source into the shared
18//! atom universe ([`compile`] resolves imports; [`compile_source`] leaves them
19//! pending).
20//!
21//! # Example
22//!
23//! ```
24//! use elenchus_compiler::compile_source;
25//!
26//! // `ASSUME` lowers to a *soft* fact: the same atom universe as a `FACT`, but
27//! // one the solver may retract. Here `x a` is asserted both ways (hard + soft).
28//! let ir = compile_source("demo.vrf", "DOMAIN d\nFACT x a\nASSUME NOT x a\nCHECK x\n").unwrap();
29//! assert_eq!(ir.facts.len(), 2);
30//! assert!(ir.facts.iter().any(|f| f.soft)); // the ASSUME is the soft one
31//! ```
32#![no_std]
33// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
34#![warn(missing_docs)]
35
36extern crate alloc;
37
38#[cfg(feature = "std")]
39extern crate std;
40
41use alloc::boxed::Box;
42use alloc::collections::{BTreeMap, BTreeSet};
43use alloc::string::{String, ToString};
44use alloc::vec;
45use alloc::vec::Vec;
46use core::fmt::Write as _;
47
48use elenchus_parser::{Atom, Body, CloseKind, Conn, ListOp, Literal, Located, Quant, Statement};
49/// Re-exported so downstream crates name one source of truth: [`Diagnostics`] for
50/// the syntax errors carried by [`CompileError::Parse`], and [`kw`] for the
51/// keyword spellings an [`Origin::kind`] is built from (so the solver matches a
52/// `kind` against `kw::PREMISE`, not a re-typed `"PREMISE"`).
53pub use elenchus_parser::{Diagnostics, kw};
54use sha2::{Digest, Sha256};
55use thiserror::Error;
56
57// --- content-addressing (mirrors vsm-guard::hashing) -----------------------
58
59/// SHA-256 content addressing. Used only for dedup / redefinition / provenance,
60/// never for namespacing atoms.
61pub fn hash_hex(data: &[u8]) -> String {
62    let mut hasher = Sha256::new();
63    hasher.update(data);
64    let out = hasher.finalize();
65    let mut s = String::with_capacity(out.len() * 2);
66    for b in out {
67        let _ = write!(s, "{:02x}", b);
68    }
69    s
70}
71
72// --- IR types --------------------------------------------------------------
73
74/// Dense atom identifier (also the SAT variable number).
75pub type AtomId = u32;
76
77/// The identity of an atom: the `domain` plus the triple
78/// `(subject, predicate, object?)`, owned so it survives across merged sources.
79/// The domain is the leading sort key, so atoms group by domain; ordering is
80/// otherwise lexicographic → canonical. Two atoms with the same triple in
81/// *different* domains are distinct (no cross-domain unification).
82#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
83pub struct AtomKey {
84    /// The domain this atom belongs to (the resolved namespace, never a raw
85    /// alias). `physics.engine` and `plan.engine` are different atoms.
86    pub domain: String,
87    /// The entity the claim is about (owned copy of the parser's `subject`).
88    pub subject: String,
89    /// The relation or property asserted.
90    pub predicate: String,
91    /// Optional object; part of identity, so `has flying` ≠ `has swimming`.
92    pub object: Option<String>,
93}
94
95/// The domain context of one file being compiled: its own declared domain (where
96/// bare atoms fall) and the local names — aliases or imported domain names — it
97/// may reference other domains by. Resolving an atom's optional `domain.` prefix
98/// against this context yields its canonical [`AtomKey`] domain.
99struct DomainCtx {
100    /// The file's own declared domain (the target for unqualified atoms).
101    current: String,
102    /// `local name -> canonical domain` for every name visible in this file
103    /// (always includes `current -> current`, plus one entry per `IMPORT`).
104    aliases: BTreeMap<String, String>,
105}
106
107impl DomainCtx {
108    /// Resolve an atom's optional `domain.` prefix to a canonical domain name.
109    /// `None` → the file's own domain; a prefix not imported here is an error.
110    fn resolve(&self, prefix: Option<&str>) -> Result<String, CompileError> {
111        match prefix {
112            None => Ok(self.current.clone()),
113            Some(p) => self
114                .aliases
115                .get(p)
116                .cloned()
117                .ok_or_else(|| CompileError::UnknownDomain {
118                    domain: p.to_string(),
119                }),
120        }
121    }
122
123    /// Build the owned [`AtomKey`] for a borrowed parser [`Atom`], resolving its
124    /// domain prefix against this file's context.
125    fn key(&self, a: &Atom) -> Result<AtomKey, CompileError> {
126        Ok(AtomKey {
127            domain: self.resolve(a.domain)?,
128            subject: a.subject.to_string(),
129            predicate: a.predicate.to_string(),
130            object: a.object.map(|o| o.to_string()),
131        })
132    }
133}
134
135/// A literal as it appears *inside* an `Impossible` clause: an atom, optionally
136/// negated. `negated = true` means the literal is `NOT atom`.
137#[derive(Debug, Clone, Copy, PartialEq, Eq)]
138pub struct Lit {
139    /// Interned id of the atom (also its SAT variable number).
140    pub atom: AtomId,
141    /// `true` means this literal is `NOT atom` inside the clause.
142    pub negated: bool,
143}
144
145/// A confident truth value. UNKNOWN is the *absence* of a fact, never stored.
146#[derive(Debug, Clone, Copy, PartialEq, Eq)]
147pub enum Value {
148    /// The atom is asserted TRUE (from `FACT`).
149    True,
150    /// The atom is asserted FALSE (from `NOT`).
151    False,
152}
153
154/// Where a piece of IR came from — for readable conflict/warning pools.
155#[derive(Debug, Clone, PartialEq, Eq)]
156pub struct Origin {
157    /// The source label this came from (file name or `"<root>"`/`"<text>"`).
158    pub source: String,
159    /// 1-based line number of the originating statement.
160    pub line: u32,
161    /// The premise/rule name, if it came from a named construct.
162    pub premise: Option<String>,
163    /// Surface kind for the report. A surface keyword (a [`kw`] constant such as
164    /// `kw::FACT` / `kw::PREMISE`) for source constructs, or [`KIND_UNSAT`] for
165    /// the synthetic origin the solver attaches to a global unsatisfiability.
166    pub kind: &'static str,
167}
168
169/// The [`Origin::kind`] the solver stamps on a conflict that is not pinned to one
170/// source construct but to the program being jointly unsatisfiable. Not a
171/// keyword — so it lives here, next to the other kinds, as the one spelling both
172/// the solver (which sets it) and any reader (which matches it) share.
173pub const KIND_UNSAT: &str = "UNSAT";
174
175/// A confident fact (from `FACT` / `NOT`). Conflicting facts on the same atom
176/// are preserved (both kept) — the solver reports that as a CONFLICT.
177#[derive(Debug, Clone, PartialEq, Eq)]
178pub struct Fact {
179    /// The atom this fact pins down.
180    pub atom: AtomId,
181    /// The asserted truth value.
182    pub value: Value,
183    /// Where it came from (for the report).
184    pub origin: Origin,
185    /// `true` for an `ASSUME` (a *soft*, retractable hypothesis). A soft fact
186    /// behaves like a normal fact in the forward pass, but when the assumptions
187    /// cannot all hold the solver may drop it (and only it) to explain the
188    /// contradiction — a `FACT`/`NOT` is never retractable.
189    pub soft: bool,
190}
191
192/// An `Impossible` clause: the listed literals cannot all hold simultaneously.
193#[derive(Debug, Clone, PartialEq, Eq)]
194pub struct Clause {
195    /// The literals that cannot all hold at once (an `Impossible([...])`).
196    pub lits: Vec<Lit>,
197    /// Where it came from (for the report).
198    pub origin: Origin,
199}
200
201/// A forward-chaining rule (from `RULE`): if all antecedent literals hold, derive
202/// the consequent literals.
203#[derive(Debug, Clone, PartialEq, Eq)]
204pub struct Rule {
205    /// Literals that must all hold for the rule to fire.
206    pub antecedent: Vec<Lit>,
207    /// Literals derived (asserted) when the antecedent holds.
208    pub consequent: Vec<Lit>,
209    /// Where it came from (for the report).
210    pub origin: Origin,
211}
212
213/// A `CHECK` query.
214#[derive(Debug, Clone, PartialEq, Eq)]
215pub struct Check {
216    /// Restrict the report to this subject; `None` means check everything.
217    pub subject: Option<String>,
218    /// `true` runs the backward (all-SAT) pass to detect UNDERDETERMINED.
219    pub bidirectional: bool,
220}
221
222/// The compiled IR: the solver's input.
223#[derive(Debug, Clone, PartialEq, Eq)]
224pub struct Compiled {
225    /// Indexed by [`AtomId`]; canonically sorted.
226    pub atoms: Vec<AtomKey>,
227    /// Confident assertions from `FACT`/`NOT`.
228    pub facts: Vec<Fact>,
229    /// `Impossible` clauses (desugared premises + the built-in non-contradiction).
230    pub clauses: Vec<Clause>,
231    /// Forward-chaining rules from `RULE`.
232    pub rules: Vec<Rule>,
233    /// `CHECK` queries.
234    pub checks: Vec<Check>,
235    /// Imports seen but not yet resolved (only populated by [`compile_source`];
236    /// [`compile`] resolves them, leaving this empty).
237    pub pending_imports: Vec<String>,
238    /// Advisory: imports that a file makes but never references (no `domain.atom`
239    /// from that file uses the imported domain). Structural, per-file, and inert —
240    /// it never affects the solve. Only populated by [`compile`] (an unresolved
241    /// import in [`compile_source`] cannot be classified). See [`UnusedImport`].
242    pub unused_imports: Vec<UnusedImport>,
243    /// Atoms consumed as data by a relation `FOR EACH` (the edge facts, e.g. each
244    /// `a linked b`). They are read by the quantifier, so the solver must not
245    /// report them as ORPHAN facts even though no clause references them.
246    pub consumed: Vec<AtomId>,
247}
248
249/// An advisory record: a file `IMPORT`s a domain it never references. Such an
250/// import is inert — no `domain.atom` in that file mentions it, so removing it
251/// would not change the result. It is almost always a leftover or a forgotten
252/// `domain.` prefix. **Purely informational** — it never changes the verdict.
253#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
254pub struct UnusedImport {
255    /// The source that declared the unused `IMPORT`.
256    pub file: String,
257    /// The imported domain that is never referenced from `file`.
258    pub domain: String,
259    /// The local alias, if the import used `AS <alias>`.
260    pub alias: Option<String>,
261    /// 1-based line of the `IMPORT` statement in `file`.
262    pub line: u32,
263}
264
265/// Anything that can go wrong while compiling (and resolving imports).
266#[derive(Debug, Error, PartialEq, Eq)]
267pub enum CompileError {
268    /// A source failed to parse; carries the full syntax diagnostics (every
269    /// error, each as a caret block with the keyword's correct syntax). The
270    /// source label is already inside the [`Diagnostics`] header.
271    #[error("{0}")]
272    Parse(elenchus_parser::Diagnostics),
273    /// A name was reused with a different body *within the same source*.
274    #[error("'{name}' redefined with a different body")]
275    PremiseRedefinition {
276        /// The clashing premise/rule name.
277        name: String,
278    },
279    /// A source did not declare its `DOMAIN` (required, once, as the first
280    /// statement).
281    #[error("{file}: missing a DOMAIN declaration (every file must start with `DOMAIN <name>`)")]
282    MissingDomain {
283        /// The source label that lacked a `DOMAIN`.
284        file: String,
285    },
286    /// A source declared `DOMAIN` more than once (a file has exactly one domain).
287    #[error("{file}: more than one DOMAIN declaration (a file has exactly one domain)")]
288    DuplicateDomain {
289        /// The source label with the duplicate `DOMAIN`.
290        file: String,
291    },
292    /// An atom referenced a `domain.` prefix that is not the file's own domain and
293    /// was not imported in this file.
294    #[error("unknown domain '{domain}' — declare it with DOMAIN, or IMPORT it in this file")]
295    UnknownDomain {
296        /// The unresolved domain prefix.
297        domain: String,
298    },
299    /// Two imports bound the same local domain name to different domains (use a
300    /// distinct `AS <alias>` to tell them apart).
301    #[error("domain name '{alias}' is bound to two different imports (disambiguate with AS)")]
302    DomainAliasClash {
303        /// The clashing local domain name.
304        alias: String,
305    },
306    /// An `IMPORT` target could not be loaded by the [`Resolver`].
307    #[error("import not found: {0}")]
308    ImportNotFound(String),
309    /// Imports form a cycle (a source transitively imports itself).
310    #[error("circular import: {0}")]
311    CircularImport(String),
312    /// A `RULE` used `OR` in its `THEN`: forward chaining cannot derive a
313    /// disjunction (it would not know which literal to assert). Model it as a
314    /// `PREMISE` constraint instead.
315    #[error("rule '{name}' cannot derive a disjunction (OR in THEN); use a PREMISE instead")]
316    RuleDisjunctiveConsequent {
317        /// The offending rule name.
318        name: String,
319    },
320    /// A reference used a value outside the closed set an `ONEOF` declared for that
321    /// variable. Almost always a typo: the misspelling would otherwise mint a new
322    /// atom that hangs in the air as UNKNOWN. Closed-world is opt-in — it only
323    /// applies to a `(subject, predicate)` whose values an `ONEOF` enumerated.
324    /// Boxed so this comparatively large payload does not bloat every `Result`.
325    #[error(transparent)]
326    UnknownValue(Box<UnknownValue>),
327    /// A `FOR EACH … IN <set>` named a set that was never declared with `SET`.
328    /// Usually a typo in the set name; the suggestion offers the nearest declared
329    /// set when one is close.
330    #[error("{file}:{line}: FOR EACH ranges over '{set}', which is not a declared SET{suggestion}")]
331    UnknownSet {
332        /// The source the offending `FOR EACH` is in.
333        file: String,
334        /// 1-based line of the `FOR EACH`.
335        line: u32,
336        /// The undeclared set name that was referenced.
337        set: String,
338        /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
339        suggestion: String,
340    },
341    /// `CLOSE <relation> TRANSITIVE` found a cycle: a node transitively reaches
342    /// itself. Transitive closure requires a DAG (e.g. a dependency graph).
343    #[error(
344        "{file}:{line}: relation '{relation}' has a cycle (`{node}` reaches itself) \
345         — CLOSE … TRANSITIVE requires a DAG"
346    )]
347    CyclicRelation {
348        /// The source the `CLOSE` is in.
349        file: String,
350        /// 1-based line of the `CLOSE`.
351        line: u32,
352        /// The relation predicate being closed.
353        relation: String,
354        /// A node on the cycle (reaches itself).
355        node: String,
356    },
357}
358
359/// Details of a closed-world violation (see [`CompileError::UnknownValue`]). Kept
360/// in its own (boxed) struct so the common error path stays small.
361#[derive(Debug, Error, PartialEq, Eq)]
362#[error(
363    "{file}:{line}: '{value}' is not a declared value of '{subject} {predicate}' \
364     — ONEOF declares {{ {declared} }}{suggestion}"
365)]
366pub struct UnknownValue {
367    /// The source the offending reference is in.
368    pub file: String,
369    /// 1-based line of the offending reference.
370    pub line: u32,
371    /// The variable's subject.
372    pub subject: String,
373    /// The variable's predicate.
374    pub predicate: String,
375    /// The out-of-set value that was used.
376    pub value: String,
377    /// The declared legal values, comma-joined (sorted).
378    pub declared: String,
379    /// ` — did you mean \`x\`?`, or empty when nothing is close enough.
380    pub suggestion: String,
381}
382
383// --- raw (key-based) intermediate, before interning ------------------------
384// While accumulating we key everything by `AtomKey` (the owned triple) rather
385// than by `AtomId`, because ids only become stable once *all* sources are merged
386// and the atom set is sorted in `finalize`. These mirror the public IR types but
387// hold keys instead of ids.
388
389/// A literal keyed by atom identity (pre-interning counterpart of [`Lit`]).
390#[derive(Clone)]
391struct RawLit {
392    key: AtomKey,
393    negated: bool,
394}
395
396/// A fact keyed by atom identity (pre-interning counterpart of [`Fact`]).
397struct RawFact {
398    key: AtomKey,
399    value: Value,
400    origin: Origin,
401    soft: bool,
402}
403
404/// A clause keyed by atom identity (pre-interning counterpart of [`Clause`]).
405struct RawClause {
406    lits: Vec<RawLit>,
407    origin: Origin,
408}
409
410/// A rule keyed by atom identity (pre-interning counterpart of [`Rule`]).
411struct RawRule {
412    antecedent: Vec<RawLit>,
413    consequent: Vec<RawLit>,
414    origin: Origin,
415}
416
417// --- compiler --------------------------------------------------------------
418
419/// Accumulates statements from one or more sources, then interns + emits the IR.
420#[derive(Default)]
421pub struct Compiler {
422    keys: BTreeSet<AtomKey>,
423    facts: Vec<RawFact>,
424    clauses: Vec<RawClause>,
425    rules: Vec<RawRule>,
426    checks: Vec<Check>,
427    pending_imports: Vec<String>,
428    /// (source, name) → content hash of its body, for redefinition detection.
429    /// Scoped per source: premise/rule names are labels, not global identifiers,
430    /// so different files (domains) may reuse a name. A clash is only an error
431    /// *within the same source*.
432    defined: BTreeMap<(String, String), String>,
433    /// dedup of identical clauses by canonical content hash.
434    clause_sigs: BTreeSet<String>,
435    /// dedup of identical facts by (key, value).
436    fact_sigs: BTreeSet<String>,
437    /// Closed-world value sets declared by `ONEOF`: `(domain, subject, predicate)`
438    /// → the set of legal objects. Once a variable's values are enumerated by a
439    /// `ONEOF`, a reference to that variable with an object *outside* the set is a
440    /// compile error (a likely typo), not a silent new atom. Only `ONEOF` members
441    /// that carry an object register here (binary atoms have no value slot to
442    /// close). See [`Compiler::validate_closed_world`].
443    oneof_values: BTreeMap<(String, String, String), BTreeSet<String>>,
444    /// Declared `SET <name>` collections: name → elements, used to ground a
445    /// `FOR EACH <binder> IN <name>` quantifier by instantiating the body once
446    /// per element. Populated in a pre-pass so a `FOR EACH` may reference a set
447    /// declared later in the file.
448    sets: BTreeMap<String, Vec<String>>,
449    /// Declared relation pairs: predicate → `(subject, object)` of every 3-part
450    /// `FACT`, used to ground a `FOR EACH <a> <predicate> <b>` quantifier. Also a
451    /// pre-pass, so the edges may be declared after the quantifier.
452    relations: BTreeMap<String, Vec<(String, String)>>,
453    /// Edge atoms consumed by a relation `FOR EACH` (e.g. each `a linked b`).
454    /// They are *read as data* by the quantifier, so they are not idle facts —
455    /// [`Compiler::finalize`] passes them to the report to suppress the ORPHAN
456    /// lint.
457    relation_consumed: BTreeSet<AtomKey>,
458}
459
460impl Compiler {
461    /// A fresh, empty compiler.
462    pub fn new() -> Self {
463        Self::default()
464    }
465
466    /// Parse one source and accumulate its statements. `source` is a label used
467    /// in provenance (e.g. a file name or `"<root>"`). The source must declare its
468    /// `DOMAIN`; `IMPORT`s are recorded as pending (their domains cannot be bound
469    /// without a [`Resolver`]), so a single source may only reference its own
470    /// domain. Use [`compile`] for cross-domain references.
471    pub fn add_source(&mut self, source: &str, src: &str) -> Result<(), CompileError> {
472        let program = parse_tagged(source, src)?;
473        let domain = extract_domain(&program, source)?;
474        let mut aliases = BTreeMap::new();
475        aliases.insert(domain.clone(), domain.clone());
476        let ctx = DomainCtx {
477            current: domain,
478            aliases,
479        };
480        self.collect_decls(&program);
481        self.apply_closures(&program, source)?;
482        for stmt in &program.statements {
483            match stmt {
484                Statement::Domain(_) => {}
485                Statement::Import { path, .. } => {
486                    self.pending_imports.push(path.data.to_string());
487                }
488                other => self.add_statement(source, other, &ctx)?,
489            }
490        }
491        Ok(())
492    }
493
494    /// Apply every `CLOSE … TRANSITIVE`: replace the relation's pairs with their
495    /// transitive closure so a relation `FOR EACH` ranges over reachability, and
496    /// reject a cycle (a node reaching itself). A pure compile-time graph pass —
497    /// the solver never sees it. Runs after [`collect_decls`] (the direct edges
498    /// must be known) and before grounding.
499    fn apply_closures(
500        &mut self,
501        program: &elenchus_parser::Program,
502        source: &str,
503    ) -> Result<(), CompileError> {
504        for stmt in &program.statements {
505            if let Statement::Close { relation, kind } = stmt {
506                let CloseKind::Transitive = kind;
507                let pairs = self
508                    .relations
509                    .get(relation.data)
510                    .cloned()
511                    .unwrap_or_default();
512                let closed = transitive_closure(pairs);
513                if let Some((node, _)) = closed.iter().find(|(a, b)| a == b) {
514                    return Err(CompileError::CyclicRelation {
515                        file: source.to_string(),
516                        line: relation.span.location_line(),
517                        relation: relation.data.to_string(),
518                        node: node.clone(),
519                    });
520                }
521                self.relations.insert(relation.data.to_string(), closed);
522            }
523        }
524        Ok(())
525    }
526
527    /// Pre-pass: record every `SET` and every relation pair (3-part `FACT`) so a
528    /// `FOR EACH` may reference a set or relation declared anywhere in the same
529    /// source, including after the quantifier.
530    fn collect_decls(&mut self, program: &elenchus_parser::Program) {
531        for stmt in &program.statements {
532            match stmt {
533                Statement::Set { name, elements } => {
534                    self.sets.insert(
535                        name.data.to_string(),
536                        elements.iter().map(|e| e.data.to_string()).collect(),
537                    );
538                }
539                Statement::Fact(a) => {
540                    if let Some(obj) = a.data.object {
541                        self.relations
542                            .entry(a.data.predicate.to_string())
543                            .or_default()
544                            .push((a.data.subject.to_string(), obj.to_string()));
545                    }
546                }
547                _ => {}
548            }
549        }
550    }
551
552    /// Compile one already-resolved file's statements under its domain context.
553    fn add_resolved(&mut self, file: &ResolvedFile) -> Result<(), CompileError> {
554        let program = parse_tagged(&file.path, &file.content)?;
555        self.collect_decls(&program);
556        self.apply_closures(&program, &file.path)?;
557        for stmt in &program.statements {
558            match stmt {
559                Statement::Import { .. } | Statement::Domain(_) => {}
560                other => self.add_statement(&file.path, other, &file.ctx)?,
561            }
562        }
563        Ok(())
564    }
565
566    /// Route one statement (never `IMPORT`/`DOMAIN` — handled by the loaders) to
567    /// the right accumulator, resolving atom domains through `ctx`.
568    fn add_statement(
569        &mut self,
570        source: &str,
571        stmt: &Statement,
572        ctx: &DomainCtx,
573    ) -> Result<(), CompileError> {
574        match stmt {
575            // Handled by `add_source` / `load_recursive`, never reach here.
576            Statement::Import { .. } | Statement::Domain(_) => {}
577            Statement::Fact(a) => self.add_fact(source, a, Value::True, kw::FACT, false, ctx)?,
578            Statement::Negation(a) => {
579                self.add_fact(source, a, Value::False, kw::NOT, false, ctx)?
580            }
581            Statement::Assume(l) => {
582                let value = if l.data.negated {
583                    Value::False
584                } else {
585                    Value::True
586                };
587                // A soft assertion shares the FACT accumulator; the atom is the
588                // literal's atom, the polarity its `NOT`, and `soft` marks it
589                // retractable. The span is the whole `ASSUME` line.
590                let located = elenchus_parser::Located {
591                    data: l.data.atom.clone(),
592                    span: l.span,
593                };
594                self.add_fact(source, &located, value, kw::ASSUME, true, ctx)?;
595            }
596            Statement::Check {
597                subject,
598                bidirectional,
599            } => self.checks.push(Check {
600                subject: subject.as_ref().map(|s| s.data.to_string()),
601                bidirectional: *bidirectional,
602            }),
603            // Declared in the `collect_decls` / `apply_closures` pre-passes;
604            // nothing to emit here.
605            Statement::Set { .. } | Statement::Close { .. } => {}
606            Statement::Premise { name, quant, body } => {
607                self.add_named(source, name, quant.as_ref(), body, false, ctx)?;
608            }
609            Statement::Rule { name, quant, body } => {
610                self.add_named(source, name, quant.as_ref(), body, true, ctx)?;
611            }
612        }
613        Ok(())
614    }
615
616    /// Record an atom identity in the shared universe (deduped by the `BTreeSet`).
617    fn intern(&mut self, key: &AtomKey) {
618        if !self.keys.contains(key) {
619            self.keys.insert(key.clone());
620        }
621    }
622
623    /// Accumulate a `FACT`/`NOT`; exact duplicates (same key+value+kind) are
624    /// dropped as idempotent, while a `FACT` and a `NOT` on the same atom are
625    /// both kept so the solver can report the CONFLICT.
626    fn add_fact(
627        &mut self,
628        source: &str,
629        a: &elenchus_parser::Located<Atom>,
630        value: Value,
631        kind: &'static str,
632        soft: bool,
633        ctx: &DomainCtx,
634    ) -> Result<(), CompileError> {
635        let key = ctx.key(&a.data)?;
636        self.intern(&key);
637        let sig = alloc::format!(
638            "{}|{}|{}|{}",
639            key_sig(&key),
640            matches!(value, Value::True) as u8,
641            kind,
642            "" // facts dedup ignores line; identical FACT twice is idempotent
643        );
644        if !self.fact_sigs.insert(sig) {
645            return Ok(()); // exact duplicate fact — idempotent
646        }
647        self.facts.push(RawFact {
648            key,
649            value,
650            origin: Origin {
651                source: source.to_string(),
652                line: a.span.location_line(),
653                premise: None,
654                kind,
655            },
656            soft,
657        });
658        Ok(())
659    }
660
661    /// Handle a named construct (`PREMISE` or `RULE`). `is_rule` selects derivation
662    /// vs checking. Returns an error on redefinition with a different body.
663    fn add_named(
664        &mut self,
665        source: &str,
666        name: &Located<&str>,
667        quant: Option<&Quant>,
668        body: &Body,
669        is_rule: bool,
670        ctx: &DomainCtx,
671    ) -> Result<(), CompileError> {
672        let line = name.span.location_line();
673        let name = name.data;
674        // The redefinition hash covers the quantifier too, so two same-named
675        // premises that differ only in their `FOR EACH` are still a redefinition.
676        let mut canon = canonical_body(name, body, is_rule, ctx)?;
677        if let Some(q) = quant {
678            canon.push_str(&quant_sig(q));
679        }
680        let body_hash = hash_hex(canon.as_bytes());
681        let key = (source.to_string(), name.to_string());
682        match self.defined.get(&key) {
683            Some(prev) if *prev == body_hash => return Ok(()), // identical → idempotent
684            Some(_) => {
685                // Same name + different body *in the same source* — a real mistake.
686                return Err(CompileError::PremiseRedefinition {
687                    name: name.to_string(),
688                });
689            }
690            None => {
691                self.defined.insert(key, body_hash);
692            }
693        }
694
695        match quant {
696            // Unquantified: emit the body once, as before.
697            None => self.emit_named(source, name, line, body, is_rule, ctx),
698            // `FOR EACH <binder> IN <set>`: instantiate the body once per element,
699            // substituting the binder. Grounding is exactly `|set|` repetitions of
700            // the *same* desugar — linear, never a domain product (a second binder
701            // is unrepresentable in the grammar).
702            Some(Quant::InSet { binder, set }) => {
703                let elements = match self.sets.get(set.data) {
704                    Some(els) => els.clone(),
705                    None => {
706                        return Err(CompileError::UnknownSet {
707                            file: source.to_string(),
708                            line: set.span.location_line(),
709                            set: set.data.to_string(),
710                            suggestion: nearest_set_suggestion(set.data, &self.sets),
711                        });
712                    }
713                };
714                for el in &elements {
715                    let grounded = subst_body(body, &[(binder.data, el)]);
716                    self.emit_named(source, name, line, &grounded, is_rule, ctx)?;
717                }
718                Ok(())
719            }
720            // `FOR EACH <a> <relation> <b>`: instantiate the body once per declared
721            // FACT pair of that relation, binding `a`→subject, `b`→object. The pair
722            // is pinned by data, so this is linear in the number of facts — never a
723            // product of the domain with itself.
724            Some(Quant::Relation {
725                left,
726                predicate,
727                right,
728            }) => {
729                let pairs = self
730                    .relations
731                    .get(predicate.data)
732                    .cloned()
733                    .unwrap_or_default();
734                for (subj, obj) in &pairs {
735                    let grounded = subst_body(body, &[(left.data, subj), (right.data, obj)]);
736                    self.emit_named(source, name, line, &grounded, is_rule, ctx)?;
737                    // The edge atom is read as data by the quantifier, not idle —
738                    // record it so the ORPHAN lint does not flag it.
739                    if let Ok(k) = ctx.key(&Atom {
740                        domain: None,
741                        subject: subj,
742                        predicate: predicate.data,
743                        object: Some(obj),
744                    }) {
745                        self.relation_consumed.insert(k);
746                    }
747                }
748                Ok(())
749            }
750        }
751    }
752
753    /// Emit the clauses/rule for one (already-grounded) named construct's body.
754    /// Split out of [`Compiler::add_named`] so a `FOR EACH` can call it once per
755    /// element with the binder substituted, reusing the exact same desugar.
756    fn emit_named(
757        &mut self,
758        source: &str,
759        name: &str,
760        line: u32,
761        body: &Body,
762        is_rule: bool,
763        ctx: &DomainCtx,
764    ) -> Result<(), CompileError> {
765        if is_rule {
766            // RULE always has an implication body (guaranteed by the grammar).
767            if let Body::Impl {
768                antecedent,
769                ante_conn,
770                consequent,
771                cons_conn,
772            } = body
773            {
774                // A rule *derives* its consequent; an `OR` consequent is not a
775                // single fact to assert, so reject it (use a PREMISE instead).
776                if *cons_conn == Conn::Or {
777                    return Err(CompileError::RuleDisjunctiveConsequent {
778                        name: name.to_string(),
779                    });
780                }
781                let (ante, cons) = (raw_lits(antecedent, ctx)?, raw_lits(consequent, ctx)?);
782                for l in ante.iter().chain(cons.iter()) {
783                    self.intern(&l.key);
784                }
785                let origin = self.origin(source, line, Some(name), kw::RULE);
786                match ante_conn {
787                    // a ∧ b → C : one rule firing on the whole antecedent.
788                    Conn::And => self.rules.push(RawRule {
789                        antecedent: ante,
790                        consequent: cons,
791                        origin,
792                    }),
793                    // (a ∨ b) → C == (a → C) ∧ (b → C): one rule per antecedent.
794                    Conn::Or => {
795                        for a in &ante {
796                            self.rules.push(RawRule {
797                                antecedent: vec![a.clone()],
798                                consequent: cons.clone(),
799                                origin: origin.clone(),
800                            });
801                        }
802                    }
803                }
804            }
805            return Ok(());
806        }
807
808        match body {
809            Body::List { op, atoms } => {
810                let keys: Vec<AtomKey> = atoms
811                    .iter()
812                    .map(|a| ctx.key(&a.data))
813                    .collect::<Result<_, _>>()?;
814                for k in &keys {
815                    self.intern(k);
816                }
817                let kind = list_kind(*op);
818                let origin = self.origin(source, line, Some(name), kind);
819                match op {
820                    // EXCLUSIVE / FORBIDS: "at most one" → pairwise Impossible([a_i, a_j]).
821                    ListOp::Exclusive | ListOp::Forbids => {
822                        self.emit_pairwise(&keys, &origin);
823                    }
824                    // ONEOF: pairwise (at most one) + at-least-one. A ONEOF also
825                    // *closes* each of its variables: record every member's object
826                    // as a legal value of `(domain, subject, predicate)` so a later
827                    // out-of-set reference is caught as a typo (closed-world).
828                    ListOp::OneOf => {
829                        self.emit_pairwise(&keys, &origin);
830                        self.emit_at_least_one(&keys, &origin);
831                        for k in &keys {
832                            if let Some(obj) = &k.object {
833                                self.oneof_values
834                                    .entry((
835                                        k.domain.clone(),
836                                        k.subject.clone(),
837                                        k.predicate.clone(),
838                                    ))
839                                    .or_default()
840                                    .insert(obj.clone());
841                            }
842                        }
843                    }
844                    // ATLEAST: Impossible([NOT a_1, …, NOT a_n]).
845                    ListOp::AtLeast => {
846                        self.emit_at_least_one(&keys, &origin);
847                    }
848                }
849            }
850            Body::Impl {
851                antecedent,
852                ante_conn,
853                consequent,
854                cons_conn,
855            } => {
856                // Implication A → C as `Impossible(A_true ∧ ¬C)`. We group each
857                // side by its connective and emit one clause per (ante × cons)
858                // group pair — a uniform rule covering all AND/OR combinations:
859                //   AND-ante → all its literals share every clause;
860                //   OR-ante  → one clause per literal;
861                //   AND-cons → one clause per (negated) literal;
862                //   OR-cons  → all its (negated) literals share every clause.
863                let ante = raw_lits(antecedent, ctx)?;
864                let cons = raw_lits(consequent, ctx)?;
865                for l in ante.iter().chain(cons.iter()) {
866                    self.intern(&l.key);
867                }
868                let origin = self.origin(source, line, Some(name), kw::PREMISE);
869
870                let ante_groups: Vec<Vec<RawLit>> = match ante_conn {
871                    Conn::And => vec![ante.clone()],
872                    Conn::Or => ante.iter().map(|l| vec![l.clone()]).collect(),
873                };
874                let cons_groups: Vec<Vec<RawLit>> = match cons_conn {
875                    Conn::And => cons.iter().map(|l| vec![l.clone()]).collect(),
876                    Conn::Or => vec![cons.clone()],
877                };
878                for ag in &ante_groups {
879                    for cg in &cons_groups {
880                        let mut lits = ag.clone();
881                        for c in cg {
882                            lits.push(RawLit {
883                                key: c.key.clone(),
884                                negated: !c.negated,
885                            });
886                        }
887                        self.push_clause(lits, origin.clone());
888                    }
889                }
890            }
891        }
892        Ok(())
893    }
894
895    /// Emit "at most one TRUE" as one `Impossible([a_i, a_j])` per unordered
896    /// pair. Pairwise (not a single big clause) because `Impossible([a,b,c])`
897    /// only forbids *all three* together — it would still allow two.
898    fn emit_pairwise(&mut self, keys: &[AtomKey], origin: &Origin) {
899        for i in 0..keys.len() {
900            for j in (i + 1)..keys.len() {
901                let lits = vec![
902                    RawLit {
903                        key: keys[i].clone(),
904                        negated: false,
905                    },
906                    RawLit {
907                        key: keys[j].clone(),
908                        negated: false,
909                    },
910                ];
911                self.push_clause(lits, origin.clone());
912            }
913        }
914    }
915
916    /// Emit "at least one TRUE" as a single `Impossible([NOT a_1, …, NOT a_n])`
917    /// — it is impossible for all of them to be false at once.
918    fn emit_at_least_one(&mut self, keys: &[AtomKey], origin: &Origin) {
919        let lits = keys
920            .iter()
921            .map(|k| RawLit {
922                key: k.clone(),
923                negated: true,
924            })
925            .collect();
926        self.push_clause(lits, origin.clone());
927    }
928
929    /// Append a clause unless an identical one (by canonical [`clause_sig`]) is
930    /// already present — `P ∧ P ≡ P`, so dedup keeps the IR minimal.
931    fn push_clause(&mut self, lits: Vec<RawLit>, origin: Origin) {
932        let sig = clause_sig(&lits);
933        if self.clause_sigs.insert(sig) {
934            self.clauses.push(RawClause { lits, origin });
935        }
936        // else: identical clause already present — idempotent.
937    }
938
939    /// Build an [`Origin`] for provenance from the current source/line/name.
940    fn origin(&self, source: &str, line: u32, premise: Option<&str>, kind: &'static str) -> Origin {
941        Origin {
942            source: source.to_string(),
943            line,
944            premise: premise.map(|s| s.to_string()),
945            kind,
946        }
947    }
948
949    /// Closed-world check: once an `ONEOF` enumerates a variable's values, any
950    /// reference to that `(domain, subject, predicate)` with an object outside the
951    /// declared set is rejected as a likely typo — instead of silently minting a
952    /// new atom that then "hangs in the air" as an UNKNOWN. Reports the earliest
953    /// (by source, line) offender, with a `did you mean` suggestion when a declared
954    /// value is within edit distance. Must run after all sources are accumulated
955    /// (a `ONEOF` may follow the reference) and before [`finalize`].
956    fn validate_closed_world(&self) -> Result<(), CompileError> {
957        if self.oneof_values.is_empty() {
958            return Ok(());
959        }
960        // Every atom reference reachable from a fact, clause, or rule, with the
961        // line it came from. ONEOF members appear here too (as clause literals) but
962        // are in-set by construction, so they never trip the check. `out_of_set`
963        // tests one key against its variable's declared values.
964        let out_of_set = |key: &AtomKey| -> bool {
965            key.object.as_ref().is_some_and(|obj| {
966                self.oneof_values
967                    .get(&(
968                        key.domain.clone(),
969                        key.subject.clone(),
970                        key.predicate.clone(),
971                    ))
972                    .is_some_and(|set| !set.contains(obj))
973            })
974        };
975        let mut offenders: Vec<(&str, u32, &AtomKey)> = Vec::new();
976        for f in &self.facts {
977            if out_of_set(&f.key) {
978                offenders.push((&f.origin.source, f.origin.line, &f.key));
979            }
980        }
981        for c in &self.clauses {
982            for l in &c.lits {
983                if out_of_set(&l.key) {
984                    offenders.push((&c.origin.source, c.origin.line, &l.key));
985                }
986            }
987        }
988        for r in &self.rules {
989            for l in r.antecedent.iter().chain(r.consequent.iter()) {
990                if out_of_set(&l.key) {
991                    offenders.push((&r.origin.source, r.origin.line, &l.key));
992                }
993            }
994        }
995        // Earliest offender wins, for a stable, source-ordered diagnostic.
996        let Some(&(source, line, key)) = offenders.iter().min_by(|a, b| {
997            (a.0, a.1, &a.2.subject, &a.2.object).cmp(&(b.0, b.1, &b.2.subject, &b.2.object))
998        }) else {
999            return Ok(());
1000        };
1001        let set = &self.oneof_values[&(
1002            key.domain.clone(),
1003            key.subject.clone(),
1004            key.predicate.clone(),
1005        )];
1006        let declared: Vec<&str> = set.iter().map(|s| s.as_str()).collect(); // BTreeSet → sorted
1007        let value = key.object.clone().unwrap_or_default();
1008        let suggestion = did_you_mean(&value, &declared);
1009        Err(CompileError::UnknownValue(Box::new(UnknownValue {
1010            file: source.to_string(),
1011            line,
1012            subject: key.subject.clone(),
1013            predicate: key.predicate.clone(),
1014            value,
1015            declared: declared.join(", "),
1016            suggestion,
1017        })))
1018    }
1019
1020    /// Intern all atoms (canonical sort), then lower the raw IR to ids.
1021    pub fn finalize(self) -> Compiled {
1022        let atoms: Vec<AtomKey> = self.keys.into_iter().collect(); // BTreeSet → sorted
1023        let mut id_of: BTreeMap<AtomKey, AtomId> = BTreeMap::new();
1024        for (i, k) in atoms.iter().enumerate() {
1025            id_of.insert(k.clone(), i as AtomId);
1026        }
1027        let lower = |l: &RawLit| Lit {
1028            atom: id_of[&l.key],
1029            negated: l.negated,
1030        };
1031
1032        let facts = self
1033            .facts
1034            .into_iter()
1035            .map(|f| Fact {
1036                atom: id_of[&f.key],
1037                value: f.value,
1038                origin: f.origin,
1039                soft: f.soft,
1040            })
1041            .collect();
1042        let clauses = self
1043            .clauses
1044            .into_iter()
1045            .map(|c| Clause {
1046                lits: c.lits.iter().map(lower).collect(),
1047                origin: c.origin,
1048            })
1049            .collect();
1050        let rules = self
1051            .rules
1052            .into_iter()
1053            .map(|r| Rule {
1054                antecedent: r.antecedent.iter().map(lower).collect(),
1055                consequent: r.consequent.iter().map(lower).collect(),
1056                origin: r.origin,
1057            })
1058            .collect();
1059
1060        let consumed = self
1061            .relation_consumed
1062            .iter()
1063            .filter_map(|k| id_of.get(k).copied())
1064            .collect();
1065
1066        Compiled {
1067            atoms,
1068            facts,
1069            clauses,
1070            rules,
1071            checks: self.checks,
1072            pending_imports: self.pending_imports,
1073            unused_imports: Vec::new(), // filled by `compile` (advisory, post-resolution)
1074            consumed,
1075        }
1076    }
1077}
1078
1079/// Convenience: compile a single source into the IR. `IMPORT`s are recorded as
1080/// pending, not resolved (use [`compile`] with a [`Resolver`] to resolve them).
1081pub fn compile_source(source: &str, src: &str) -> Result<Compiled, CompileError> {
1082    let mut c = Compiler::new();
1083    c.add_source(source, src)?;
1084    c.validate_closed_world()?;
1085    Ok(c.finalize())
1086}
1087
1088// --- import resolution (source-agnostic) -----------------------------------
1089
1090/// Resolves `IMPORT "path"` to source text. The engine is source-agnostic: it
1091/// consumes strings, so a file is merely one backing store. Mirrors
1092/// vsm-grammar's `SourceResolver`.
1093pub trait Resolver {
1094    /// Load the raw source text for a resolved path.
1095    fn load(&self, path: &str) -> Result<String, CompileError>;
1096
1097    /// Normalize `relative` against the importing source `base`.
1098    /// Default: paths are absolute names, returned unchanged.
1099    fn resolve(&self, _base: &str, relative: &str) -> String {
1100        relative.to_string()
1101    }
1102}
1103
1104/// An in-memory resolver: serves sources from a name → content map. Pure
1105/// `no_std`. Mirrors vsm-grammar's `MemoryResolver`.
1106#[derive(Default)]
1107pub struct MemoryResolver {
1108    sources: BTreeMap<String, String>,
1109}
1110
1111impl MemoryResolver {
1112    /// An empty resolver with no sources.
1113    pub fn new() -> Self {
1114        Self::default()
1115    }
1116
1117    /// Register `content` under `path`; returns `&mut self` for chaining.
1118    pub fn add(&mut self, path: &str, content: &str) -> &mut Self {
1119        self.sources.insert(path.to_string(), content.to_string());
1120        self
1121    }
1122}
1123
1124impl Resolver for MemoryResolver {
1125    fn load(&self, path: &str) -> Result<String, CompileError> {
1126        self.sources
1127            .get(path)
1128            .cloned()
1129            .ok_or_else(|| CompileError::ImportNotFound(path.to_string()))
1130    }
1131}
1132
1133/// A filesystem-backed resolver. Mirrors vsm-grammar's `FileResolver`:
1134/// relative imports resolve against the importing file's directory, with manual
1135/// `..` normalization (no canonicalization, to keep a virtual layout).
1136#[cfg(feature = "std")]
1137pub struct FileResolver;
1138
1139#[cfg(feature = "std")]
1140impl Resolver for FileResolver {
1141    fn load(&self, path: &str) -> Result<String, CompileError> {
1142        std::fs::read_to_string(path)
1143            .map_err(|e| CompileError::ImportNotFound(alloc::format!("{}: {}", path, e)))
1144    }
1145
1146    fn resolve(&self, base: &str, relative: &str) -> String {
1147        use std::path::{Component, Path, PathBuf};
1148        let parent = Path::new(base).parent().unwrap_or_else(|| Path::new("."));
1149        let joined = parent.join(relative);
1150        let mut out = PathBuf::new();
1151        for component in joined.components() {
1152            match component {
1153                Component::ParentDir => {
1154                    out.pop();
1155                }
1156                Component::CurDir => {}
1157                c => out.push(c),
1158            }
1159        }
1160        // Normalize to forward slashes so resolved paths (and therefore the
1161        // provenance recorded in the IR) are identical on Windows and Unix.
1162        // Windows `std::fs` accepts `/` just fine.
1163        out.to_string_lossy().replace('\\', "/")
1164    }
1165}
1166
1167/// One resolved source ready to compile: its provenance path, raw text, and the
1168/// domain context (own domain + import-alias bindings) its atoms resolve against.
1169struct ResolvedFile {
1170    path: String,
1171    content: String,
1172    ctx: DomainCtx,
1173}
1174
1175/// Compile a root source and all its transitive `IMPORT`s into one IR.
1176///
1177/// Each file is keyed by `DOMAIN`; atoms unify only within a domain. Imports are
1178/// referenced by `<domain>.<atom>` and visibility is file-local (naming is not
1179/// transitive, though a dependency's clauses still participate). Sources are
1180/// content-addressed (sha256): a source reached by several paths is compiled once
1181/// (so a diamond — or an exponential fan-out — stays linear, never blowing up),
1182/// and an import cycle is an error.
1183///
1184/// Resolution is **iterative** (an explicit work stack, not native recursion), so
1185/// an arbitrarily deep import chain cannot overflow the call stack.
1186///
1187/// Premise/rule names are per-source labels, not global identifiers: different
1188/// files may reuse a name, and the report qualifies them by source. A name reused
1189/// with a different body is an error only *within the same source*.
1190pub fn compile<R: Resolver>(root: &str, resolver: &R) -> Result<Compiled, CompileError> {
1191    let (files, unused_imports) = resolve_graph(root, resolver)?;
1192    let mut c = Compiler::new();
1193    for file in &files {
1194        c.add_resolved(file)?;
1195    }
1196    c.validate_closed_world()?;
1197    let mut compiled = c.finalize();
1198    compiled.unused_imports = unused_imports;
1199    Ok(compiled)
1200}
1201
1202/// One `IMPORT` edge: the optional local alias, the resolved child path, and the
1203/// `IMPORT` line (for the unused-import advisory).
1204struct ImportEdge {
1205    alias: Option<String>,
1206    child_path: String,
1207    line: u32,
1208}
1209
1210/// A discovered source during graph resolution: its first-seen path, raw text,
1211/// declared domain, import edges, and the set of domain prefixes its atoms use
1212/// (`None` = its own domain; `Some(p)` = a `p.` prefix) — used to flag imports
1213/// that the file never references.
1214struct DiscoveredFile {
1215    path: String,
1216    content: String,
1217    domain: String,
1218    edges: Vec<ImportEdge>,
1219    used_prefixes: BTreeSet<Option<String>>,
1220}
1221
1222/// Resolve the whole import graph reachable from `root` into a flat list of
1223/// [`ResolvedFile`]s, each distinct source appearing once.
1224///
1225/// Iterative depth-first traversal with an explicit work stack (`Enter`/`Exit`
1226/// frames) — no native recursion, so depth is unbounded without risking a stack
1227/// overflow. Memoized by content hash (a diamond/repeat is visited once); a hash
1228/// re-encountered while still on the active path is a [`CompileError::CircularImport`].
1229fn resolve_graph<R: Resolver>(
1230    root: &str,
1231    resolver: &R,
1232) -> Result<(Vec<ResolvedFile>, Vec<UnusedImport>), CompileError> {
1233    /// One unit of pending work on the traversal stack.
1234    enum Step {
1235        /// Visit a file at this resolved path (load, parse, enqueue its imports).
1236        Enter(String),
1237        /// Mark this content hash finished (pop it off the active path).
1238        Exit(String),
1239    }
1240
1241    let mut discovered: BTreeMap<String, DiscoveredFile> = BTreeMap::new(); // by hash
1242    let mut path_hash: BTreeMap<String, String> = BTreeMap::new(); // resolved path → hash
1243    let mut order: Vec<String> = Vec::new(); // finish order, by hash
1244    let mut active: BTreeSet<String> = BTreeSet::new(); // hashes on the current DFS path
1245    let mut work: Vec<Step> = vec![Step::Enter(root.to_string())];
1246
1247    while let Some(step) = work.pop() {
1248        match step {
1249            Step::Exit(hash) => {
1250                active.remove(&hash);
1251                order.push(hash);
1252            }
1253            Step::Enter(path) => {
1254                let content = resolver.load(&path)?;
1255                let hash = hash_hex(content.as_bytes());
1256                path_hash.insert(path.clone(), hash.clone());
1257                if active.contains(&hash) {
1258                    return Err(CompileError::CircularImport(path)); // back-edge to an ancestor
1259                }
1260                if discovered.contains_key(&hash) {
1261                    continue; // already fully resolved by another path — dedup
1262                }
1263                let program = parse_tagged(&path, &content)?;
1264                let domain = extract_domain(&program, &path)?;
1265                let mut edges = Vec::new();
1266                let mut used_prefixes = BTreeSet::new();
1267                for stmt in &program.statements {
1268                    if let Statement::Import { path: p, alias } = stmt {
1269                        edges.push(ImportEdge {
1270                            alias: alias.as_ref().map(|a| a.data.to_string()),
1271                            child_path: resolver.resolve(&path, p.data),
1272                            line: p.span.location_line(),
1273                        });
1274                    } else {
1275                        collect_prefixes(stmt, &mut used_prefixes);
1276                    }
1277                }
1278                drop(program); // release the borrow on `content` before moving it
1279                active.insert(hash.clone());
1280                work.push(Step::Exit(hash.clone()));
1281                for e in edges.iter().rev() {
1282                    work.push(Step::Enter(e.child_path.clone()));
1283                }
1284                discovered.insert(
1285                    hash,
1286                    DiscoveredFile {
1287                        path,
1288                        content,
1289                        domain,
1290                        edges,
1291                        used_prefixes,
1292                    },
1293                );
1294            }
1295        }
1296    }
1297
1298    // Build each file's domain context now that every domain is known.
1299    // Look up every file's domain (small strings) so we can then *move* each
1300    // file's (potentially large) content out of `discovered` instead of cloning.
1301    let domain_of: BTreeMap<&str, &str> = discovered
1302        .iter()
1303        .map(|(h, f)| (h.as_str(), f.domain.as_str()))
1304        .collect();
1305
1306    let mut out = Vec::with_capacity(order.len());
1307    let mut unused: Vec<UnusedImport> = Vec::new();
1308    for hash in &order {
1309        let file = &discovered[hash];
1310        let mut aliases = BTreeMap::new();
1311        aliases.insert(file.domain.clone(), file.domain.clone());
1312        for edge in &file.edges {
1313            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
1314            let bind = edge
1315                .alias
1316                .clone()
1317                .unwrap_or_else(|| child_domain.to_string());
1318            match aliases.get(&bind) {
1319                Some(existing) if existing != child_domain => {
1320                    return Err(CompileError::DomainAliasClash { alias: bind });
1321                }
1322                _ => {
1323                    aliases.insert(bind, child_domain.to_string());
1324                }
1325            }
1326        }
1327
1328        // The domains this file actually references (each used prefix resolved
1329        // against its own domain / imports). An imported domain absent from this
1330        // set is an unused import.
1331        let referenced: BTreeSet<&str> = file
1332            .used_prefixes
1333            .iter()
1334            .filter_map(|p| match p {
1335                None => Some(file.domain.as_str()),
1336                Some(name) => aliases.get(name).map(|d| d.as_str()),
1337            })
1338            .collect();
1339        for edge in &file.edges {
1340            let child_domain = domain_of[path_hash[&edge.child_path].as_str()];
1341            if !referenced.contains(child_domain) {
1342                unused.push(UnusedImport {
1343                    file: file.path.clone(),
1344                    domain: child_domain.to_string(),
1345                    alias: edge.alias.clone(),
1346                    line: edge.line,
1347                });
1348            }
1349        }
1350
1351        let ctx = DomainCtx {
1352            current: file.domain.clone(),
1353            aliases,
1354        };
1355        out.push((hash.clone(), ctx));
1356    }
1357    unused.sort();
1358
1359    // Now move content/path out of `discovered` (no large clones) and pair with
1360    // the contexts built above.
1361    let files = out
1362        .into_iter()
1363        .map(|(hash, ctx)| {
1364            let file = discovered.remove(&hash).expect("hash was discovered");
1365            ResolvedFile {
1366                path: file.path,
1367                content: file.content,
1368                ctx,
1369            }
1370        })
1371        .collect();
1372    Ok((files, unused))
1373}
1374
1375/// A canonical signature of a `FOR EACH` quantifier, appended to the body hash so
1376/// two same-named premises that differ only in their quantifier still count as a
1377/// redefinition.
1378fn quant_sig(q: &Quant) -> String {
1379    match q {
1380        Quant::InSet { binder, set } => alloc::format!("|FOREACH {} IN {}", binder.data, set.data),
1381        Quant::Relation {
1382            left,
1383            predicate,
1384            right,
1385        } => alloc::format!("|FOREACH {} {} {}", left.data, predicate.data, right.data),
1386    }
1387}
1388
1389/// Parse one source, tagging any syntax [`Diagnostics`] with its file label so a
1390/// `CompileError::Parse` names the right file. The single spelling of "parse, and
1391/// on failure attach the file" — shared by the inline, resolved, and import paths.
1392fn parse_tagged<'a>(
1393    file: &str,
1394    content: &'a str,
1395) -> Result<elenchus_parser::Program<'a>, CompileError> {
1396    elenchus_parser::parse(content).map_err(|mut diag| {
1397        diag.set_file(file);
1398        CompileError::Parse(diag)
1399    })
1400}
1401
1402/// `" — did you mean \`x\`?"` for an undeclared set name, or empty when no
1403/// declared set name is close enough.
1404fn nearest_set_suggestion(set: &str, sets: &BTreeMap<String, Vec<String>>) -> String {
1405    let names: Vec<&str> = sets.keys().map(String::as_str).collect();
1406    did_you_mean(set, &names)
1407}
1408
1409/// A list of binder substitutions `(name, value)` applied during grounding: one
1410/// entry for an `IN <set>` quantifier, two for a `<a> <rel> <b>` relation.
1411type Subs<'s> = [(&'s str, &'s str)];
1412
1413/// Replace any binder with its value in one identifier; non-matching pass through.
1414fn subst_ident<'s>(s: &'s str, subs: &Subs<'s>) -> &'s str {
1415    subs.iter()
1416        .find_map(|&(b, v)| (s == b).then_some(v))
1417        .unwrap_or(s)
1418}
1419
1420/// Replace the binders in an atom (subject, predicate, and object positions).
1421fn subst_atom<'s>(a: &Atom<'s>, subs: &Subs<'s>) -> Atom<'s> {
1422    Atom {
1423        domain: a.domain,
1424        subject: subst_ident(a.subject, subs),
1425        predicate: subst_ident(a.predicate, subs),
1426        object: a.object.map(|o| subst_ident(o, subs)),
1427    }
1428}
1429
1430/// Replace the binders in one located literal (preserving its span and `NOT`).
1431fn subst_lit<'s>(ll: &Located<'s, Literal<'s>>, subs: &Subs<'s>) -> Located<'s, Literal<'s>> {
1432    Located {
1433        data: Literal {
1434            negated: ll.data.negated,
1435            atom: subst_atom(&ll.data.atom, subs),
1436        },
1437        span: ll.span,
1438    }
1439}
1440
1441/// Build a grounded copy of a body with the `FOR EACH` binders substituted.
1442/// Spans are preserved so any error still points at the original source line. The
1443/// result borrows from the original body and from the substitution values, so it
1444/// is consumed immediately (its keys interned) and never stored.
1445fn subst_body<'s>(body: &Body<'s>, subs: &Subs<'s>) -> Body<'s> {
1446    match body {
1447        Body::List { op, atoms } => Body::List {
1448            op: *op,
1449            atoms: atoms
1450                .iter()
1451                .map(|la| Located {
1452                    data: subst_atom(&la.data, subs),
1453                    span: la.span,
1454                })
1455                .collect(),
1456        },
1457        Body::Impl {
1458            antecedent,
1459            ante_conn,
1460            consequent,
1461            cons_conn,
1462        } => Body::Impl {
1463            antecedent: antecedent.iter().map(|l| subst_lit(l, subs)).collect(),
1464            ante_conn: *ante_conn,
1465            consequent: consequent.iter().map(|l| subst_lit(l, subs)).collect(),
1466            cons_conn: *cons_conn,
1467        },
1468    }
1469}
1470
1471/// Collect the domain prefixes used by a statement's atoms into `out` (`None` for
1472/// a bare atom, `Some(p)` for a `p.`-qualified one) — feeds the unused-import lint.
1473fn collect_prefixes(stmt: &Statement, out: &mut BTreeSet<Option<String>>) {
1474    let mut add = |a: &Atom| {
1475        out.insert(a.domain.map(|d| d.to_string()));
1476    };
1477    match stmt {
1478        Statement::Fact(a) | Statement::Negation(a) => add(&a.data),
1479        Statement::Assume(l) => add(&l.data.atom),
1480        Statement::Premise { body, .. } | Statement::Rule { body, .. } => match body {
1481            Body::List { atoms, .. } => atoms.iter().for_each(|a| add(&a.data)),
1482            Body::Impl {
1483                antecedent,
1484                consequent,
1485                ..
1486            } => antecedent
1487                .iter()
1488                .chain(consequent)
1489                .for_each(|l| add(&l.data.atom)),
1490        },
1491        Statement::Domain(_)
1492        | Statement::Import { .. }
1493        | Statement::Check { .. }
1494        | Statement::Set { .. }
1495        | Statement::Close { .. } => {}
1496    }
1497}
1498
1499/// The transitive closure of a relation's `(from, to)` pairs: add `(a, c)`
1500/// whenever `(a, b)` and `(b, c)` are present, to a fixpoint. A self-pair
1501/// `(x, x)` in the result marks a cycle. A small compile-time graph op.
1502fn transitive_closure(pairs: Vec<(String, String)>) -> Vec<(String, String)> {
1503    let mut set: BTreeSet<(String, String)> = pairs.into_iter().collect();
1504    loop {
1505        let mut added: Vec<(String, String)> = Vec::new();
1506        for (a, b) in &set {
1507            for (c, d) in &set {
1508                if b == c {
1509                    let p = (a.clone(), d.clone());
1510                    if !set.contains(&p) {
1511                        added.push(p);
1512                    }
1513                }
1514            }
1515        }
1516        if added.is_empty() {
1517            break;
1518        }
1519        set.extend(added);
1520    }
1521    set.into_iter().collect()
1522}
1523
1524/// The single `DOMAIN` a source declares, or an error if it has none or several.
1525fn extract_domain(
1526    program: &elenchus_parser::Program,
1527    source: &str,
1528) -> Result<String, CompileError> {
1529    let mut found: Option<String> = None;
1530    for stmt in &program.statements {
1531        if let Statement::Domain(name) = stmt {
1532            if found.is_some() {
1533                return Err(CompileError::DuplicateDomain {
1534                    file: source.to_string(),
1535                });
1536            }
1537            found = Some(name.data.to_string());
1538        }
1539    }
1540    found.ok_or_else(|| CompileError::MissingDomain {
1541        file: source.to_string(),
1542    })
1543}
1544
1545// --- helpers ---------------------------------------------------------------
1546
1547/// Levenshtein edit distance over Unicode scalars (rolling two-row DP). Small
1548/// inputs (atom/value names), so the simple DP is plenty. The one edit-distance
1549/// implementation in the workspace: the compiler's "did you mean" suggestions
1550/// (via [`nearest`]) and the solver's typo-hint lint both build on it.
1551pub fn levenshtein(a: &[char], b: &[char]) -> usize {
1552    let mut prev: Vec<usize> = (0..=b.len()).collect();
1553    let mut cur = vec![0usize; b.len() + 1];
1554    for (i, &ca) in a.iter().enumerate() {
1555        cur[0] = i + 1;
1556        for (j, &cb) in b.iter().enumerate() {
1557            let cost = usize::from(ca != cb);
1558            cur[j + 1] = (prev[j + 1] + 1).min(cur[j] + 1).min(prev[j] + cost);
1559        }
1560        core::mem::swap(&mut prev, &mut cur);
1561    }
1562    prev[b.len()]
1563}
1564
1565/// The closest candidate to `word` within an edit-distance threshold, or `None`
1566/// when nothing is close enough to be a useful "did you mean".
1567///
1568/// The threshold scales with length (`len / 3`, in Unicode scalars) and is **not**
1569/// floored at 1: a value of 1–2 characters yields a budget of 0, so no suggestion
1570/// is offered. This is deliberate — for very short tokens (a single CJK character,
1571/// where one symbol is a whole word, or a two-letter code) every other short value
1572/// sits at distance 1, so a "did you mean" there is pure noise, not a typo cue. The
1573/// rejection itself is exact (set membership), so suppressing the guess never hides
1574/// a real error; it only withholds a meaningless one. Longer names tolerate a slip
1575/// or two, mirroring the spirit of the solver's typo-hint lint.
1576fn nearest<'a>(word: &str, candidates: &[&'a str]) -> Option<&'a str> {
1577    let budget = word.chars().count() / 3;
1578    if budget == 0 {
1579        return None;
1580    }
1581    let w: Vec<char> = word.chars().collect();
1582    candidates
1583        .iter()
1584        .map(|&c| (levenshtein(&w, &c.chars().collect::<Vec<char>>()), c))
1585        .filter(|&(d, _)| d <= budget)
1586        .min_by_key(|&(d, _)| d)
1587        .map(|(_, c)| c)
1588}
1589
1590/// `" — did you mean `x`?"` for the nearest candidate to `word`, or empty when
1591/// none is close enough. The single spelling of the suggestion suffix, shared by
1592/// every "unknown name" diagnostic (values, sets, …).
1593fn did_you_mean(word: &str, candidates: &[&str]) -> String {
1594    match nearest(word, candidates) {
1595        Some(s) => alloc::format!(" — did you mean `{s}`?"),
1596        None => String::new(),
1597    }
1598}
1599
1600/// Lower parsed, located literals to key-based [`RawLit`]s (drops spans),
1601/// resolving each atom's domain through `ctx`.
1602fn raw_lits(
1603    lits: &[elenchus_parser::Located<Literal>],
1604    ctx: &DomainCtx,
1605) -> Result<Vec<RawLit>, CompileError> {
1606    lits.iter()
1607        .map(|l| {
1608            Ok(RawLit {
1609                key: ctx.key(&l.data.atom)?,
1610                negated: l.data.negated,
1611            })
1612        })
1613        .collect()
1614}
1615
1616/// The surface keyword for a list op, used as [`Origin::kind`] in the report.
1617fn list_kind(op: ListOp) -> &'static str {
1618    match op {
1619        ListOp::Exclusive => kw::EXCLUSIVE,
1620        ListOp::Forbids => kw::FORBIDS,
1621        ListOp::OneOf => kw::ONEOF,
1622        ListOp::AtLeast => kw::ATLEAST,
1623    }
1624}
1625
1626/// Stable `domain|subject|predicate|object` string for an atom key (the unit from
1627/// which clause/fact/body signatures are built). Includes the domain so atoms in
1628/// different domains never share a signature.
1629fn key_sig(k: &AtomKey) -> String {
1630    alloc::format!(
1631        "{}|{}|{}|{}",
1632        k.domain,
1633        k.subject,
1634        k.predicate,
1635        k.object.as_deref().unwrap_or("")
1636    )
1637}
1638
1639/// Canonical, order-independent signature of a clause's literals (for dedup).
1640fn clause_sig(lits: &[RawLit]) -> String {
1641    let mut parts: Vec<String> = lits
1642        .iter()
1643        .map(|l| alloc::format!("{}|{}", key_sig(&l.key), l.negated as u8))
1644        .collect();
1645    parts.sort();
1646    parts.dedup();
1647    parts.join(";")
1648}
1649
1650/// Canonical body string for a named construct, hashed for redefinition checks.
1651/// Resolves atom domains through `ctx` so the signature keys on resolved identity.
1652fn canonical_body(
1653    name: &str,
1654    body: &Body,
1655    is_rule: bool,
1656    ctx: &DomainCtx,
1657) -> Result<String, CompileError> {
1658    let mut s = String::new();
1659    let _ = write!(
1660        s,
1661        "{}|{}|",
1662        if is_rule { kw::RULE } else { kw::PREMISE },
1663        name
1664    );
1665    match body {
1666        Body::List { op, atoms } => {
1667            let _ = write!(s, "LIST|{}|", list_kind(*op));
1668            let mut keys: Vec<String> = atoms
1669                .iter()
1670                .map(|a| Ok(key_sig(&ctx.key(&a.data)?)))
1671                .collect::<Result<_, CompileError>>()?;
1672            keys.sort();
1673            s.push_str(&keys.join(";"));
1674        }
1675        Body::Impl {
1676            antecedent,
1677            ante_conn,
1678            consequent,
1679            cons_conn,
1680        } => {
1681            let conn = |c: &Conn| if *c == Conn::Or { "OR" } else { "AND" };
1682            s.push_str("IMPL|ANTE|");
1683            s.push_str(conn(ante_conn));
1684            s.push('|');
1685            s.push_str(&lit_sigs(antecedent, ctx)?);
1686            s.push_str("|CONS|");
1687            s.push_str(conn(cons_conn));
1688            s.push('|');
1689            s.push_str(&lit_sigs(consequent, ctx)?);
1690        }
1691    }
1692    Ok(s)
1693}
1694
1695/// Sorted `key|negated` signature of a literal list (order-independent), used
1696/// inside [`canonical_body`] so reordering a body does not look like a redefinition.
1697fn lit_sigs(
1698    lits: &[elenchus_parser::Located<Literal>],
1699    ctx: &DomainCtx,
1700) -> Result<String, CompileError> {
1701    let mut parts: Vec<String> = lits
1702        .iter()
1703        .map(|l| {
1704            Ok(alloc::format!(
1705                "{}|{}",
1706                key_sig(&ctx.key(&l.data.atom)?),
1707                l.data.negated as u8
1708            ))
1709        })
1710        .collect::<Result<_, CompileError>>()?;
1711    parts.sort();
1712    Ok(parts.join(";"))
1713}
1714
1715#[cfg(test)]
1716mod tests {
1717    use super::*;
1718
1719    /// Compile a single inline source under a default `DOMAIN t`, so test
1720    /// programs need not repeat the declaration. Atoms land in domain `t`.
1721    fn cs(src: &str) -> Result<Compiled, CompileError> {
1722        compile_source("<t>", &alloc::format!("DOMAIN t\n{src}"))
1723    }
1724
1725    /// An atom key in the default test domain `t`.
1726    fn key(subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
1727        key_in("t", subject, predicate, object)
1728    }
1729
1730    /// An atom key in an explicit domain.
1731    fn key_in(domain: &str, subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
1732        AtomKey {
1733            domain: domain.to_string(),
1734            subject: subject.to_string(),
1735            predicate: predicate.to_string(),
1736            object: object.map(|o| o.to_string()),
1737        }
1738    }
1739
1740    fn id(c: &Compiled, k: &AtomKey) -> AtomId {
1741        c.atoms.iter().position(|a| a == k).unwrap() as AtomId
1742    }
1743
1744    #[test]
1745    fn exclusive_unfolds_pairwise() {
1746        let src = r#"
1747        PREMISE e:
1748            EXCLUSIVE
1749                x a
1750                x b
1751                x c
1752        "#;
1753        let c = cs(src).unwrap();
1754        // C(3,2) = 3 clauses, each of 2 positive literals.
1755        assert_eq!(c.clauses.len(), 3);
1756        for cl in &c.clauses {
1757            assert_eq!(cl.lits.len(), 2);
1758            assert!(cl.lits.iter().all(|l| !l.negated));
1759        }
1760    }
1761
1762    #[test]
1763    fn implication_negates_consequent() {
1764        // WHEN x a THEN x b  ==  Impossible([x a, NOT x b])
1765        let src = r#"
1766        PREMISE r:
1767            WHEN x a
1768            THEN x b
1769        "#;
1770        let c = cs(src).unwrap();
1771        assert_eq!(c.clauses.len(), 1);
1772        let cl = &c.clauses[0];
1773        assert_eq!(cl.lits.len(), 2);
1774        let a = id(&c, &key("x", "a", None));
1775        let b = id(&c, &key("x", "b", None));
1776        assert!(cl.lits.contains(&Lit {
1777            atom: a,
1778            negated: false
1779        }));
1780        assert!(cl.lits.contains(&Lit {
1781            atom: b,
1782            negated: true
1783        }));
1784    }
1785
1786    #[test]
1787    fn negated_consequent_flips_to_positive() {
1788        // THEN NOT x b  →  NOT(NOT x b) = x b positive inside Impossible
1789        let src = r#"
1790        PREMISE r:
1791            WHEN x a
1792            THEN NOT x b
1793        "#;
1794        let c = cs(src).unwrap();
1795        let b = id(&c, &key("x", "b", None));
1796        assert!(c.clauses[0].lits.contains(&Lit {
1797            atom: b,
1798            negated: false
1799        }));
1800    }
1801
1802    #[test]
1803    fn consequent_or_is_one_clause_with_all_negated() {
1804        // WHEN x p THEN x a OR x b  ==  Impossible([x p, NOT x a, NOT x b])
1805        let src = r#"
1806        PREMISE r:
1807            WHEN x p
1808            THEN x a
1809            OR x b
1810        "#;
1811        let c = cs(src).unwrap();
1812        assert_eq!(c.clauses.len(), 1);
1813        let cl = &c.clauses[0];
1814        assert_eq!(cl.lits.len(), 3);
1815        let p = id(&c, &key("x", "p", None));
1816        let a = id(&c, &key("x", "a", None));
1817        let b = id(&c, &key("x", "b", None));
1818        assert!(cl.lits.contains(&Lit {
1819            atom: p,
1820            negated: false
1821        }));
1822        assert!(cl.lits.contains(&Lit {
1823            atom: a,
1824            negated: true
1825        }));
1826        assert!(cl.lits.contains(&Lit {
1827            atom: b,
1828            negated: true
1829        }));
1830    }
1831
1832    #[test]
1833    fn antecedent_or_is_one_clause_per_disjunct() {
1834        // WHEN x a OR x b THEN x c
1835        //   == Impossible([x a, NOT x c]) ∧ Impossible([x b, NOT x c])
1836        let src = r#"
1837        PREMISE r:
1838            WHEN x a
1839            OR x b
1840            THEN x c
1841        "#;
1842        let c = cs(src).unwrap();
1843        assert_eq!(c.clauses.len(), 2);
1844        let a = id(&c, &key("x", "a", None));
1845        let b = id(&c, &key("x", "b", None));
1846        let cc = id(&c, &key("x", "c", None));
1847        // every clause has exactly two lits and carries NOT c
1848        for cl in &c.clauses {
1849            assert_eq!(cl.lits.len(), 2);
1850            assert!(cl.lits.contains(&Lit {
1851                atom: cc,
1852                negated: true
1853            }));
1854        }
1855        let has = |atom| {
1856            c.clauses.iter().any(|cl| {
1857                cl.lits.contains(&Lit {
1858                    atom,
1859                    negated: false,
1860                })
1861            })
1862        };
1863        assert!(has(a) && has(b));
1864    }
1865
1866    #[test]
1867    fn antecedent_or_with_consequent_or_distributes() {
1868        // (a ∨ b) → (c ∨ d): Impossible([a,¬c,¬d]) ∧ Impossible([b,¬c,¬d])
1869        let src = r#"
1870        PREMISE r:
1871            WHEN x a
1872            OR x b
1873            THEN x c
1874            OR x d
1875        "#;
1876        let c = cs(src).unwrap();
1877        assert_eq!(c.clauses.len(), 2);
1878        for cl in &c.clauses {
1879            assert_eq!(cl.lits.len(), 3);
1880        }
1881    }
1882
1883    #[test]
1884    fn rule_with_or_antecedent_splits_into_two_rules() {
1885        // (a ∨ b) → c derives c whenever either fires: two single-antecedent rules.
1886        let src = r#"
1887        RULE r:
1888            WHEN x a
1889            OR x b
1890            THEN x c
1891        "#;
1892        let c = cs(src).unwrap();
1893        assert_eq!(c.rules.len(), 2);
1894        assert!(
1895            c.rules
1896                .iter()
1897                .all(|r| r.antecedent.len() == 1 && r.consequent.len() == 1)
1898        );
1899    }
1900
1901    #[test]
1902    fn rule_with_or_consequent_is_rejected() {
1903        // A rule cannot derive a disjunction — must be a PREMISE.
1904        let src = r#"
1905        RULE r:
1906            WHEN x a
1907            THEN x b
1908            OR x c
1909        "#;
1910        let err = cs(src).unwrap_err();
1911        assert!(matches!(
1912            err,
1913            CompileError::RuleDisjunctiveConsequent { .. }
1914        ));
1915    }
1916
1917    #[test]
1918    fn oneof_is_pairwise_plus_at_least_one() {
1919        let src = r#"
1920        PREMISE o:
1921            ONEOF
1922                x a
1923                x b
1924        "#;
1925        let c = cs(src).unwrap();
1926        // pairwise C(2,2)=1 + 1 at-least-one = 2 clauses
1927        assert_eq!(c.clauses.len(), 2);
1928        // the at-least-one clause is the all-negated one
1929        assert!(c.clauses.iter().any(|cl| cl.lits.iter().all(|l| l.negated)));
1930    }
1931
1932    #[test]
1933    fn atleast_is_one_negated_clause() {
1934        let src = r#"
1935        PREMISE a:
1936            ATLEAST
1937                x a
1938                x b
1939                x c
1940        "#;
1941        let c = cs(src).unwrap();
1942        assert_eq!(c.clauses.len(), 1);
1943        assert_eq!(c.clauses[0].lits.len(), 3);
1944        assert!(c.clauses[0].lits.iter().all(|l| l.negated));
1945    }
1946
1947    #[test]
1948    fn rules_are_separate_from_clauses() {
1949        let src = r#"
1950        RULE needs:
1951            WHEN x a
1952            THEN x b
1953        "#;
1954        let c = cs(src).unwrap();
1955        assert_eq!(c.clauses.len(), 0);
1956        assert_eq!(c.rules.len(), 1);
1957        assert_eq!(c.rules[0].antecedent.len(), 1);
1958        assert_eq!(c.rules[0].consequent.len(), 1);
1959    }
1960
1961    #[test]
1962    fn atoms_are_canonically_sorted() {
1963        let src = r#"
1964        FACT z z
1965        FACT a a
1966        FACT m m
1967        "#;
1968        let c = cs(src).unwrap();
1969        let mut sorted = c.atoms.clone();
1970        sorted.sort();
1971        assert_eq!(c.atoms, sorted);
1972    }
1973
1974    #[test]
1975    fn duplicate_premise_is_idempotent() {
1976        let src = r#"
1977        PREMISE e:
1978            EXCLUSIVE
1979                x a
1980                x b
1981        PREMISE e:
1982            EXCLUSIVE
1983                x a
1984                x b
1985        "#;
1986        let c = cs(src).unwrap();
1987        assert_eq!(c.clauses.len(), 1);
1988    }
1989
1990    #[test]
1991    fn redefinition_with_different_body_errors() {
1992        let src = r#"
1993        PREMISE e:
1994            EXCLUSIVE
1995                x a
1996                x b
1997        PREMISE e:
1998            EXCLUSIVE
1999                x a
2000                x c
2001        "#;
2002        let err = cs(src).unwrap_err();
2003        assert_eq!(
2004            err,
2005            CompileError::PremiseRedefinition {
2006                name: "e".to_string()
2007            }
2008        );
2009    }
2010
2011    #[test]
2012    fn duplicate_fact_is_idempotent() {
2013        let c = cs("FACT x a\nFACT x a\n").unwrap();
2014        assert_eq!(c.facts.len(), 1);
2015    }
2016
2017    #[test]
2018    fn conflicting_facts_are_both_kept() {
2019        // FACT X + NOT X is a CONFLICT for the solver, not a compile error.
2020        let c = cs("FACT x a\nNOT x a\n").unwrap();
2021        assert_eq!(c.facts.len(), 2);
2022    }
2023
2024    #[test]
2025    fn import_is_recorded_pending() {
2026        let c = cs("IMPORT \"physics.vrf\"\nFACT x a\n").unwrap();
2027        assert_eq!(c.pending_imports, vec!["physics.vrf".to_string()]);
2028    }
2029
2030    #[test]
2031    fn qualified_fact_lands_in_the_imported_domain() {
2032        // The library's premise is about `physics.Engine_X has fuel`; the main file
2033        // asserts a fact qualified INTO that domain, so the two share one atom id.
2034        let mut r = MemoryResolver::new();
2035        r.add(
2036            "lib.vrf",
2037            r#"
2038        DOMAIN physics
2039        PREMISE needs_fuel:
2040            WHEN Engine_X has engine
2041            THEN Engine_X has fuel
2042        "#,
2043        );
2044        r.add(
2045            "main.vrf",
2046            r#"
2047        DOMAIN main
2048        IMPORT "lib.vrf"
2049        FACT physics.Engine_X has engine
2050        FACT physics.Engine_X has fuel
2051        "#,
2052        );
2053        let c = compile("main.vrf", &r).unwrap();
2054        assert!(c.pending_imports.is_empty());
2055        assert_eq!(c.clauses.len(), 1); // the imported premise
2056        assert_eq!(c.facts.len(), 2);
2057
2058        // `physics.Engine_X has fuel` from the FACT and the imported premise share an id.
2059        let fuel = key_in("physics", "Engine_X", "has", Some("fuel"));
2060        let fuel_id = id(&c, &fuel);
2061        assert!(c.facts.iter().any(|f| f.atom == fuel_id));
2062        assert!(c.clauses[0].lits.iter().any(|l| l.atom == fuel_id));
2063    }
2064
2065    #[test]
2066    fn same_triple_in_different_domains_does_not_unify() {
2067        // Without a domain prefix the fact lands in `main`, NOT `physics`, so it is
2068        // a distinct atom from the library's `physics.Engine_X has fuel`.
2069        let mut r = MemoryResolver::new();
2070        r.add("lib.vrf", "DOMAIN physics\nFACT Engine_X has fuel\n");
2071        r.add(
2072            "main.vrf",
2073            "DOMAIN main\nIMPORT \"lib.vrf\"\nFACT Engine_X has fuel\n",
2074        );
2075        let c = compile("main.vrf", &r).unwrap();
2076        // Two distinct atoms: physics.Engine_X has fuel and main.Engine_X has fuel.
2077        assert!(c.atoms.iter().any(|a| a.domain == "physics"));
2078        assert!(c.atoms.iter().any(|a| a.domain == "main"));
2079        assert_eq!(
2080            c.atoms
2081                .iter()
2082                .filter(|a| a.subject == "Engine_X" && a.predicate == "has")
2083                .count(),
2084            2
2085        );
2086    }
2087
2088    #[test]
2089    fn import_alias_binds_a_local_domain_name() {
2090        // `AS phys` lets the consumer reference the imported domain by a local name.
2091        let mut r = MemoryResolver::new();
2092        r.add("lib.vrf", "DOMAIN physics\nFACT Motor over_200\n");
2093        r.add(
2094            "main.vrf",
2095            "DOMAIN main\nIMPORT \"lib.vrf\" AS phys\nFACT phys.Motor over_100\n",
2096        );
2097        let c = compile("main.vrf", &r).unwrap();
2098        // Both facts live in the physics domain (one via its own name, one via alias).
2099        assert_eq!(c.atoms.iter().filter(|a| a.domain == "physics").count(), 2);
2100    }
2101
2102    #[test]
2103    fn unknown_domain_reference_errors() {
2104        // Referencing a domain that is neither this file's nor imported here fails.
2105        let err = cs("FACT ghost.x a\n").unwrap_err();
2106        assert!(matches!(err, CompileError::UnknownDomain { .. }));
2107    }
2108
2109    #[test]
2110    fn imports_are_not_transitive_for_naming() {
2111        // main imports physics; physics imports math. main may NOT name math.
2112        let mut r = MemoryResolver::new();
2113        r.add("math.vrf", "DOMAIN math\nFACT foo bar\n");
2114        r.add(
2115            "physics.vrf",
2116            "DOMAIN physics\nIMPORT \"math.vrf\"\nFACT Motor over_100\n",
2117        );
2118        r.add(
2119            "main.vrf",
2120            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT math.foo bar\n",
2121        );
2122        let err = compile("main.vrf", &r).unwrap_err();
2123        assert!(matches!(err, CompileError::UnknownDomain { .. }));
2124    }
2125
2126    #[test]
2127    fn transitive_dependency_clauses_still_load() {
2128        // Even though main can't *name* math, math's clauses still participate.
2129        let mut r = MemoryResolver::new();
2130        r.add(
2131            "math.vrf",
2132            r"
2133        DOMAIN math
2134        PREMISE e:
2135            EXCLUSIVE
2136                x a
2137                x b
2138        ",
2139        );
2140        r.add("physics.vrf", "DOMAIN physics\nIMPORT \"math.vrf\"\n");
2141        r.add("main.vrf", "DOMAIN main\nIMPORT \"physics.vrf\"\n");
2142        let c = compile("main.vrf", &r).unwrap();
2143        assert_eq!(c.clauses.len(), 1); // math's clause loaded transitively
2144        assert!(c.clauses.iter().all(|cl| cl.origin.source == "math.vrf"));
2145    }
2146
2147    #[test]
2148    fn missing_domain_errors() {
2149        let err = compile_source("nodomain.vrf", "FACT x a\n").unwrap_err();
2150        assert!(matches!(err, CompileError::MissingDomain { .. }));
2151    }
2152
2153    #[test]
2154    fn duplicate_domain_errors() {
2155        let err = compile_source("dup.vrf", "DOMAIN a\nDOMAIN b\nFACT x a\n").unwrap_err();
2156        assert!(matches!(err, CompileError::DuplicateDomain { .. }));
2157    }
2158
2159    #[test]
2160    fn alias_clash_when_one_local_name_binds_two_domains() {
2161        // The same local alias `x` bound to two genuinely different domains is a
2162        // clash: disambiguate with distinct aliases.
2163        let mut r = MemoryResolver::new();
2164        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2165        r.add("b.vrf", "DOMAIN chemistry\nFACT atom reacts\n");
2166        r.add(
2167            "main.vrf",
2168            "DOMAIN main\nIMPORT \"a.vrf\" AS x\nIMPORT \"b.vrf\" AS x\n",
2169        );
2170        let err = compile("main.vrf", &r).unwrap_err();
2171        assert!(matches!(err, CompileError::DomainAliasClash { .. }));
2172    }
2173
2174    #[test]
2175    fn two_files_with_the_same_domain_name_merge() {
2176        // Nominal domains: two files both declaring DOMAIN physics share it (the
2177        // value of importing a premise library is exactly this unification).
2178        let mut r = MemoryResolver::new();
2179        r.add("a.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2180        r.add(
2181            "main.vrf",
2182            "DOMAIN physics\nIMPORT \"a.vrf\"\nFACT Motor over_200\n",
2183        );
2184        let c = compile("main.vrf", &r).unwrap();
2185        // Both motors live in the single shared `physics` domain.
2186        assert!(c.atoms.iter().all(|a| a.domain == "physics"));
2187        assert_eq!(c.atoms.len(), 2);
2188    }
2189
2190    #[test]
2191    fn diamond_import_is_deduped() {
2192        // main → a, c ; a → base ; c → base. base merged once.
2193        let mut r = MemoryResolver::new();
2194        r.add(
2195            "base.vrf",
2196            r#"
2197        DOMAIN base
2198        PREMISE b:
2199            EXCLUSIVE
2200                x a
2201                x b
2202        "#,
2203        );
2204        r.add("a.vrf", "DOMAIN a\nIMPORT \"base.vrf\"\n");
2205        r.add("c.vrf", "DOMAIN c\nIMPORT \"base.vrf\"\n");
2206        r.add(
2207            "main.vrf",
2208            "DOMAIN main\nIMPORT \"a.vrf\"\nIMPORT \"c.vrf\"\n",
2209        );
2210        let c = compile("main.vrf", &r).unwrap();
2211        assert_eq!(c.clauses.len(), 1); // base's single clause, not two
2212    }
2213
2214    #[test]
2215    fn circular_import_errors() {
2216        let mut r = MemoryResolver::new();
2217        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
2218        r.add("b.vrf", "DOMAIN b\nIMPORT \"a.vrf\"\n");
2219        let err = compile("a.vrf", &r).unwrap_err();
2220        assert!(matches!(err, CompileError::CircularImport(_)));
2221    }
2222
2223    #[test]
2224    fn three_node_cycle_errors() {
2225        // a → b → c → a. The back-edge to the on-path ancestor is detected.
2226        let mut r = MemoryResolver::new();
2227        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\n");
2228        r.add("b.vrf", "DOMAIN b\nIMPORT \"c.vrf\"\n");
2229        r.add("c.vrf", "DOMAIN c\nIMPORT \"a.vrf\"\n");
2230        let err = compile("a.vrf", &r).unwrap_err();
2231        assert!(matches!(err, CompileError::CircularImport(_)));
2232    }
2233
2234    #[test]
2235    fn shared_grandchild_diamond_loads_once() {
2236        // The user's case: a imports B and C; C ALSO imports B. B must be compiled
2237        // exactly once (its single clause is not duplicated by the two paths to it).
2238        let mut r = MemoryResolver::new();
2239        r.add(
2240            "b.vrf",
2241            r"
2242        DOMAIN b
2243        PREMISE e:
2244            EXCLUSIVE
2245                x a
2246                x b
2247        ",
2248        );
2249        r.add("c.vrf", "DOMAIN c\nIMPORT \"b.vrf\"\n");
2250        r.add("a.vrf", "DOMAIN a\nIMPORT \"b.vrf\"\nIMPORT \"c.vrf\"\n");
2251        let c = compile("a.vrf", &r).unwrap();
2252        assert_eq!(
2253            c.clauses.len(),
2254            1,
2255            "b.vrf's clause must appear exactly once"
2256        );
2257    }
2258
2259    #[test]
2260    fn exponential_fan_out_is_memoized_not_blown_up() {
2261        // f_k imports f_{k-1} TWICE. Without content-hash memoization the visit
2262        // count is 2^k (2^40 ≈ a trillion); with it, the work is linear, so this
2263        // finishes instantly. A guard against any combinatorial blow-up / DoS.
2264        let mut r = MemoryResolver::new();
2265        r.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
2266        let n = 40;
2267        for k in 1..=n {
2268            r.add(
2269                &alloc::format!("f{k}.vrf"),
2270                &alloc::format!(
2271                    "DOMAIN d{k}\nIMPORT \"f{}.vrf\"\nIMPORT \"f{}.vrf\"\n",
2272                    k - 1,
2273                    k - 1
2274                ),
2275            );
2276        }
2277        let c = compile(&alloc::format!("f{n}.vrf"), &r).unwrap();
2278        assert_eq!(c.facts.len(), 1); // the single fact from f0, reached once
2279    }
2280
2281    #[test]
2282    fn very_deep_linear_chain_does_not_overflow() {
2283        // A long non-cyclic chain. Resolution is iterative (explicit work stack),
2284        // so a depth that would overflow a recursive loader compiles cleanly.
2285        let mut r = MemoryResolver::new();
2286        r.add("f0.vrf", "DOMAIN d0\nFACT x a\n");
2287        let n = 10_000;
2288        for k in 1..=n {
2289            r.add(
2290                &alloc::format!("f{k}.vrf"),
2291                &alloc::format!("DOMAIN d{k}\nIMPORT \"f{}.vrf\"\n", k - 1),
2292            );
2293        }
2294        let c = compile(&alloc::format!("f{n}.vrf"), &r).unwrap();
2295        assert_eq!(c.facts.len(), 1);
2296    }
2297
2298    #[test]
2299    fn missing_import_errors() {
2300        let mut r = MemoryResolver::new();
2301        r.add("main.vrf", "DOMAIN main\nIMPORT \"ghost.vrf\"\n");
2302        let err = compile("main.vrf", &r).unwrap_err();
2303        assert!(matches!(err, CompileError::ImportNotFound(_)));
2304    }
2305
2306    #[test]
2307    fn unused_import_is_flagged() {
2308        // main imports physics but never writes a `physics.` atom → unused.
2309        let mut r = MemoryResolver::new();
2310        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2311        r.add(
2312            "main.vrf",
2313            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT x a\n",
2314        );
2315        let c = compile("main.vrf", &r).unwrap();
2316        assert_eq!(c.unused_imports.len(), 1);
2317        assert_eq!(c.unused_imports[0].domain, "physics");
2318        assert_eq!(c.unused_imports[0].file, "main.vrf");
2319        assert_eq!(c.unused_imports[0].alias, None);
2320    }
2321
2322    #[test]
2323    fn referenced_import_is_not_unused() {
2324        // The same import, but now a `physics.` atom uses it → not flagged.
2325        let mut r = MemoryResolver::new();
2326        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2327        r.add(
2328            "main.vrf",
2329            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT physics.Motor over_200\n",
2330        );
2331        let c = compile("main.vrf", &r).unwrap();
2332        assert!(c.unused_imports.is_empty(), "{:?}", c.unused_imports);
2333    }
2334
2335    #[test]
2336    fn unused_import_records_its_alias() {
2337        let mut r = MemoryResolver::new();
2338        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2339        r.add(
2340            "main.vrf",
2341            "DOMAIN main\nIMPORT \"physics.vrf\" AS phys\nFACT x a\n",
2342        );
2343        let c = compile("main.vrf", &r).unwrap();
2344        assert_eq!(c.unused_imports.len(), 1);
2345        assert_eq!(c.unused_imports[0].alias.as_deref(), Some("phys"));
2346    }
2347
2348    #[test]
2349    fn import_referenced_only_inside_a_premise_is_used() {
2350        // The reference can be anywhere — here inside a premise body, not a fact.
2351        let mut r = MemoryResolver::new();
2352        r.add("physics.vrf", "DOMAIN physics\nFACT Motor over_100\n");
2353        r.add(
2354            "main.vrf",
2355            r#"
2356        DOMAIN main
2357        IMPORT "physics.vrf"
2358        PREMISE p:
2359            WHEN physics.Motor over_100
2360            THEN x ok
2361        "#,
2362        );
2363        let c = compile("main.vrf", &r).unwrap();
2364        assert!(c.unused_imports.is_empty(), "{:?}", c.unused_imports);
2365    }
2366
2367    #[test]
2368    fn same_premise_name_across_files_coexists() {
2369        // Two files may legitimately reuse a premise NAME with different bodies.
2370        // Names are per-source labels — both premises apply, qualified by source.
2371        // NOT a redefinition error. (Atoms stay apart too: different domains.)
2372        let mut r = MemoryResolver::new();
2373        r.add(
2374            "physics.vrf",
2375            r#"
2376        DOMAIN physics
2377        PREMISE safety:
2378            EXCLUSIVE
2379                x a
2380                x b
2381        "#,
2382        );
2383        r.add(
2384            "main.vrf",
2385            r#"
2386        DOMAIN main
2387        IMPORT "physics.vrf"
2388        PREMISE safety:
2389            EXCLUSIVE
2390                x a
2391                x c
2392        "#,
2393        );
2394        let c = compile("main.vrf", &r).unwrap();
2395        assert_eq!(c.clauses.len(), 2); // a-b from physics, a-c from main
2396        assert!(c.clauses.iter().any(|cl| cl.origin.source == "physics.vrf"));
2397        assert!(c.clauses.iter().any(|cl| cl.origin.source == "main.vrf"));
2398    }
2399
2400    #[test]
2401    fn redefinition_within_one_source_still_errors() {
2402        // But reusing a name with a different body *inside one source* is a mistake.
2403        let src = r#"
2404        DOMAIN m
2405        PREMISE e:
2406            EXCLUSIVE
2407                x a
2408                x b
2409        PREMISE e:
2410            EXCLUSIVE
2411                x a
2412                x c
2413        "#;
2414        let err = compile_source("main.vrf", src).unwrap_err();
2415        assert_eq!(
2416            err,
2417            CompileError::PremiseRedefinition {
2418                name: "e".to_string()
2419            }
2420        );
2421    }
2422
2423    #[test]
2424    fn import_demo_examples_resolve() {
2425        let mut r = MemoryResolver::new();
2426        r.add(
2427            "physics.vrf",
2428            include_str!("../../../docs/examples/physics.vrf"),
2429        );
2430        r.add(
2431            "import-demo.vrf",
2432            include_str!("../../../docs/examples/import-demo.vrf"),
2433        );
2434        let c = compile("import-demo.vrf", &r).unwrap();
2435        assert!(c.pending_imports.is_empty());
2436        // physics.vrf: one_path (EXCLUSIVE, 1 clause) + speed_order (impl, 1 clause)
2437        assert_eq!(c.clauses.len(), 2);
2438        // The qualified facts (`physics.Motor …`) share ids with the imported premise.
2439        let over_100 = id(&c, &key_in("physics", "Motor", "over_100", None));
2440        assert!(c.facts.iter().any(|f| f.atom == over_100));
2441        assert!(
2442            c.clauses
2443                .iter()
2444                .any(|cl| cl.lits.iter().any(|l| l.atom == over_100))
2445        );
2446    }
2447
2448    #[test]
2449    fn creature_example_compiles() {
2450        let src = include_str!("../../../docs/examples/creature.vrf");
2451        let c = compile_source("creature.vrf", src).unwrap();
2452        assert_eq!(c.facts.len(), 2); // flying, warm_blood
2453        assert_eq!(c.rules.len(), 1); // needs_oxygen
2454        assert_eq!(c.checks.len(), 1);
2455        // fly_xor_swim (1) + wings_need_bone (THEN wing AND bone → 2) + no_dual_temp (1) = 4
2456        assert_eq!(c.clauses.len(), 4);
2457        assert_eq!(c.atoms.len(), 7);
2458    }
2459
2460    #[test]
2461    fn forbids_unfolds_pairwise() {
2462        let src = r#"
2463        PREMISE f:
2464            FORBIDS
2465                x a
2466                x b
2467                x c
2468        "#;
2469        let c = cs(src).unwrap();
2470        assert_eq!(c.clauses.len(), 3); // C(3,2), like EXCLUSIVE
2471        assert!(
2472            c.clauses
2473                .iter()
2474                .all(|cl| cl.lits.len() == 2 && cl.lits.iter().all(|l| !l.negated))
2475        );
2476    }
2477
2478    #[test]
2479    fn rule_with_multiple_consequents() {
2480        let src = r#"
2481        RULE r:
2482            WHEN x a
2483            THEN x b
2484            AND  x c
2485        "#;
2486        let c = cs(src).unwrap();
2487        assert_eq!(c.rules.len(), 1);
2488        assert_eq!(c.rules[0].consequent.len(), 2);
2489    }
2490
2491    #[test]
2492    fn negated_antecedent_literal_keeps_polarity() {
2493        // WHEN NOT x a THEN x b  ==  Impossible([NOT x a, NOT x b])
2494        let src = r#"
2495        PREMISE a:
2496            WHEN NOT x a
2497            THEN x b
2498        "#;
2499        let c = cs(src).unwrap();
2500        let xa = id(&c, &key("x", "a", None));
2501        assert!(c.clauses[0].lits.contains(&Lit {
2502            atom: xa,
2503            negated: true
2504        }));
2505    }
2506
2507    #[test]
2508    fn rule_keeps_consequent_negation() {
2509        let src = r#"
2510        RULE r:
2511            WHEN x a
2512            THEN NOT x b
2513        "#;
2514        let c = cs(src).unwrap();
2515        assert!(c.rules[0].consequent[0].negated);
2516    }
2517
2518    #[test]
2519    fn compilation_is_deterministic() {
2520        let src = r#"
2521        PREMISE e:
2522            EXCLUSIVE
2523                z z
2524                a a
2525                m m
2526        FACT q q
2527        "#;
2528        assert_eq!(cs(src).unwrap(), cs(src).unwrap());
2529    }
2530
2531    #[test]
2532    fn empty_program_compiles_to_empty_ir() {
2533        let c = cs("// nothing here\n").unwrap();
2534        assert!(c.atoms.is_empty() && c.clauses.is_empty() && c.facts.is_empty());
2535    }
2536
2537    #[test]
2538    fn same_clause_from_two_named_premises_is_deduped() {
2539        // Different names, identical logical content → one clause, no redefinition.
2540        let src = r#"
2541        PREMISE e1:
2542            EXCLUSIVE
2543                x a
2544                x b
2545        PREMISE e2:
2546            EXCLUSIVE
2547                x a
2548                x b
2549        "#;
2550        let c = cs(src).unwrap();
2551        assert_eq!(c.clauses.len(), 1);
2552    }
2553
2554    #[test]
2555    fn object_distinguishes_atom_identity() {
2556        // `x p a` and `x p b` differ only by object → two distinct atoms.
2557        let c = cs("FACT x p a\nFACT x p b\n").unwrap();
2558        assert_eq!(c.atoms.len(), 2);
2559    }
2560
2561    // --- closed-world: ONEOF closes its variable's value set -----------------
2562
2563    /// A `ONEOF` body declaring three values of `resolved is …`. Flush-left so it
2564    /// concatenates cleanly in front of an appended line (CAPSTONE-style const).
2565    const ONEOF_RESOLVED: &str = r"PREMISE pick:
2566    ONEOF
2567        resolved is censored
2568        resolved is censored_mtp
2569        resolved is uncensored
2570";
2571
2572    #[test]
2573    fn value_outside_oneof_is_rejected() {
2574        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censoredmtp\n");
2575        let err = cs(&src).unwrap_err();
2576        let CompileError::UnknownValue(e) = err else {
2577            panic!("expected UnknownValue, got {err:?}");
2578        };
2579        assert_eq!(e.value, "censoredmtp");
2580        assert_eq!(e.subject, "resolved");
2581        assert_eq!(e.predicate, "is");
2582        assert_eq!(e.declared, "censored, censored_mtp, uncensored");
2583    }
2584
2585    #[test]
2586    fn near_miss_value_suggests_the_intended_one() {
2587        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censoredmtp\n");
2588        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
2589            panic!("expected UnknownValue");
2590        };
2591        assert_eq!(e.suggestion, " — did you mean `censored_mtp`?");
2592    }
2593
2594    #[test]
2595    fn far_off_value_offers_no_suggestion() {
2596        // `wildly_different` is past the edit-distance budget of every declared
2597        // value, so we reject it but do not guess.
2598        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is wildly_different\n");
2599        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
2600            panic!("expected UnknownValue");
2601        };
2602        assert_eq!(e.suggestion, "");
2603    }
2604
2605    #[test]
2606    fn declared_value_compiles_cleanly() {
2607        let src = alloc::format!("{ONEOF_RESOLVED}FACT resolved is censored_mtp\n");
2608        assert!(cs(&src).is_ok());
2609    }
2610
2611    #[test]
2612    fn oneof_declared_after_the_reference_still_catches_it() {
2613        // The check runs once every source is accumulated, so order is irrelevant.
2614        let src = alloc::format!("FACT resolved is censoredmtp\n{ONEOF_RESOLVED}");
2615        assert!(matches!(
2616            cs(&src).unwrap_err(),
2617            CompileError::UnknownValue(_)
2618        ));
2619    }
2620
2621    #[test]
2622    fn out_of_set_value_inside_a_premise_is_caught() {
2623        // Closed-world covers references anywhere — not just FACTs.
2624        let src = alloc::format!(
2625            r"{ONEOF_RESOLVED}
2626            PREMISE p:
2627                WHEN resolved is censoredmtp
2628                THEN x done
2629        "
2630        );
2631        assert!(matches!(
2632            cs(&src).unwrap_err(),
2633            CompileError::UnknownValue(_)
2634        ));
2635    }
2636
2637    #[test]
2638    fn out_of_set_value_inside_a_rule_is_caught() {
2639        let src = alloc::format!(
2640            r"{ONEOF_RESOLVED}
2641            RULE r:
2642                WHEN x go
2643                THEN resolved is censoredmtp
2644        "
2645        );
2646        assert!(matches!(
2647            cs(&src).unwrap_err(),
2648            CompileError::UnknownValue(_)
2649        ));
2650    }
2651
2652    #[test]
2653    fn binary_atoms_in_a_oneof_do_not_close_anything() {
2654        // `alice cooks` / `alice cleans` have no object slot, so there is no value
2655        // set to violate — a later `alice bakes` is just another atom, not an error.
2656        let src = r"
2657        PREMISE chores:
2658            ONEOF
2659                alice cooks
2660                alice cleans
2661        FACT alice bakes
2662        ";
2663        assert!(cs(src).is_ok());
2664    }
2665
2666    #[test]
2667    fn a_subject_without_a_oneof_stays_open() {
2668        // No ONEOF over `mood is …` → open world, any value is a fresh atom.
2669        let src = alloc::format!("{ONEOF_RESOLVED}FACT mood is anything_goes\n");
2670        assert!(cs(&src).is_ok());
2671    }
2672
2673    #[test]
2674    fn two_oneofs_union_their_declared_values() {
2675        // A value declared by either ONEOF for the same variable is legal.
2676        let src = r"
2677        PREMISE a:
2678            ONEOF
2679                v is one
2680                v is two
2681        PREMISE b:
2682            ONEOF
2683                v is two
2684                v is three
2685        FACT v is three
2686        ";
2687        assert!(cs(src).is_ok());
2688    }
2689
2690    #[test]
2691    fn earliest_offender_is_reported() {
2692        // Two violations; the diagnostic points at the first by line.
2693        let src = alloc::format!(
2694            "{ONEOF_RESOLVED}FACT resolved is firstbad\nFACT resolved is secondbad\n"
2695        );
2696        let CompileError::UnknownValue(e) = cs(&src).unwrap_err() else {
2697            panic!("expected UnknownValue");
2698        };
2699        assert_eq!(e.value, "firstbad");
2700    }
2701
2702    #[test]
2703    fn closed_world_spans_imported_domains() {
2704        // physics closes `Motor speed …`; main, referencing it via the prefix with
2705        // a typo, is rejected — the value set is shared across the domain boundary.
2706        let mut r = MemoryResolver::new();
2707        r.add(
2708            "physics.vrf",
2709            r"
2710        DOMAIN physics
2711        PREMISE g:
2712            ONEOF
2713                Motor speed slow
2714                Motor speed fast
2715        ",
2716        );
2717        r.add(
2718            "main.vrf",
2719            "DOMAIN main\nIMPORT \"physics.vrf\"\nFACT physics.Motor speed faast\n",
2720        );
2721        let CompileError::UnknownValue(e) = compile("main.vrf", &r).unwrap_err() else {
2722            panic!("expected UnknownValue");
2723        };
2724        assert_eq!(e.value, "faast");
2725        assert_eq!(e.suggestion, " — did you mean `fast`?");
2726    }
2727
2728    #[test]
2729    fn same_value_in_a_different_domain_does_not_clash() {
2730        // `state is open` is closed in domain a; domain b's own `state is shut`
2731        // (never declared in a) is fine — value sets are per-domain.
2732        let mut r = MemoryResolver::new();
2733        r.add(
2734            "a.vrf",
2735            r"
2736        DOMAIN a
2737        PREMISE s:
2738            ONEOF
2739                state is open
2740                state is closed
2741        ",
2742        );
2743        r.add("b.vrf", "DOMAIN b\nIMPORT \"a.vrf\"\nFACT state is shut\n");
2744        // `state is shut` is in domain b, which has no ONEOF → open, so it compiles.
2745        assert!(compile("b.vrf", &r).is_ok());
2746    }
2747
2748    #[test]
2749    fn levenshtein_basics() {
2750        // The canonical distance works on char slices; spell the string cases
2751        // through a tiny adapter so the table below reads as before.
2752        fn lev(a: &str, b: &str) -> usize {
2753            levenshtein(
2754                &a.chars().collect::<Vec<char>>(),
2755                &b.chars().collect::<Vec<char>>(),
2756            )
2757        }
2758        assert_eq!(lev("", ""), 0);
2759        assert_eq!(lev("abc", "abc"), 0);
2760        assert_eq!(lev("censoredmtp", "censored_mtp"), 1);
2761        assert_eq!(lev("norml", "normal"), 1);
2762        assert_eq!(lev("kitten", "sitting"), 3);
2763    }
2764
2765    #[test]
2766    fn nearest_respects_the_length_budget() {
2767        let cands = ["censored", "censored_mtp", "uncensored"];
2768        assert_eq!(nearest("censoredmtp", &cands), Some("censored_mtp"));
2769        // "zzz" is far from all; no suggestion.
2770        assert_eq!(nearest("zzz", &cands), None);
2771    }
2772
2773    #[test]
2774    fn nearest_offers_nothing_for_very_short_values() {
2775        // 1–2 character values get a budget of 0: every other short token is at
2776        // distance 1, so a "did you mean" carries no signal. True for single CJK
2777        // characters (one symbol = a whole word) and for two-letter codes alike.
2778        assert_eq!(nearest("七", &["一", "二", "三"]), None);
2779        assert_eq!(nearest("us", &["uk", "eu", "jp"]), None);
2780        // A multi-character CJK word still gets a sensible nearest (one wrong
2781        // character = distance 1, budget = 3/3 = 1).
2782        assert_eq!(nearest("中文字", &["中文学", "日本語"]), Some("中文学"));
2783    }
2784
2785    #[test]
2786    fn short_value_is_still_rejected_just_without_a_guess() {
2787        // The closed-world error does not depend on the suggestion: an out-of-set
2788        // single-character value is rejected exactly, only the `did you mean` is
2789        // suppressed.
2790        let src = r"
2791        PREMISE pick:
2792            ONEOF
2793                roll is 一
2794                roll is 二
2795        FACT roll is 七
2796        ";
2797        let CompileError::UnknownValue(e) = cs(src).unwrap_err() else {
2798            panic!("expected UnknownValue");
2799        };
2800        assert_eq!(e.value, "七");
2801        assert_eq!(e.suggestion, "");
2802    }
2803
2804    // --- FOR EACH / SET (bounded quantification, Phase 1) ------------------
2805
2806    #[test]
2807    fn for_each_grounds_once_per_element() {
2808        // A ONEOF body over a 2-element set: each element yields one pairwise
2809        // clause + one at-least-one clause = 2 clauses; 2 elements → 4 clauses,
2810        // and 4 distinct grounded atoms (a/b × slot m/n).
2811        let src = r"
2812        SET xs
2813            a
2814            b
2815        PREMISE slot FOR EACH t IN xs:
2816            ONEOF
2817                t slot m
2818                t slot n
2819        ";
2820        let c = cs(src).unwrap();
2821        assert_eq!(c.clauses.len(), 4);
2822        for s in ["a", "b"] {
2823            for o in ["m", "n"] {
2824                assert!(c.atoms.contains(&key(s, "slot", Some(o))));
2825            }
2826        }
2827    }
2828
2829    #[test]
2830    fn for_each_in_a_rule_derives_per_element() {
2831        // A quantified RULE grounds to one rule per element.
2832        let src = r"
2833        SET xs
2834            a
2835            b
2836        RULE r FOR EACH t IN xs:
2837            WHEN t on
2838            THEN t hot
2839        ";
2840        let c = cs(src).unwrap();
2841        assert_eq!(c.rules.len(), 2);
2842    }
2843
2844    #[test]
2845    fn for_each_over_an_undeclared_set_is_rejected() {
2846        let src = r"
2847        SET tasks
2848            a
2849        PREMISE p FOR EACH t IN taske:
2850            ONEOF
2851                t s x
2852                t s y
2853        ";
2854        let CompileError::UnknownSet {
2855            set, suggestion, ..
2856        } = cs(src).unwrap_err()
2857        else {
2858            panic!("expected UnknownSet");
2859        };
2860        assert_eq!(set, "taske");
2861        assert_eq!(suggestion, " — did you mean `tasks`?");
2862    }
2863
2864    #[test]
2865    fn for_each_closes_each_grounded_variable() {
2866        // ONEOF inside FOR EACH closes the variable per element, so an out-of-set
2867        // value on a grounded subject is a hard error (closed-world after subst).
2868        let src = r"
2869        SET xs
2870            a
2871            b
2872        PREMISE c FOR EACH t IN xs:
2873            ONEOF
2874                t color red
2875                t color blue
2876        FACT a color gren
2877        ";
2878        let CompileError::UnknownValue(e) = cs(src).unwrap_err() else {
2879            panic!("expected UnknownValue from the grounded ONEOF");
2880        };
2881        assert_eq!(e.value, "gren");
2882        assert_eq!(e.subject, "a");
2883    }
2884
2885    #[test]
2886    fn nested_for_each_is_a_parse_error() {
2887        // The structural guarantee: a second FOR EACH is unrepresentable — the
2888        // header carries exactly one, so nesting fails to parse (no domain
2889        // product can ever be written).
2890        let src = r"
2891        SET xs
2892            a
2893        PREMISE p FOR EACH x IN xs FOR EACH y IN xs:
2894            ONEOF
2895                x r y
2896                x s y
2897        ";
2898        assert!(matches!(cs(src), Err(CompileError::Parse(_))));
2899    }
2900
2901    #[test]
2902    fn relation_for_each_grounds_per_fact_pair() {
2903        // Two declared edges → the body is instantiated once per edge (two
2904        // pairwise clauses), and both edge atoms are recorded as consumed so the
2905        // ORPHAN lint will not flag them.
2906        let src = r"
2907        FACT a linked b
2908        FACT b linked c
2909        PREMISE p FOR EACH x linked y:
2910            FORBIDS
2911                x hot on
2912                y hot on
2913        ";
2914        let c = cs(src).unwrap();
2915        assert_eq!(c.clauses.len(), 2);
2916        assert_eq!(c.consumed.len(), 2);
2917        assert!(c.consumed.contains(&id(&c, &key("a", "linked", Some("b")))));
2918    }
2919
2920    #[test]
2921    fn relation_for_each_over_no_edges_is_inert() {
2922        // A relation with no matching facts grounds to nothing (vacuous), not an
2923        // error — unlike an undeclared SET.
2924        let src = r"
2925        PREMISE p FOR EACH x linked y:
2926            FORBIDS
2927                x hot on
2928                y hot on
2929        ";
2930        let c = cs(src).unwrap();
2931        assert_eq!(c.clauses.len(), 0);
2932        assert!(c.consumed.is_empty());
2933    }
2934
2935    #[test]
2936    fn close_transitive_extends_the_relation() {
2937        // a->b, b->c; CLOSE adds a->c, so a relation FOR EACH grounds over all
2938        // three pairs (without CLOSE it would be two).
2939        let src = r"
2940        FACT a r b
2941        FACT b r c
2942        CLOSE r TRANSITIVE
2943        PREMISE p FOR EACH x r y:
2944            FORBIDS
2945                x hot on
2946                y hot on
2947        ";
2948        let c = cs(src).unwrap();
2949        assert_eq!(c.clauses.len(), 3);
2950    }
2951
2952    #[test]
2953    fn close_transitive_rejects_a_cycle() {
2954        let src = r"
2955        FACT a r b
2956        FACT b r a
2957        CLOSE r TRANSITIVE
2958        ";
2959        let CompileError::CyclicRelation { relation, .. } = cs(src).unwrap_err() else {
2960            panic!("expected CyclicRelation");
2961        };
2962        assert_eq!(relation, "r");
2963    }
2964
2965    #[test]
2966    fn grounding_count_is_linear_in_the_set() {
2967        // No domain product: N elements → exactly N groundings (here N clauses,
2968        // one at-least-one per element), never N².
2969        let elems: alloc::string::String = (0..20).map(|i| alloc::format!("    e{i}\n")).collect();
2970        let src = alloc::format!(
2971            "SET xs\n{elems}PREMISE p FOR EACH t IN xs:\n    ATLEAST\n        t a\n        t b\n"
2972        );
2973        let c = cs(&src).unwrap();
2974        assert_eq!(c.clauses.len(), 20);
2975    }
2976}