Skip to main content

elenchus_compiler/
lib.rs

1//! elenchus-compiler — compiles parsed elenchus DSL into a canonical clause IR.
2//!
3//! This crate is **preparation, not solving**. It takes the AST (from
4//! `elenchus-parser`) and produces a deterministic, solver-ready intermediate
5//! representation:
6//!
7//! - **atom interner**: `(subject, predicate, object?)` → dense `u32` id,
8//!   canonically sorted so ids (and any later enumeration) are deterministic;
9//! - **desugaring**: surface CAPS sugar → `Impossible` clauses
10//!   (`EXCLUSIVE` pairwise, `WHEN…THEN` → `Impossible([A, …, NOT C])`, etc.);
11//! - **content-addressing** (sha256, mirroring vsm-guard's CAS): identical
12//!   clauses are deduped (idempotent — `P ∧ P ≡ P`), and a named construct
13//!   redefined with a different body is a `PremiseRedefinition` error.
14//!
15//! The actual reasoning (3-valued forward chaining, SAT, all-SAT, the WARNING
16//! pool, the four results) lives in `elenchus-solver`. `IMPORT` resolution is a
17//! source-agnostic [`Resolver`] that flat-merges another source into the shared
18//! atom universe ([`compile`] resolves imports; [`compile_source`] leaves them
19//! pending).
20//!
21//! # Example
22//!
23//! ```
24//! use elenchus_compiler::compile_source;
25//!
26//! // `ASSUME` lowers to a *soft* fact: the same atom universe as a `FACT`, but
27//! // one the solver may retract. Here `x a` is asserted both ways (hard + soft).
28//! let ir = compile_source("demo.vrf", "FACT x a\nASSUME NOT x a\nCHECK x\n").unwrap();
29//! assert_eq!(ir.facts.len(), 2);
30//! assert!(ir.facts.iter().any(|f| f.soft)); // the ASSUME is the soft one
31//! ```
32#![no_std]
33// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
34#![warn(missing_docs)]
35
36extern crate alloc;
37
38#[cfg(feature = "std")]
39extern crate std;
40
41use alloc::collections::{BTreeMap, BTreeSet};
42use alloc::string::{String, ToString};
43use alloc::vec;
44use alloc::vec::Vec;
45use core::fmt::Write as _;
46
47use elenchus_parser::{Atom, Body, Conn, ListOp, Literal, Statement};
48use sha2::{Digest, Sha256};
49use thiserror::Error;
50
51// --- content-addressing (mirrors vsm-guard::hashing) -----------------------
52
53/// SHA-256 content addressing. Used only for dedup / redefinition / provenance,
54/// never for namespacing atoms.
55pub fn hash_hex(data: &[u8]) -> String {
56    let mut hasher = Sha256::new();
57    hasher.update(data);
58    let out = hasher.finalize();
59    let mut s = String::with_capacity(out.len() * 2);
60    for b in out {
61        let _ = write!(s, "{:02x}", b);
62    }
63    s
64}
65
66// --- IR types --------------------------------------------------------------
67
68/// Dense atom identifier (also the SAT variable number).
69pub type AtomId = u32;
70
71/// The identity of an atom: the triple `(subject, predicate, object?)`, owned so
72/// it survives across merged sources. Ordering is lexicographic → canonical.
73#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
74pub struct AtomKey {
75    /// The entity the claim is about (owned copy of the parser's `subject`).
76    pub subject: String,
77    /// The relation or property asserted.
78    pub predicate: String,
79    /// Optional object; part of identity, so `has flying` ≠ `has swimming`.
80    pub object: Option<String>,
81}
82
83impl AtomKey {
84    /// Owned copy of a borrowed parser [`Atom`].
85    fn from_atom(a: &Atom) -> Self {
86        AtomKey {
87            subject: a.subject.to_string(),
88            predicate: a.predicate.to_string(),
89            object: a.object.map(|o| o.to_string()),
90        }
91    }
92}
93
94/// A literal as it appears *inside* an `Impossible` clause: an atom, optionally
95/// negated. `negated = true` means the literal is `NOT atom`.
96#[derive(Debug, Clone, Copy, PartialEq, Eq)]
97pub struct Lit {
98    /// Interned id of the atom (also its SAT variable number).
99    pub atom: AtomId,
100    /// `true` means this literal is `NOT atom` inside the clause.
101    pub negated: bool,
102}
103
104/// A confident truth value. UNKNOWN is the *absence* of a fact, never stored.
105#[derive(Debug, Clone, Copy, PartialEq, Eq)]
106pub enum Value {
107    /// The atom is asserted TRUE (from `FACT`).
108    True,
109    /// The atom is asserted FALSE (from `NOT`).
110    False,
111}
112
113/// Where a piece of IR came from — for readable conflict/warning pools.
114#[derive(Debug, Clone, PartialEq, Eq)]
115pub struct Origin {
116    /// The source label this came from (file name or `"<root>"`/`"<text>"`).
117    pub source: String,
118    /// 1-based line number of the originating statement.
119    pub line: u32,
120    /// The premise/rule name, if it came from a named construct.
121    pub premise: Option<String>,
122    /// Surface kind for the report, e.g. `"FACT"`, `"EXCLUSIVE"`, `"PREMISE"`.
123    pub kind: &'static str,
124}
125
126/// A confident fact (from `FACT` / `NOT`). Conflicting facts on the same atom
127/// are preserved (both kept) — the solver reports that as a CONFLICT.
128#[derive(Debug, Clone, PartialEq, Eq)]
129pub struct Fact {
130    /// The atom this fact pins down.
131    pub atom: AtomId,
132    /// The asserted truth value.
133    pub value: Value,
134    /// Where it came from (for the report).
135    pub origin: Origin,
136    /// `true` for an `ASSUME` (a *soft*, retractable hypothesis). A soft fact
137    /// behaves like a normal fact in the forward pass, but when the assumptions
138    /// cannot all hold the solver may drop it (and only it) to explain the
139    /// contradiction — a `FACT`/`NOT` is never retractable.
140    pub soft: bool,
141}
142
143/// An `Impossible` clause: the listed literals cannot all hold simultaneously.
144#[derive(Debug, Clone, PartialEq, Eq)]
145pub struct Clause {
146    /// The literals that cannot all hold at once (an `Impossible([...])`).
147    pub lits: Vec<Lit>,
148    /// Where it came from (for the report).
149    pub origin: Origin,
150}
151
152/// A forward-chaining rule (from `RULE`): if all antecedent literals hold, derive
153/// the consequent literals.
154#[derive(Debug, Clone, PartialEq, Eq)]
155pub struct Rule {
156    /// Literals that must all hold for the rule to fire.
157    pub antecedent: Vec<Lit>,
158    /// Literals derived (asserted) when the antecedent holds.
159    pub consequent: Vec<Lit>,
160    /// Where it came from (for the report).
161    pub origin: Origin,
162}
163
164/// A `CHECK` query.
165#[derive(Debug, Clone, PartialEq, Eq)]
166pub struct Check {
167    /// Restrict the report to this subject; `None` means check everything.
168    pub subject: Option<String>,
169    /// `true` runs the backward (all-SAT) pass to detect UNDERDETERMINED.
170    pub bidirectional: bool,
171}
172
173/// The compiled IR: the solver's input.
174#[derive(Debug, Clone, PartialEq, Eq)]
175pub struct Compiled {
176    /// Indexed by [`AtomId`]; canonically sorted.
177    pub atoms: Vec<AtomKey>,
178    /// Confident assertions from `FACT`/`NOT`.
179    pub facts: Vec<Fact>,
180    /// `Impossible` clauses (desugared premises + the built-in non-contradiction).
181    pub clauses: Vec<Clause>,
182    /// Forward-chaining rules from `RULE`.
183    pub rules: Vec<Rule>,
184    /// `CHECK` queries.
185    pub checks: Vec<Check>,
186    /// Imports seen but not yet resolved (only populated by [`compile_source`];
187    /// [`compile`] resolves them, leaving this empty).
188    pub pending_imports: Vec<String>,
189}
190
191/// Anything that can go wrong while compiling (and resolving imports).
192#[derive(Debug, Error, PartialEq, Eq)]
193pub enum CompileError {
194    /// A source failed to parse; carries the file label and the parser message.
195    #[error("parse error in {file}: {message}")]
196    Parse {
197        /// The source label that failed to parse.
198        file: String,
199        /// The parser's error message.
200        message: String,
201    },
202    /// A name was reused with a different body *within the same source*.
203    #[error("'{name}' redefined with a different body")]
204    PremiseRedefinition {
205        /// The clashing premise/rule name.
206        name: String,
207    },
208    /// An `IMPORT` target could not be loaded by the [`Resolver`].
209    #[error("import not found: {0}")]
210    ImportNotFound(String),
211    /// Imports form a cycle (a source transitively imports itself).
212    #[error("circular import: {0}")]
213    CircularImport(String),
214    /// A `RULE` used `OR` in its `THEN`: forward chaining cannot derive a
215    /// disjunction (it would not know which literal to assert). Model it as a
216    /// `PREMISE` constraint instead.
217    #[error("rule '{name}' cannot derive a disjunction (OR in THEN); use a PREMISE instead")]
218    RuleDisjunctiveConsequent {
219        /// The offending rule name.
220        name: String,
221    },
222}
223
224// --- raw (key-based) intermediate, before interning ------------------------
225// While accumulating we key everything by `AtomKey` (the owned triple) rather
226// than by `AtomId`, because ids only become stable once *all* sources are merged
227// and the atom set is sorted in `finalize`. These mirror the public IR types but
228// hold keys instead of ids.
229
230/// A literal keyed by atom identity (pre-interning counterpart of [`Lit`]).
231#[derive(Clone)]
232struct RawLit {
233    key: AtomKey,
234    negated: bool,
235}
236
237/// A fact keyed by atom identity (pre-interning counterpart of [`Fact`]).
238struct RawFact {
239    key: AtomKey,
240    value: Value,
241    origin: Origin,
242    soft: bool,
243}
244
245/// A clause keyed by atom identity (pre-interning counterpart of [`Clause`]).
246struct RawClause {
247    lits: Vec<RawLit>,
248    origin: Origin,
249}
250
251/// A rule keyed by atom identity (pre-interning counterpart of [`Rule`]).
252struct RawRule {
253    antecedent: Vec<RawLit>,
254    consequent: Vec<RawLit>,
255    origin: Origin,
256}
257
258// --- compiler --------------------------------------------------------------
259
260/// Accumulates statements from one or more sources, then interns + emits the IR.
261#[derive(Default)]
262pub struct Compiler {
263    keys: BTreeSet<AtomKey>,
264    facts: Vec<RawFact>,
265    clauses: Vec<RawClause>,
266    rules: Vec<RawRule>,
267    checks: Vec<Check>,
268    pending_imports: Vec<String>,
269    /// (source, name) → content hash of its body, for redefinition detection.
270    /// Scoped per source: premise/rule names are labels, not global identifiers,
271    /// so different files (domains) may reuse a name. A clash is only an error
272    /// *within the same source*.
273    defined: BTreeMap<(String, String), String>,
274    /// dedup of identical clauses by canonical content hash.
275    clause_sigs: BTreeSet<String>,
276    /// dedup of identical facts by (key, value).
277    fact_sigs: BTreeSet<String>,
278}
279
280impl Compiler {
281    /// A fresh, empty compiler.
282    pub fn new() -> Self {
283        Self::default()
284    }
285
286    /// Parse one source and accumulate its statements. `source` is a label used
287    /// in provenance (e.g. a file name or `"<root>"`).
288    pub fn add_source(&mut self, source: &str, src: &str) -> Result<(), CompileError> {
289        let program = elenchus_parser::parse(src).map_err(|e| CompileError::Parse {
290            file: source.to_string(),
291            message: e.message,
292        })?;
293        for stmt in &program.statements {
294            self.add_statement(source, stmt)?;
295        }
296        Ok(())
297    }
298
299    /// Resolve and flat-merge `path` and its transitive imports.
300    /// `visited` holds content hashes already merged (dedup); `stack` holds the
301    /// hashes on the current path (cycle detection).
302    fn load_recursive<R: Resolver>(
303        &mut self,
304        path: &str,
305        resolver: &R,
306        visited: &mut BTreeSet<String>,
307        stack: &mut Vec<String>,
308    ) -> Result<(), CompileError> {
309        let content = resolver.load(path)?;
310        let hash = hash_hex(content.as_bytes());
311        if visited.contains(&hash) {
312            return Ok(()); // already merged — idempotent
313        }
314        if stack.contains(&hash) {
315            return Err(CompileError::CircularImport(path.to_string()));
316        }
317        stack.push(hash.clone());
318
319        let program = elenchus_parser::parse(&content).map_err(|e| CompileError::Parse {
320            file: path.to_string(),
321            message: e.message,
322        })?;
323        for stmt in &program.statements {
324            if let Statement::Import(p) = stmt {
325                let resolved = resolver.resolve(path, p.data);
326                self.load_recursive(&resolved, resolver, visited, stack)?;
327            } else {
328                self.add_statement(path, stmt)?;
329            }
330        }
331
332        stack.pop();
333        visited.insert(hash);
334        Ok(())
335    }
336
337    /// Route one statement to the right accumulator (facts, checks, named
338    /// constructs); `IMPORT` is only recorded as pending here (see [`compile`]
339    /// / [`load_recursive`] for actual resolution).
340    fn add_statement(&mut self, source: &str, stmt: &Statement) -> Result<(), CompileError> {
341        match stmt {
342            Statement::Import(path) => {
343                self.pending_imports.push(path.data.to_string());
344            }
345            Statement::Fact(a) => self.add_fact(source, a, Value::True, "FACT", false),
346            Statement::Negation(a) => self.add_fact(source, a, Value::False, "NOT", false),
347            Statement::Assume(l) => {
348                let value = if l.data.negated {
349                    Value::False
350                } else {
351                    Value::True
352                };
353                // A soft assertion shares the FACT accumulator; the atom is the
354                // literal's atom, the polarity its `NOT`, and `soft` marks it
355                // retractable. The span is the whole `ASSUME` line.
356                let located = elenchus_parser::Located {
357                    data: l.data.atom.clone(),
358                    span: l.span,
359                };
360                self.add_fact(source, &located, value, "ASSUME", true);
361            }
362            Statement::Check {
363                subject,
364                bidirectional,
365            } => self.checks.push(Check {
366                subject: subject.as_ref().map(|s| s.data.to_string()),
367                bidirectional: *bidirectional,
368            }),
369            Statement::Premise { name, body } => {
370                let line = name.span.location_line();
371                self.add_named(source, name.data, line, body, false)?;
372            }
373            Statement::Rule { name, body } => {
374                let line = name.span.location_line();
375                self.add_named(source, name.data, line, body, true)?;
376            }
377        }
378        Ok(())
379    }
380
381    /// Record an atom identity in the shared universe (deduped by the `BTreeSet`).
382    fn intern(&mut self, key: &AtomKey) {
383        if !self.keys.contains(key) {
384            self.keys.insert(key.clone());
385        }
386    }
387
388    /// Accumulate a `FACT`/`NOT`; exact duplicates (same key+value+kind) are
389    /// dropped as idempotent, while a `FACT` and a `NOT` on the same atom are
390    /// both kept so the solver can report the CONFLICT.
391    fn add_fact(
392        &mut self,
393        source: &str,
394        a: &elenchus_parser::Located<Atom>,
395        value: Value,
396        kind: &'static str,
397        soft: bool,
398    ) {
399        let key = AtomKey::from_atom(&a.data);
400        self.intern(&key);
401        let sig = alloc::format!(
402            "{}|{}|{}|{}",
403            key_sig(&key),
404            matches!(value, Value::True) as u8,
405            kind,
406            "" // facts dedup ignores line; identical FACT twice is idempotent
407        );
408        if !self.fact_sigs.insert(sig) {
409            return; // exact duplicate fact — idempotent
410        }
411        self.facts.push(RawFact {
412            key,
413            value,
414            origin: Origin {
415                source: source.to_string(),
416                line: a.span.location_line(),
417                premise: None,
418                kind,
419            },
420            soft,
421        });
422    }
423
424    /// Handle a named construct (`PREMISE` or `RULE`). `is_rule` selects derivation
425    /// vs checking. Returns an error on redefinition with a different body.
426    fn add_named(
427        &mut self,
428        source: &str,
429        name: &str,
430        line: u32,
431        body: &Body,
432        is_rule: bool,
433    ) -> Result<(), CompileError> {
434        let body_hash = hash_hex(canonical_body(name, body, is_rule).as_bytes());
435        let key = (source.to_string(), name.to_string());
436        match self.defined.get(&key) {
437            Some(prev) if *prev == body_hash => return Ok(()), // identical → idempotent
438            Some(_) => {
439                // Same name + different body *in the same source* — a real mistake.
440                return Err(CompileError::PremiseRedefinition {
441                    name: name.to_string(),
442                });
443            }
444            None => {
445                self.defined.insert(key, body_hash);
446            }
447        }
448
449        if is_rule {
450            // RULE always has an implication body (guaranteed by the grammar).
451            if let Body::Impl {
452                antecedent,
453                ante_conn,
454                consequent,
455                cons_conn,
456            } = body
457            {
458                // A rule *derives* its consequent; an `OR` consequent is not a
459                // single fact to assert, so reject it (use a PREMISE instead).
460                if *cons_conn == Conn::Or {
461                    return Err(CompileError::RuleDisjunctiveConsequent {
462                        name: name.to_string(),
463                    });
464                }
465                let (ante, cons) = (raw_lits(antecedent), raw_lits(consequent));
466                for l in ante.iter().chain(cons.iter()) {
467                    self.intern(&l.key);
468                }
469                let origin = self.origin(source, line, Some(name), "RULE");
470                match ante_conn {
471                    // a ∧ b → C : one rule firing on the whole antecedent.
472                    Conn::And => self.rules.push(RawRule {
473                        antecedent: ante,
474                        consequent: cons,
475                        origin,
476                    }),
477                    // (a ∨ b) → C == (a → C) ∧ (b → C): one rule per antecedent.
478                    Conn::Or => {
479                        for a in &ante {
480                            self.rules.push(RawRule {
481                                antecedent: vec![a.clone()],
482                                consequent: cons.clone(),
483                                origin: origin.clone(),
484                            });
485                        }
486                    }
487                }
488            }
489            return Ok(());
490        }
491
492        match body {
493            Body::List { op, atoms } => {
494                let keys: Vec<AtomKey> =
495                    atoms.iter().map(|a| AtomKey::from_atom(&a.data)).collect();
496                for k in &keys {
497                    self.intern(k);
498                }
499                let kind = list_kind(*op);
500                let origin = self.origin(source, line, Some(name), kind);
501                match op {
502                    // EXCLUSIVE / FORBIDS: "at most one" → pairwise Impossible([a_i, a_j]).
503                    ListOp::Exclusive | ListOp::Forbids => {
504                        self.emit_pairwise(&keys, &origin);
505                    }
506                    // ONEOF: pairwise (at most one) + at-least-one.
507                    ListOp::OneOf => {
508                        self.emit_pairwise(&keys, &origin);
509                        self.emit_at_least_one(&keys, &origin);
510                    }
511                    // ATLEAST: Impossible([NOT a_1, …, NOT a_n]).
512                    ListOp::AtLeast => {
513                        self.emit_at_least_one(&keys, &origin);
514                    }
515                }
516            }
517            Body::Impl {
518                antecedent,
519                ante_conn,
520                consequent,
521                cons_conn,
522            } => {
523                // Implication A → C as `Impossible(A_true ∧ ¬C)`. We group each
524                // side by its connective and emit one clause per (ante × cons)
525                // group pair — a uniform rule covering all AND/OR combinations:
526                //   AND-ante → all its literals share every clause;
527                //   OR-ante  → one clause per literal;
528                //   AND-cons → one clause per (negated) literal;
529                //   OR-cons  → all its (negated) literals share every clause.
530                let ante = raw_lits(antecedent);
531                let cons = raw_lits(consequent);
532                for l in ante.iter().chain(cons.iter()) {
533                    self.intern(&l.key);
534                }
535                let origin = self.origin(source, line, Some(name), "PREMISE");
536
537                let ante_groups: Vec<Vec<RawLit>> = match ante_conn {
538                    Conn::And => vec![ante.clone()],
539                    Conn::Or => ante.iter().map(|l| vec![l.clone()]).collect(),
540                };
541                let cons_groups: Vec<Vec<RawLit>> = match cons_conn {
542                    Conn::And => cons.iter().map(|l| vec![l.clone()]).collect(),
543                    Conn::Or => vec![cons.clone()],
544                };
545                for ag in &ante_groups {
546                    for cg in &cons_groups {
547                        let mut lits = ag.clone();
548                        for c in cg {
549                            lits.push(RawLit {
550                                key: c.key.clone(),
551                                negated: !c.negated,
552                            });
553                        }
554                        self.push_clause(lits, origin.clone());
555                    }
556                }
557            }
558        }
559        Ok(())
560    }
561
562    /// Emit "at most one TRUE" as one `Impossible([a_i, a_j])` per unordered
563    /// pair. Pairwise (not a single big clause) because `Impossible([a,b,c])`
564    /// only forbids *all three* together — it would still allow two.
565    fn emit_pairwise(&mut self, keys: &[AtomKey], origin: &Origin) {
566        for i in 0..keys.len() {
567            for j in (i + 1)..keys.len() {
568                let lits = vec![
569                    RawLit {
570                        key: keys[i].clone(),
571                        negated: false,
572                    },
573                    RawLit {
574                        key: keys[j].clone(),
575                        negated: false,
576                    },
577                ];
578                self.push_clause(lits, origin.clone());
579            }
580        }
581    }
582
583    /// Emit "at least one TRUE" as a single `Impossible([NOT a_1, …, NOT a_n])`
584    /// — it is impossible for all of them to be false at once.
585    fn emit_at_least_one(&mut self, keys: &[AtomKey], origin: &Origin) {
586        let lits = keys
587            .iter()
588            .map(|k| RawLit {
589                key: k.clone(),
590                negated: true,
591            })
592            .collect();
593        self.push_clause(lits, origin.clone());
594    }
595
596    /// Append a clause unless an identical one (by canonical [`clause_sig`]) is
597    /// already present — `P ∧ P ≡ P`, so dedup keeps the IR minimal.
598    fn push_clause(&mut self, lits: Vec<RawLit>, origin: Origin) {
599        let sig = clause_sig(&lits);
600        if self.clause_sigs.insert(sig) {
601            self.clauses.push(RawClause { lits, origin });
602        }
603        // else: identical clause already present — idempotent.
604    }
605
606    /// Build an [`Origin`] for provenance from the current source/line/name.
607    fn origin(&self, source: &str, line: u32, premise: Option<&str>, kind: &'static str) -> Origin {
608        Origin {
609            source: source.to_string(),
610            line,
611            premise: premise.map(|s| s.to_string()),
612            kind,
613        }
614    }
615
616    /// Intern all atoms (canonical sort), then lower the raw IR to ids.
617    pub fn finalize(self) -> Compiled {
618        let atoms: Vec<AtomKey> = self.keys.into_iter().collect(); // BTreeSet → sorted
619        let mut id_of: BTreeMap<AtomKey, AtomId> = BTreeMap::new();
620        for (i, k) in atoms.iter().enumerate() {
621            id_of.insert(k.clone(), i as AtomId);
622        }
623        let lower = |l: &RawLit| Lit {
624            atom: id_of[&l.key],
625            negated: l.negated,
626        };
627
628        let facts = self
629            .facts
630            .into_iter()
631            .map(|f| Fact {
632                atom: id_of[&f.key],
633                value: f.value,
634                origin: f.origin,
635                soft: f.soft,
636            })
637            .collect();
638        let clauses = self
639            .clauses
640            .into_iter()
641            .map(|c| Clause {
642                lits: c.lits.iter().map(lower).collect(),
643                origin: c.origin,
644            })
645            .collect();
646        let rules = self
647            .rules
648            .into_iter()
649            .map(|r| Rule {
650                antecedent: r.antecedent.iter().map(lower).collect(),
651                consequent: r.consequent.iter().map(lower).collect(),
652                origin: r.origin,
653            })
654            .collect();
655
656        Compiled {
657            atoms,
658            facts,
659            clauses,
660            rules,
661            checks: self.checks,
662            pending_imports: self.pending_imports,
663        }
664    }
665}
666
667/// Convenience: compile a single source into the IR. `IMPORT`s are recorded as
668/// pending, not resolved (use [`compile`] with a [`Resolver`] to resolve them).
669pub fn compile_source(source: &str, src: &str) -> Result<Compiled, CompileError> {
670    let mut c = Compiler::new();
671    c.add_source(source, src)?;
672    Ok(c.finalize())
673}
674
675// --- import resolution (source-agnostic) -----------------------------------
676
677/// Resolves `IMPORT "path"` to source text. The engine is source-agnostic: it
678/// consumes strings, so a file is merely one backing store. Mirrors
679/// vsm-grammar's `SourceResolver`.
680pub trait Resolver {
681    /// Load the raw source text for a resolved path.
682    fn load(&self, path: &str) -> Result<String, CompileError>;
683
684    /// Normalize `relative` against the importing source `base`.
685    /// Default: paths are absolute names, returned unchanged.
686    fn resolve(&self, _base: &str, relative: &str) -> String {
687        relative.to_string()
688    }
689}
690
691/// An in-memory resolver: serves sources from a name → content map. Pure
692/// `no_std`. Mirrors vsm-grammar's `MemoryResolver`.
693#[derive(Default)]
694pub struct MemoryResolver {
695    sources: BTreeMap<String, String>,
696}
697
698impl MemoryResolver {
699    /// An empty resolver with no sources.
700    pub fn new() -> Self {
701        Self::default()
702    }
703
704    /// Register `content` under `path`; returns `&mut self` for chaining.
705    pub fn add(&mut self, path: &str, content: &str) -> &mut Self {
706        self.sources.insert(path.to_string(), content.to_string());
707        self
708    }
709}
710
711impl Resolver for MemoryResolver {
712    fn load(&self, path: &str) -> Result<String, CompileError> {
713        self.sources
714            .get(path)
715            .cloned()
716            .ok_or_else(|| CompileError::ImportNotFound(path.to_string()))
717    }
718}
719
720/// A filesystem-backed resolver. Mirrors vsm-grammar's `FileResolver`:
721/// relative imports resolve against the importing file's directory, with manual
722/// `..` normalization (no canonicalization, to keep a virtual layout).
723#[cfg(feature = "std")]
724pub struct FileResolver;
725
726#[cfg(feature = "std")]
727impl Resolver for FileResolver {
728    fn load(&self, path: &str) -> Result<String, CompileError> {
729        std::fs::read_to_string(path)
730            .map_err(|e| CompileError::ImportNotFound(alloc::format!("{}: {}", path, e)))
731    }
732
733    fn resolve(&self, base: &str, relative: &str) -> String {
734        use std::path::{Component, Path, PathBuf};
735        let parent = Path::new(base).parent().unwrap_or_else(|| Path::new("."));
736        let joined = parent.join(relative);
737        let mut out = PathBuf::new();
738        for component in joined.components() {
739            match component {
740                Component::ParentDir => {
741                    out.pop();
742                }
743                Component::CurDir => {}
744                c => out.push(c),
745            }
746        }
747        // Normalize to forward slashes so resolved paths (and therefore the
748        // provenance recorded in the IR) are identical on Windows and Unix.
749        // Windows `std::fs` accepts `/` just fine.
750        out.to_string_lossy().replace('\\', "/")
751    }
752}
753
754/// Compile a root source and all its transitive `IMPORT`s into one IR.
755///
756/// Imports are flat-merged into a single shared atom universe (atoms unify by
757/// identity across files). Sources are content-addressed (sha256): a source
758/// already merged is skipped (dedup), and an import cycle is an error.
759///
760/// Premise/rule names are per-source labels, not global identifiers: different
761/// files (domains) may reuse a name, and the report qualifies them by source. A
762/// name reused with a different body is an error only *within the same source*.
763pub fn compile<R: Resolver>(root: &str, resolver: &R) -> Result<Compiled, CompileError> {
764    let mut c = Compiler::new();
765    let mut visited = BTreeSet::new();
766    let mut stack = Vec::new();
767    c.load_recursive(root, resolver, &mut visited, &mut stack)?;
768    Ok(c.finalize())
769}
770
771// --- helpers ---------------------------------------------------------------
772
773/// Lower parsed, located literals to key-based [`RawLit`]s (drops spans).
774fn raw_lits(lits: &[elenchus_parser::Located<Literal>]) -> Vec<RawLit> {
775    lits.iter()
776        .map(|l| RawLit {
777            key: AtomKey::from_atom(&l.data.atom),
778            negated: l.data.negated,
779        })
780        .collect()
781}
782
783/// The surface keyword for a list op, used as [`Origin::kind`] in the report.
784fn list_kind(op: ListOp) -> &'static str {
785    match op {
786        ListOp::Exclusive => "EXCLUSIVE",
787        ListOp::Forbids => "FORBIDS",
788        ListOp::OneOf => "ONEOF",
789        ListOp::AtLeast => "ATLEAST",
790    }
791}
792
793/// Stable `subject|predicate|object` string for an atom key (the unit from which
794/// clause/fact/body signatures are built).
795fn key_sig(k: &AtomKey) -> String {
796    alloc::format!(
797        "{}|{}|{}",
798        k.subject,
799        k.predicate,
800        k.object.as_deref().unwrap_or("")
801    )
802}
803
804/// Canonical, order-independent signature of a clause's literals (for dedup).
805fn clause_sig(lits: &[RawLit]) -> String {
806    let mut parts: Vec<String> = lits
807        .iter()
808        .map(|l| alloc::format!("{}|{}", key_sig(&l.key), l.negated as u8))
809        .collect();
810    parts.sort();
811    parts.dedup();
812    parts.join(";")
813}
814
815/// Canonical body string for a named construct, hashed for redefinition checks.
816fn canonical_body(name: &str, body: &Body, is_rule: bool) -> String {
817    let mut s = String::new();
818    let _ = write!(s, "{}|{}|", if is_rule { "RULE" } else { "PREMISE" }, name);
819    match body {
820        Body::List { op, atoms } => {
821            let _ = write!(s, "LIST|{}|", list_kind(*op));
822            let mut keys: Vec<String> = atoms
823                .iter()
824                .map(|a| key_sig(&AtomKey::from_atom(&a.data)))
825                .collect();
826            keys.sort();
827            s.push_str(&keys.join(";"));
828        }
829        Body::Impl {
830            antecedent,
831            ante_conn,
832            consequent,
833            cons_conn,
834        } => {
835            let conn = |c: &Conn| if *c == Conn::Or { "OR" } else { "AND" };
836            s.push_str("IMPL|ANTE|");
837            s.push_str(conn(ante_conn));
838            s.push('|');
839            s.push_str(&lit_sigs(antecedent));
840            s.push_str("|CONS|");
841            s.push_str(conn(cons_conn));
842            s.push('|');
843            s.push_str(&lit_sigs(consequent));
844        }
845    }
846    s
847}
848
849/// Sorted `key|negated` signature of a literal list (order-independent), used
850/// inside [`canonical_body`] so reordering a body does not look like a redefinition.
851fn lit_sigs(lits: &[elenchus_parser::Located<Literal>]) -> String {
852    let mut parts: Vec<String> = lits
853        .iter()
854        .map(|l| {
855            alloc::format!(
856                "{}|{}",
857                key_sig(&AtomKey::from_atom(&l.data.atom)),
858                l.data.negated as u8
859            )
860        })
861        .collect();
862    parts.sort();
863    parts.join(";")
864}
865
866#[cfg(test)]
867mod tests {
868    use super::*;
869
870    fn key(subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
871        AtomKey {
872            subject: subject.to_string(),
873            predicate: predicate.to_string(),
874            object: object.map(|o| o.to_string()),
875        }
876    }
877
878    fn id(c: &Compiled, k: &AtomKey) -> AtomId {
879        c.atoms.iter().position(|a| a == k).unwrap() as AtomId
880    }
881
882    #[test]
883    fn exclusive_unfolds_pairwise() {
884        let src = r#"
885        PREMISE e:
886            EXCLUSIVE
887                x a
888                x b
889                x c
890        "#;
891        let c = compile_source("<t>", src).unwrap();
892        // C(3,2) = 3 clauses, each of 2 positive literals.
893        assert_eq!(c.clauses.len(), 3);
894        for cl in &c.clauses {
895            assert_eq!(cl.lits.len(), 2);
896            assert!(cl.lits.iter().all(|l| !l.negated));
897        }
898    }
899
900    #[test]
901    fn implication_negates_consequent() {
902        // WHEN x a THEN x b  ==  Impossible([x a, NOT x b])
903        let src = r#"
904        PREMISE r:
905            WHEN x a
906            THEN x b
907        "#;
908        let c = compile_source("<t>", src).unwrap();
909        assert_eq!(c.clauses.len(), 1);
910        let cl = &c.clauses[0];
911        assert_eq!(cl.lits.len(), 2);
912        let a = id(&c, &key("x", "a", None));
913        let b = id(&c, &key("x", "b", None));
914        assert!(cl.lits.contains(&Lit {
915            atom: a,
916            negated: false
917        }));
918        assert!(cl.lits.contains(&Lit {
919            atom: b,
920            negated: true
921        }));
922    }
923
924    #[test]
925    fn negated_consequent_flips_to_positive() {
926        // THEN NOT x b  →  NOT(NOT x b) = x b positive inside Impossible
927        let src = r#"
928        PREMISE r:
929            WHEN x a
930            THEN NOT x b
931        "#;
932        let c = compile_source("<t>", src).unwrap();
933        let b = id(&c, &key("x", "b", None));
934        assert!(c.clauses[0].lits.contains(&Lit {
935            atom: b,
936            negated: false
937        }));
938    }
939
940    #[test]
941    fn consequent_or_is_one_clause_with_all_negated() {
942        // WHEN x p THEN x a OR x b  ==  Impossible([x p, NOT x a, NOT x b])
943        let src = r#"
944        PREMISE r:
945            WHEN x p
946            THEN x a
947            OR x b
948        "#;
949        let c = compile_source("<t>", src).unwrap();
950        assert_eq!(c.clauses.len(), 1);
951        let cl = &c.clauses[0];
952        assert_eq!(cl.lits.len(), 3);
953        let p = id(&c, &key("x", "p", None));
954        let a = id(&c, &key("x", "a", None));
955        let b = id(&c, &key("x", "b", None));
956        assert!(cl.lits.contains(&Lit {
957            atom: p,
958            negated: false
959        }));
960        assert!(cl.lits.contains(&Lit {
961            atom: a,
962            negated: true
963        }));
964        assert!(cl.lits.contains(&Lit {
965            atom: b,
966            negated: true
967        }));
968    }
969
970    #[test]
971    fn antecedent_or_is_one_clause_per_disjunct() {
972        // WHEN x a OR x b THEN x c
973        //   == Impossible([x a, NOT x c]) ∧ Impossible([x b, NOT x c])
974        let src = r#"
975        PREMISE r:
976            WHEN x a
977            OR x b
978            THEN x c
979        "#;
980        let c = compile_source("<t>", src).unwrap();
981        assert_eq!(c.clauses.len(), 2);
982        let a = id(&c, &key("x", "a", None));
983        let b = id(&c, &key("x", "b", None));
984        let cc = id(&c, &key("x", "c", None));
985        // every clause has exactly two lits and carries NOT c
986        for cl in &c.clauses {
987            assert_eq!(cl.lits.len(), 2);
988            assert!(cl.lits.contains(&Lit {
989                atom: cc,
990                negated: true
991            }));
992        }
993        let has = |atom| {
994            c.clauses.iter().any(|cl| {
995                cl.lits.contains(&Lit {
996                    atom,
997                    negated: false,
998                })
999            })
1000        };
1001        assert!(has(a) && has(b));
1002    }
1003
1004    #[test]
1005    fn antecedent_or_with_consequent_or_distributes() {
1006        // (a ∨ b) → (c ∨ d): Impossible([a,¬c,¬d]) ∧ Impossible([b,¬c,¬d])
1007        let src = r#"
1008        PREMISE r:
1009            WHEN x a
1010            OR x b
1011            THEN x c
1012            OR x d
1013        "#;
1014        let c = compile_source("<t>", src).unwrap();
1015        assert_eq!(c.clauses.len(), 2);
1016        for cl in &c.clauses {
1017            assert_eq!(cl.lits.len(), 3);
1018        }
1019    }
1020
1021    #[test]
1022    fn rule_with_or_antecedent_splits_into_two_rules() {
1023        // (a ∨ b) → c derives c whenever either fires: two single-antecedent rules.
1024        let src = r#"
1025        RULE r:
1026            WHEN x a
1027            OR x b
1028            THEN x c
1029        "#;
1030        let c = compile_source("<t>", src).unwrap();
1031        assert_eq!(c.rules.len(), 2);
1032        assert!(
1033            c.rules
1034                .iter()
1035                .all(|r| r.antecedent.len() == 1 && r.consequent.len() == 1)
1036        );
1037    }
1038
1039    #[test]
1040    fn rule_with_or_consequent_is_rejected() {
1041        // A rule cannot derive a disjunction — must be a PREMISE.
1042        let src = r#"
1043        RULE r:
1044            WHEN x a
1045            THEN x b
1046            OR x c
1047        "#;
1048        let err = compile_source("<t>", src).unwrap_err();
1049        assert!(matches!(
1050            err,
1051            CompileError::RuleDisjunctiveConsequent { .. }
1052        ));
1053    }
1054
1055    #[test]
1056    fn oneof_is_pairwise_plus_at_least_one() {
1057        let src = r#"
1058        PREMISE o:
1059            ONEOF
1060                x a
1061                x b
1062        "#;
1063        let c = compile_source("<t>", src).unwrap();
1064        // pairwise C(2,2)=1 + 1 at-least-one = 2 clauses
1065        assert_eq!(c.clauses.len(), 2);
1066        // the at-least-one clause is the all-negated one
1067        assert!(c.clauses.iter().any(|cl| cl.lits.iter().all(|l| l.negated)));
1068    }
1069
1070    #[test]
1071    fn atleast_is_one_negated_clause() {
1072        let src = r#"
1073        PREMISE a:
1074            ATLEAST
1075                x a
1076                x b
1077                x c
1078        "#;
1079        let c = compile_source("<t>", src).unwrap();
1080        assert_eq!(c.clauses.len(), 1);
1081        assert_eq!(c.clauses[0].lits.len(), 3);
1082        assert!(c.clauses[0].lits.iter().all(|l| l.negated));
1083    }
1084
1085    #[test]
1086    fn rules_are_separate_from_clauses() {
1087        let src = r#"
1088        RULE needs:
1089            WHEN x a
1090            THEN x b
1091        "#;
1092        let c = compile_source("<t>", src).unwrap();
1093        assert_eq!(c.clauses.len(), 0);
1094        assert_eq!(c.rules.len(), 1);
1095        assert_eq!(c.rules[0].antecedent.len(), 1);
1096        assert_eq!(c.rules[0].consequent.len(), 1);
1097    }
1098
1099    #[test]
1100    fn atoms_are_canonically_sorted() {
1101        let src = r#"
1102        FACT z z
1103        FACT a a
1104        FACT m m
1105        "#;
1106        let c = compile_source("<t>", src).unwrap();
1107        let mut sorted = c.atoms.clone();
1108        sorted.sort();
1109        assert_eq!(c.atoms, sorted);
1110    }
1111
1112    #[test]
1113    fn duplicate_premise_is_idempotent() {
1114        let src = r#"
1115        PREMISE e:
1116            EXCLUSIVE
1117                x a
1118                x b
1119        PREMISE e:
1120            EXCLUSIVE
1121                x a
1122                x b
1123        "#;
1124        let c = compile_source("<t>", src).unwrap();
1125        assert_eq!(c.clauses.len(), 1);
1126    }
1127
1128    #[test]
1129    fn redefinition_with_different_body_errors() {
1130        let src = r#"
1131        PREMISE e:
1132            EXCLUSIVE
1133                x a
1134                x b
1135        PREMISE e:
1136            EXCLUSIVE
1137                x a
1138                x c
1139        "#;
1140        let err = compile_source("<t>", src).unwrap_err();
1141        assert_eq!(
1142            err,
1143            CompileError::PremiseRedefinition {
1144                name: "e".to_string()
1145            }
1146        );
1147    }
1148
1149    #[test]
1150    fn duplicate_fact_is_idempotent() {
1151        let c = compile_source("<t>", "FACT x a\nFACT x a\n").unwrap();
1152        assert_eq!(c.facts.len(), 1);
1153    }
1154
1155    #[test]
1156    fn conflicting_facts_are_both_kept() {
1157        // FACT X + NOT X is a CONFLICT for the solver, not a compile error.
1158        let c = compile_source("<t>", "FACT x a\nNOT x a\n").unwrap();
1159        assert_eq!(c.facts.len(), 2);
1160    }
1161
1162    #[test]
1163    fn import_is_recorded_pending() {
1164        let c = compile_source("<t>", "IMPORT \"physics.vrf\"\nFACT x a\n").unwrap();
1165        assert_eq!(c.pending_imports, vec!["physics.vrf".to_string()]);
1166    }
1167
1168    #[test]
1169    fn import_flat_merges_and_atoms_unify() {
1170        // The library constrains `Engine.X has fuel`; the main file asserts it.
1171        // After merge the atom must be ONE shared id (unification across files).
1172        let mut r = MemoryResolver::new();
1173        r.add(
1174            "lib.vrf",
1175            r#"
1176        PREMISE needs_fuel:
1177            WHEN Engine.X has engine
1178            THEN Engine.X has fuel
1179        "#,
1180        );
1181        r.add(
1182            "main.vrf",
1183            r#"
1184        IMPORT "lib.vrf"
1185        FACT Engine.X has engine
1186        FACT Engine.X has fuel
1187        "#,
1188        );
1189        let c = compile("main.vrf", &r).unwrap();
1190        assert!(c.pending_imports.is_empty());
1191        assert_eq!(c.clauses.len(), 1); // the imported premise
1192        assert_eq!(c.facts.len(), 2);
1193
1194        // `Engine.X has fuel` from the FACT and from the imported premise share an id.
1195        let fuel = key("Engine.X", "has", Some("fuel"));
1196        let fuel_id = id(&c, &fuel);
1197        assert!(c.facts.iter().any(|f| f.atom == fuel_id));
1198        assert!(c.clauses[0].lits.iter().any(|l| l.atom == fuel_id));
1199    }
1200
1201    #[test]
1202    fn diamond_import_is_deduped() {
1203        // main → a, b ; a → base ; b → base. base merged once.
1204        let mut r = MemoryResolver::new();
1205        r.add(
1206            "base.vrf",
1207            r#"
1208        PREMISE b:
1209            EXCLUSIVE
1210                x a
1211                x b
1212        "#,
1213        );
1214        r.add("a.vrf", "IMPORT \"base.vrf\"\n");
1215        r.add("c.vrf", "IMPORT \"base.vrf\"\n");
1216        r.add("main.vrf", "IMPORT \"a.vrf\"\nIMPORT \"c.vrf\"\n");
1217        let c = compile("main.vrf", &r).unwrap();
1218        assert_eq!(c.clauses.len(), 1); // base's single clause, not two
1219    }
1220
1221    #[test]
1222    fn circular_import_errors() {
1223        let mut r = MemoryResolver::new();
1224        r.add("a.vrf", "IMPORT \"b.vrf\"\n");
1225        r.add("b.vrf", "IMPORT \"a.vrf\"\n");
1226        let err = compile("a.vrf", &r).unwrap_err();
1227        assert!(matches!(err, CompileError::CircularImport(_)));
1228    }
1229
1230    #[test]
1231    fn missing_import_errors() {
1232        let mut r = MemoryResolver::new();
1233        r.add("main.vrf", "IMPORT \"ghost.vrf\"\n");
1234        let err = compile("main.vrf", &r).unwrap_err();
1235        assert!(matches!(err, CompileError::ImportNotFound(_)));
1236    }
1237
1238    #[test]
1239    fn same_name_across_domains_coexists() {
1240        // Two files may legitimately reuse a premise NAME with different bodies
1241        // (different domains). Names are per-source labels — both premises apply,
1242        // and the report qualifies them by source. NOT a redefinition error.
1243        let mut r = MemoryResolver::new();
1244        r.add(
1245            "physics.vrf",
1246            r#"
1247        PREMISE safety:
1248            EXCLUSIVE
1249                x a
1250                x b
1251        "#,
1252        );
1253        r.add(
1254            "main.vrf",
1255            r#"
1256        IMPORT "physics.vrf"
1257        PREMISE safety:
1258            EXCLUSIVE
1259                x a
1260                x c
1261        "#,
1262        );
1263        let c = compile("main.vrf", &r).unwrap();
1264        assert_eq!(c.clauses.len(), 2); // a-b from physics, a-c from main
1265        assert!(c.clauses.iter().any(|cl| cl.origin.source == "physics.vrf"));
1266        assert!(c.clauses.iter().any(|cl| cl.origin.source == "main.vrf"));
1267    }
1268
1269    #[test]
1270    fn two_libs_same_name_into_one_consumer() {
1271        // A.vrf and B.vrf each define their OWN `x` (different bodies); C imports
1272        // both. Both `x` coexist (per-source labels). Meanwhile the atom they
1273        // share (S has a) unifies into ONE id — names are scoped, atoms are not.
1274        let mut r = MemoryResolver::new();
1275        r.add(
1276            "A.vrf",
1277            r#"
1278        PREMISE x:
1279            EXCLUSIVE
1280                S has a
1281                S has b
1282        "#,
1283        );
1284        r.add(
1285            "B.vrf",
1286            r#"
1287        PREMISE x:
1288            EXCLUSIVE
1289                S has a
1290                S has c
1291        "#,
1292        );
1293        r.add("C.vrf", "IMPORT \"A.vrf\"\nIMPORT \"B.vrf\"\n");
1294        let c = compile("C.vrf", &r).unwrap();
1295
1296        // both `x` premises contributed a clause, kept apart by source
1297        assert_eq!(c.clauses.len(), 2);
1298        assert!(
1299            c.clauses
1300                .iter()
1301                .any(|cl| cl.origin.source == "A.vrf" && cl.origin.premise.as_deref() == Some("x"))
1302        );
1303        assert!(
1304            c.clauses
1305                .iter()
1306                .any(|cl| cl.origin.source == "B.vrf" && cl.origin.premise.as_deref() == Some("x"))
1307        );
1308
1309        // the shared atom `S has a` is a single interned id used by both clauses
1310        let s_a = id(&c, &key("S", "has", Some("a")));
1311        assert!(
1312            c.clauses
1313                .iter()
1314                .filter(|cl| cl.lits.iter().any(|l| l.atom == s_a))
1315                .count()
1316                == 2,
1317            "both clauses must reference the same unified `S has a` atom"
1318        );
1319    }
1320
1321    #[test]
1322    fn redefinition_within_one_source_still_errors() {
1323        // But reusing a name with a different body *inside one source* is a mistake.
1324        let src = r#"
1325        PREMISE e:
1326            EXCLUSIVE
1327                x a
1328                x b
1329        PREMISE e:
1330            EXCLUSIVE
1331                x a
1332                x c
1333        "#;
1334        let err = compile_source("main.vrf", src).unwrap_err();
1335        assert_eq!(
1336            err,
1337            CompileError::PremiseRedefinition {
1338                name: "e".to_string()
1339            }
1340        );
1341    }
1342
1343    #[test]
1344    fn import_demo_examples_resolve() {
1345        let mut r = MemoryResolver::new();
1346        r.add(
1347            "physics.vrf",
1348            include_str!("../../../docs/examples/physics.vrf"),
1349        );
1350        r.add(
1351            "import-demo.vrf",
1352            include_str!("../../../docs/examples/import-demo.vrf"),
1353        );
1354        let c = compile("import-demo.vrf", &r).unwrap();
1355        assert!(c.pending_imports.is_empty());
1356        // physics.vrf: one_path (EXCLUSIVE, 1 clause) + speed_order (impl, 1 clause)
1357        assert_eq!(c.clauses.len(), 2);
1358        // over_200 / over_100 unify between the facts and the imported premise.
1359        let over_100 = id(&c, &key("Motor", "over_100", None));
1360        assert!(c.facts.iter().any(|f| f.atom == over_100));
1361        assert!(
1362            c.clauses
1363                .iter()
1364                .any(|cl| cl.lits.iter().any(|l| l.atom == over_100))
1365        );
1366    }
1367
1368    #[test]
1369    fn creature_example_compiles() {
1370        let src = include_str!("../../../docs/examples/creature.vrf");
1371        let c = compile_source("creature.vrf", src).unwrap();
1372        assert_eq!(c.facts.len(), 2); // flying, warm_blood
1373        assert_eq!(c.rules.len(), 1); // needs_oxygen
1374        assert_eq!(c.checks.len(), 1);
1375        // fly_xor_swim (1) + wings_need_bone (THEN wing AND bone → 2) + no_dual_temp (1) = 4
1376        assert_eq!(c.clauses.len(), 4);
1377        assert_eq!(c.atoms.len(), 7);
1378    }
1379
1380    #[test]
1381    fn forbids_unfolds_pairwise() {
1382        let src = r#"
1383        PREMISE f:
1384            FORBIDS
1385                x a
1386                x b
1387                x c
1388        "#;
1389        let c = compile_source("<t>", src).unwrap();
1390        assert_eq!(c.clauses.len(), 3); // C(3,2), like EXCLUSIVE
1391        assert!(
1392            c.clauses
1393                .iter()
1394                .all(|cl| cl.lits.len() == 2 && cl.lits.iter().all(|l| !l.negated))
1395        );
1396    }
1397
1398    #[test]
1399    fn rule_with_multiple_consequents() {
1400        let src = r#"
1401        RULE r:
1402            WHEN x a
1403            THEN x b
1404            AND  x c
1405        "#;
1406        let c = compile_source("<t>", src).unwrap();
1407        assert_eq!(c.rules.len(), 1);
1408        assert_eq!(c.rules[0].consequent.len(), 2);
1409    }
1410
1411    #[test]
1412    fn negated_antecedent_literal_keeps_polarity() {
1413        // WHEN NOT x a THEN x b  ==  Impossible([NOT x a, NOT x b])
1414        let src = r#"
1415        PREMISE a:
1416            WHEN NOT x a
1417            THEN x b
1418        "#;
1419        let c = compile_source("<t>", src).unwrap();
1420        let xa = id(&c, &key("x", "a", None));
1421        assert!(c.clauses[0].lits.contains(&Lit {
1422            atom: xa,
1423            negated: true
1424        }));
1425    }
1426
1427    #[test]
1428    fn rule_keeps_consequent_negation() {
1429        let src = r#"
1430        RULE r:
1431            WHEN x a
1432            THEN NOT x b
1433        "#;
1434        let c = compile_source("<t>", src).unwrap();
1435        assert!(c.rules[0].consequent[0].negated);
1436    }
1437
1438    #[test]
1439    fn compilation_is_deterministic() {
1440        let src = r#"
1441        PREMISE e:
1442            EXCLUSIVE
1443                z z
1444                a a
1445                m m
1446        FACT q q
1447        "#;
1448        assert_eq!(
1449            compile_source("<t>", src).unwrap(),
1450            compile_source("<t>", src).unwrap()
1451        );
1452    }
1453
1454    #[test]
1455    fn empty_program_compiles_to_empty_ir() {
1456        let c = compile_source("<t>", "// nothing here\n").unwrap();
1457        assert!(c.atoms.is_empty() && c.clauses.is_empty() && c.facts.is_empty());
1458    }
1459
1460    #[test]
1461    fn same_clause_from_two_named_premises_is_deduped() {
1462        // Different names, identical logical content → one clause, no redefinition.
1463        let src = r#"
1464        PREMISE e1:
1465            EXCLUSIVE
1466                x a
1467                x b
1468        PREMISE e2:
1469            EXCLUSIVE
1470                x a
1471                x b
1472        "#;
1473        let c = compile_source("<t>", src).unwrap();
1474        assert_eq!(c.clauses.len(), 1);
1475    }
1476
1477    #[test]
1478    fn object_distinguishes_atom_identity() {
1479        // `x p a` and `x p b` differ only by object → two distinct atoms.
1480        let c = compile_source("<t>", "FACT x p a\nFACT x p b\n").unwrap();
1481        assert_eq!(c.atoms.len(), 2);
1482    }
1483}