elenchus_compiler/
lib.rs

1//! elenchus-compiler — compiles parsed elenchus DSL into a canonical clause IR.
2//!
3//! This crate is **preparation, not solving**. It takes the AST (from
4//! `elenchus-parser`) and produces a deterministic, solver-ready intermediate
5//! representation:
6//!
7//! - **atom interner**: `(subject, predicate, object?)` → dense `u32` id,
8//!   canonically sorted so ids (and any later enumeration) are deterministic;
9//! - **desugaring**: surface CAPS sugar → `Impossible` clauses
10//!   (`EXCLUSIVE` pairwise, `WHEN…THEN` → `Impossible([A, …, NOT C])`, etc.);
11//! - **content-addressing** (sha256, mirroring vsm-guard's CAS): identical
12//!   clauses are deduped (idempotent — `P ∧ P ≡ P`), and a named construct
13//!   redefined with a different body is a `PremiseRedefinition` error.
14//!
15//! The actual reasoning (3-valued forward chaining, SAT, all-SAT, the WARNING
16//! pool, the four results) belongs to the future `elenchus-solver` crate.
17//!
18//! `IMPORT` resolution (a source-agnostic `Resolver`, flat-merge into the shared
19//! atom universe) lands next; for now imports are recorded as pending.
20#![no_std]
21// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
22#![warn(missing_docs)]
23
24extern crate alloc;
25
26#[cfg(feature = "std")]
27extern crate std;
28
29use alloc::collections::{BTreeMap, BTreeSet};
30use alloc::string::{String, ToString};
31use alloc::vec;
32use alloc::vec::Vec;
33use core::fmt::Write as _;
34
35use elenchus_parser::{Atom, Body, ListOp, Literal, Statement};
36use sha2::{Digest, Sha256};
37use thiserror::Error;
38
39// --- content-addressing (mirrors vsm-guard::hashing) -----------------------
40
41/// SHA-256 content addressing. Used only for dedup / redefinition / provenance,
42/// never for namespacing atoms.
43pub fn hash_hex(data: &[u8]) -> String {
44    let mut hasher = Sha256::new();
45    hasher.update(data);
46    let out = hasher.finalize();
47    let mut s = String::with_capacity(out.len() * 2);
48    for b in out {
49        let _ = write!(s, "{:02x}", b);
50    }
51    s
52}
53
54// --- IR types --------------------------------------------------------------
55
56/// Dense atom identifier (also the SAT variable number).
57pub type AtomId = u32;
58
59/// The identity of an atom: the triple `(subject, predicate, object?)`, owned so
60/// it survives across merged sources. Ordering is lexicographic → canonical.
61#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
62pub struct AtomKey {
63    /// The entity the claim is about (owned copy of the parser's `subject`).
64    pub subject: String,
65    /// The relation or property asserted.
66    pub predicate: String,
67    /// Optional object; part of identity, so `has flying` ≠ `has swimming`.
68    pub object: Option<String>,
69}
70
71impl AtomKey {
72    /// Owned copy of a borrowed parser [`Atom`].
73    fn from_atom(a: &Atom) -> Self {
74        AtomKey {
75            subject: a.subject.to_string(),
76            predicate: a.predicate.to_string(),
77            object: a.object.map(|o| o.to_string()),
78        }
79    }
80}
81
82/// A literal as it appears *inside* an `Impossible` clause: an atom, optionally
83/// negated. `negated = true` means the literal is `NOT atom`.
84#[derive(Debug, Clone, Copy, PartialEq, Eq)]
85pub struct Lit {
86    /// Interned id of the atom (also its SAT variable number).
87    pub atom: AtomId,
88    /// `true` means this literal is `NOT atom` inside the clause.
89    pub negated: bool,
90}
91
92/// A confident truth value. UNKNOWN is the *absence* of a fact, never stored.
93#[derive(Debug, Clone, Copy, PartialEq, Eq)]
94pub enum Value {
95    /// The atom is asserted TRUE (from `FACT`).
96    True,
97    /// The atom is asserted FALSE (from `NOT`).
98    False,
99}
100
101/// Where a piece of IR came from — for readable conflict/warning pools.
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub struct Origin {
104    /// The source label this came from (file name or `"<root>"`/`"<text>"`).
105    pub source: String,
106    /// 1-based line number of the originating statement.
107    pub line: u32,
108    /// The premise/rule name, if it came from a named construct.
109    pub premise: Option<String>,
110    /// Surface kind for the report, e.g. `"FACT"`, `"EXCLUSIVE"`, `"PREMISE"`.
111    pub kind: &'static str,
112}
113
114/// A confident fact (from `FACT` / `NOT`). Conflicting facts on the same atom
115/// are preserved (both kept) — the solver reports that as a CONFLICT.
116#[derive(Debug, Clone, PartialEq, Eq)]
117pub struct Fact {
118    /// The atom this fact pins down.
119    pub atom: AtomId,
120    /// The asserted truth value.
121    pub value: Value,
122    /// Where it came from (for the report).
123    pub origin: Origin,
124}
125
126/// An `Impossible` clause: the listed literals cannot all hold simultaneously.
127#[derive(Debug, Clone, PartialEq, Eq)]
128pub struct Clause {
129    /// The literals that cannot all hold at once (an `Impossible([...])`).
130    pub lits: Vec<Lit>,
131    /// Where it came from (for the report).
132    pub origin: Origin,
133}
134
135/// A forward-chaining rule (from `RULE`): if all antecedent literals hold, derive
136/// the consequent literals.
137#[derive(Debug, Clone, PartialEq, Eq)]
138pub struct Rule {
139    /// Literals that must all hold for the rule to fire.
140    pub antecedent: Vec<Lit>,
141    /// Literals derived (asserted) when the antecedent holds.
142    pub consequent: Vec<Lit>,
143    /// Where it came from (for the report).
144    pub origin: Origin,
145}
146
147/// A `CHECK` query.
148#[derive(Debug, Clone, PartialEq, Eq)]
149pub struct Check {
150    /// Restrict the report to this subject; `None` means check everything.
151    pub subject: Option<String>,
152    /// `true` runs the backward (all-SAT) pass to detect UNDERDETERMINED.
153    pub bidirectional: bool,
154}
155
156/// The compiled IR: the solver's input.
157#[derive(Debug, Clone, PartialEq, Eq)]
158pub struct Compiled {
159    /// Indexed by [`AtomId`]; canonically sorted.
160    pub atoms: Vec<AtomKey>,
161    /// Confident assertions from `FACT`/`NOT`.
162    pub facts: Vec<Fact>,
163    /// `Impossible` clauses (desugared premises + the built-in non-contradiction).
164    pub clauses: Vec<Clause>,
165    /// Forward-chaining rules from `RULE`.
166    pub rules: Vec<Rule>,
167    /// `CHECK` queries.
168    pub checks: Vec<Check>,
169    /// Imports seen but not yet resolved (only populated by [`compile_source`];
170    /// [`compile`] resolves them, leaving this empty).
171    pub pending_imports: Vec<String>,
172}
173
174/// Anything that can go wrong while compiling (and resolving imports).
175#[derive(Debug, Error, PartialEq, Eq)]
176pub enum CompileError {
177    /// A source failed to parse; carries the file label and the parser message.
178    #[error("parse error in {file}: {message}")]
179    Parse {
180        /// The source label that failed to parse.
181        file: String,
182        /// The parser's error message.
183        message: String,
184    },
185    /// A name was reused with a different body *within the same source*.
186    #[error("'{name}' redefined with a different body")]
187    PremiseRedefinition {
188        /// The clashing premise/rule name.
189        name: String,
190    },
191    /// An `IMPORT` target could not be loaded by the [`Resolver`].
192    #[error("import not found: {0}")]
193    ImportNotFound(String),
194    /// Imports form a cycle (a source transitively imports itself).
195    #[error("circular import: {0}")]
196    CircularImport(String),
197}
198
199// --- raw (key-based) intermediate, before interning ------------------------
200// While accumulating we key everything by `AtomKey` (the owned triple) rather
201// than by `AtomId`, because ids only become stable once *all* sources are merged
202// and the atom set is sorted in `finalize`. These mirror the public IR types but
203// hold keys instead of ids.
204
205/// A literal keyed by atom identity (pre-interning counterpart of [`Lit`]).
206#[derive(Clone)]
207struct RawLit {
208    key: AtomKey,
209    negated: bool,
210}
211
212/// A fact keyed by atom identity (pre-interning counterpart of [`Fact`]).
213struct RawFact {
214    key: AtomKey,
215    value: Value,
216    origin: Origin,
217}
218
219/// A clause keyed by atom identity (pre-interning counterpart of [`Clause`]).
220struct RawClause {
221    lits: Vec<RawLit>,
222    origin: Origin,
223}
224
225/// A rule keyed by atom identity (pre-interning counterpart of [`Rule`]).
226struct RawRule {
227    antecedent: Vec<RawLit>,
228    consequent: Vec<RawLit>,
229    origin: Origin,
230}
231
232// --- compiler --------------------------------------------------------------
233
234/// Accumulates statements from one or more sources, then interns + emits the IR.
235#[derive(Default)]
236pub struct Compiler {
237    keys: BTreeSet<AtomKey>,
238    facts: Vec<RawFact>,
239    clauses: Vec<RawClause>,
240    rules: Vec<RawRule>,
241    checks: Vec<Check>,
242    pending_imports: Vec<String>,
243    /// (source, name) → content hash of its body, for redefinition detection.
244    /// Scoped per source: premise/rule names are labels, not global identifiers,
245    /// so different files (domains) may reuse a name. A clash is only an error
246    /// *within the same source*.
247    defined: BTreeMap<(String, String), String>,
248    /// dedup of identical clauses by canonical content hash.
249    clause_sigs: BTreeSet<String>,
250    /// dedup of identical facts by (key, value).
251    fact_sigs: BTreeSet<String>,
252}
253
254impl Compiler {
255    /// A fresh, empty compiler.
256    pub fn new() -> Self {
257        Self::default()
258    }
259
260    /// Parse one source and accumulate its statements. `source` is a label used
261    /// in provenance (e.g. a file name or `"<root>"`).
262    pub fn add_source(&mut self, source: &str, src: &str) -> Result<(), CompileError> {
263        let program = elenchus_parser::parse(src).map_err(|e| CompileError::Parse {
264            file: source.to_string(),
265            message: e.message,
266        })?;
267        for stmt in &program.statements {
268            self.add_statement(source, stmt)?;
269        }
270        Ok(())
271    }
272
273    /// Resolve and flat-merge `path` and its transitive imports.
274    /// `visited` holds content hashes already merged (dedup); `stack` holds the
275    /// hashes on the current path (cycle detection).
276    fn load_recursive<R: Resolver>(
277        &mut self,
278        path: &str,
279        resolver: &R,
280        visited: &mut BTreeSet<String>,
281        stack: &mut Vec<String>,
282    ) -> Result<(), CompileError> {
283        let content = resolver.load(path)?;
284        let hash = hash_hex(content.as_bytes());
285        if visited.contains(&hash) {
286            return Ok(()); // already merged — idempotent
287        }
288        if stack.contains(&hash) {
289            return Err(CompileError::CircularImport(path.to_string()));
290        }
291        stack.push(hash.clone());
292
293        let program = elenchus_parser::parse(&content).map_err(|e| CompileError::Parse {
294            file: path.to_string(),
295            message: e.message,
296        })?;
297        for stmt in &program.statements {
298            if let Statement::Import(p) = stmt {
299                let resolved = resolver.resolve(path, p.data);
300                self.load_recursive(&resolved, resolver, visited, stack)?;
301            } else {
302                self.add_statement(path, stmt)?;
303            }
304        }
305
306        stack.pop();
307        visited.insert(hash);
308        Ok(())
309    }
310
311    /// Route one statement to the right accumulator (facts, checks, named
312    /// constructs); `IMPORT` is only recorded as pending here (see [`compile`]
313    /// / [`load_recursive`] for actual resolution).
314    fn add_statement(&mut self, source: &str, stmt: &Statement) -> Result<(), CompileError> {
315        match stmt {
316            Statement::Import(path) => {
317                self.pending_imports.push(path.data.to_string());
318            }
319            Statement::Fact(a) => self.add_fact(source, a, Value::True, "FACT"),
320            Statement::Negation(a) => self.add_fact(source, a, Value::False, "NOT"),
321            Statement::Check {
322                subject,
323                bidirectional,
324            } => self.checks.push(Check {
325                subject: subject.as_ref().map(|s| s.data.to_string()),
326                bidirectional: *bidirectional,
327            }),
328            Statement::Premise { name, body } => {
329                let line = name.span.location_line();
330                self.add_named(source, name.data, line, body, false)?;
331            }
332            Statement::Rule { name, body } => {
333                let line = name.span.location_line();
334                self.add_named(source, name.data, line, body, true)?;
335            }
336        }
337        Ok(())
338    }
339
340    /// Record an atom identity in the shared universe (deduped by the `BTreeSet`).
341    fn intern(&mut self, key: &AtomKey) {
342        if !self.keys.contains(key) {
343            self.keys.insert(key.clone());
344        }
345    }
346
347    /// Accumulate a `FACT`/`NOT`; exact duplicates (same key+value+kind) are
348    /// dropped as idempotent, while a `FACT` and a `NOT` on the same atom are
349    /// both kept so the solver can report the CONFLICT.
350    fn add_fact(
351        &mut self,
352        source: &str,
353        a: &elenchus_parser::Located<Atom>,
354        value: Value,
355        kind: &'static str,
356    ) {
357        let key = AtomKey::from_atom(&a.data);
358        self.intern(&key);
359        let sig = alloc::format!(
360            "{}|{}|{}|{}",
361            key_sig(&key),
362            matches!(value, Value::True) as u8,
363            kind,
364            "" // facts dedup ignores line; identical FACT twice is idempotent
365        );
366        if !self.fact_sigs.insert(sig) {
367            return; // exact duplicate fact — idempotent
368        }
369        self.facts.push(RawFact {
370            key,
371            value,
372            origin: Origin {
373                source: source.to_string(),
374                line: a.span.location_line(),
375                premise: None,
376                kind,
377            },
378        });
379    }
380
381    /// Handle a named construct (`PREMISE` or `RULE`). `is_rule` selects derivation
382    /// vs checking. Returns an error on redefinition with a different body.
383    fn add_named(
384        &mut self,
385        source: &str,
386        name: &str,
387        line: u32,
388        body: &Body,
389        is_rule: bool,
390    ) -> Result<(), CompileError> {
391        let body_hash = hash_hex(canonical_body(name, body, is_rule).as_bytes());
392        let key = (source.to_string(), name.to_string());
393        match self.defined.get(&key) {
394            Some(prev) if *prev == body_hash => return Ok(()), // identical → idempotent
395            Some(_) => {
396                // Same name + different body *in the same source* — a real mistake.
397                return Err(CompileError::PremiseRedefinition {
398                    name: name.to_string(),
399                });
400            }
401            None => {
402                self.defined.insert(key, body_hash);
403            }
404        }
405
406        if is_rule {
407            // RULE always has an implication body (guaranteed by the grammar).
408            if let Body::Impl {
409                antecedent,
410                consequent,
411            } = body
412            {
413                let (ante, cons) = (raw_lits(antecedent), raw_lits(consequent));
414                for l in ante.iter().chain(cons.iter()) {
415                    self.intern(&l.key);
416                }
417                self.rules.push(RawRule {
418                    antecedent: ante,
419                    consequent: cons,
420                    origin: self.origin(source, line, Some(name), "RULE"),
421                });
422            }
423            return Ok(());
424        }
425
426        match body {
427            Body::List { op, atoms } => {
428                let keys: Vec<AtomKey> =
429                    atoms.iter().map(|a| AtomKey::from_atom(&a.data)).collect();
430                for k in &keys {
431                    self.intern(k);
432                }
433                let kind = list_kind(*op);
434                let origin = self.origin(source, line, Some(name), kind);
435                match op {
436                    // EXCLUSIVE / FORBIDS: "at most one" → pairwise Impossible([a_i, a_j]).
437                    ListOp::Exclusive | ListOp::Forbids => {
438                        self.emit_pairwise(&keys, &origin);
439                    }
440                    // ONEOF: pairwise (at most one) + at-least-one.
441                    ListOp::OneOf => {
442                        self.emit_pairwise(&keys, &origin);
443                        self.emit_at_least_one(&keys, &origin);
444                    }
445                    // ATLEAST: Impossible([NOT a_1, …, NOT a_n]).
446                    ListOp::AtLeast => {
447                        self.emit_at_least_one(&keys, &origin);
448                    }
449                }
450            }
451            Body::Impl {
452                antecedent,
453                consequent,
454            } => {
455                // A1 ∧ … ∧ An → C  ==  Impossible([A1, …, An, NOT C]) for each consequent C.
456                let ante = raw_lits(antecedent);
457                for l in &ante {
458                    self.intern(&l.key);
459                }
460                let origin = self.origin(source, line, Some(name), "PREMISE");
461                for c in consequent {
462                    let neg_c = RawLit {
463                        key: AtomKey::from_atom(&c.data.atom),
464                        negated: !c.data.negated,
465                    };
466                    self.intern(&neg_c.key);
467                    let mut lits = ante.clone();
468                    lits.push(neg_c);
469                    self.push_clause(lits, origin.clone());
470                }
471            }
472        }
473        Ok(())
474    }
475
476    /// Emit "at most one TRUE" as one `Impossible([a_i, a_j])` per unordered
477    /// pair. Pairwise (not a single big clause) because `Impossible([a,b,c])`
478    /// only forbids *all three* together — it would still allow two.
479    fn emit_pairwise(&mut self, keys: &[AtomKey], origin: &Origin) {
480        for i in 0..keys.len() {
481            for j in (i + 1)..keys.len() {
482                let lits = vec![
483                    RawLit {
484                        key: keys[i].clone(),
485                        negated: false,
486                    },
487                    RawLit {
488                        key: keys[j].clone(),
489                        negated: false,
490                    },
491                ];
492                self.push_clause(lits, origin.clone());
493            }
494        }
495    }
496
497    /// Emit "at least one TRUE" as a single `Impossible([NOT a_1, …, NOT a_n])`
498    /// — it is impossible for all of them to be false at once.
499    fn emit_at_least_one(&mut self, keys: &[AtomKey], origin: &Origin) {
500        let lits = keys
501            .iter()
502            .map(|k| RawLit {
503                key: k.clone(),
504                negated: true,
505            })
506            .collect();
507        self.push_clause(lits, origin.clone());
508    }
509
510    /// Append a clause unless an identical one (by canonical [`clause_sig`]) is
511    /// already present — `P ∧ P ≡ P`, so dedup keeps the IR minimal.
512    fn push_clause(&mut self, lits: Vec<RawLit>, origin: Origin) {
513        let sig = clause_sig(&lits);
514        if self.clause_sigs.insert(sig) {
515            self.clauses.push(RawClause { lits, origin });
516        }
517        // else: identical clause already present — idempotent.
518    }
519
520    /// Build an [`Origin`] for provenance from the current source/line/name.
521    fn origin(&self, source: &str, line: u32, premise: Option<&str>, kind: &'static str) -> Origin {
522        Origin {
523            source: source.to_string(),
524            line,
525            premise: premise.map(|s| s.to_string()),
526            kind,
527        }
528    }
529
530    /// Intern all atoms (canonical sort), then lower the raw IR to ids.
531    pub fn finalize(self) -> Compiled {
532        let atoms: Vec<AtomKey> = self.keys.into_iter().collect(); // BTreeSet → sorted
533        let mut id_of: BTreeMap<AtomKey, AtomId> = BTreeMap::new();
534        for (i, k) in atoms.iter().enumerate() {
535            id_of.insert(k.clone(), i as AtomId);
536        }
537        let lower = |l: &RawLit| Lit {
538            atom: id_of[&l.key],
539            negated: l.negated,
540        };
541
542        let facts = self
543            .facts
544            .into_iter()
545            .map(|f| Fact {
546                atom: id_of[&f.key],
547                value: f.value,
548                origin: f.origin,
549            })
550            .collect();
551        let clauses = self
552            .clauses
553            .into_iter()
554            .map(|c| Clause {
555                lits: c.lits.iter().map(lower).collect(),
556                origin: c.origin,
557            })
558            .collect();
559        let rules = self
560            .rules
561            .into_iter()
562            .map(|r| Rule {
563                antecedent: r.antecedent.iter().map(lower).collect(),
564                consequent: r.consequent.iter().map(lower).collect(),
565                origin: r.origin,
566            })
567            .collect();
568
569        Compiled {
570            atoms,
571            facts,
572            clauses,
573            rules,
574            checks: self.checks,
575            pending_imports: self.pending_imports,
576        }
577    }
578}
579
580/// Convenience: compile a single source into the IR. `IMPORT`s are recorded as
581/// pending, not resolved (use [`compile`] with a [`Resolver`] to resolve them).
582pub fn compile_source(source: &str, src: &str) -> Result<Compiled, CompileError> {
583    let mut c = Compiler::new();
584    c.add_source(source, src)?;
585    Ok(c.finalize())
586}
587
588// --- import resolution (source-agnostic) -----------------------------------
589
590/// Resolves `IMPORT "path"` to source text. The engine is source-agnostic: it
591/// consumes strings, so a file is merely one backing store. Mirrors
592/// vsm-grammar's `SourceResolver`.
593pub trait Resolver {
594    /// Load the raw source text for a resolved path.
595    fn load(&self, path: &str) -> Result<String, CompileError>;
596
597    /// Normalize `relative` against the importing source `base`.
598    /// Default: paths are absolute names, returned unchanged.
599    fn resolve(&self, _base: &str, relative: &str) -> String {
600        relative.to_string()
601    }
602}
603
604/// An in-memory resolver: serves sources from a name → content map. Pure
605/// `no_std`. Mirrors vsm-grammar's `MemoryResolver`.
606#[derive(Default)]
607pub struct MemoryResolver {
608    sources: BTreeMap<String, String>,
609}
610
611impl MemoryResolver {
612    /// An empty resolver with no sources.
613    pub fn new() -> Self {
614        Self::default()
615    }
616
617    /// Register `content` under `path`; returns `&mut self` for chaining.
618    pub fn add(&mut self, path: &str, content: &str) -> &mut Self {
619        self.sources.insert(path.to_string(), content.to_string());
620        self
621    }
622}
623
624impl Resolver for MemoryResolver {
625    fn load(&self, path: &str) -> Result<String, CompileError> {
626        self.sources
627            .get(path)
628            .cloned()
629            .ok_or_else(|| CompileError::ImportNotFound(path.to_string()))
630    }
631}
632
633/// A filesystem-backed resolver. Mirrors vsm-grammar's `FileResolver`:
634/// relative imports resolve against the importing file's directory, with manual
635/// `..` normalization (no canonicalization, to keep a virtual layout).
636#[cfg(feature = "std")]
637pub struct FileResolver;
638
639#[cfg(feature = "std")]
640impl Resolver for FileResolver {
641    fn load(&self, path: &str) -> Result<String, CompileError> {
642        std::fs::read_to_string(path)
643            .map_err(|e| CompileError::ImportNotFound(alloc::format!("{}: {}", path, e)))
644    }
645
646    fn resolve(&self, base: &str, relative: &str) -> String {
647        use std::path::{Component, Path, PathBuf};
648        let parent = Path::new(base).parent().unwrap_or_else(|| Path::new("."));
649        let joined = parent.join(relative);
650        let mut out = PathBuf::new();
651        for component in joined.components() {
652            match component {
653                Component::ParentDir => {
654                    out.pop();
655                }
656                Component::CurDir => {}
657                c => out.push(c),
658            }
659        }
660        // Normalize to forward slashes so resolved paths (and therefore the
661        // provenance recorded in the IR) are identical on Windows and Unix.
662        // Windows `std::fs` accepts `/` just fine.
663        out.to_string_lossy().replace('\\', "/")
664    }
665}
666
667/// Compile a root source and all its transitive `IMPORT`s into one IR.
668///
669/// Imports are flat-merged into a single shared atom universe (atoms unify by
670/// identity across files). Sources are content-addressed (sha256): a source
671/// already merged is skipped (dedup), and an import cycle is an error.
672///
673/// Premise/rule names are per-source labels, not global identifiers: different
674/// files (domains) may reuse a name, and the report qualifies them by source. A
675/// name reused with a different body is an error only *within the same source*.
676pub fn compile<R: Resolver>(root: &str, resolver: &R) -> Result<Compiled, CompileError> {
677    let mut c = Compiler::new();
678    let mut visited = BTreeSet::new();
679    let mut stack = Vec::new();
680    c.load_recursive(root, resolver, &mut visited, &mut stack)?;
681    Ok(c.finalize())
682}
683
684// --- helpers ---------------------------------------------------------------
685
686/// Lower parsed, located literals to key-based [`RawLit`]s (drops spans).
687fn raw_lits(lits: &[elenchus_parser::Located<Literal>]) -> Vec<RawLit> {
688    lits.iter()
689        .map(|l| RawLit {
690            key: AtomKey::from_atom(&l.data.atom),
691            negated: l.data.negated,
692        })
693        .collect()
694}
695
696/// The surface keyword for a list op, used as [`Origin::kind`] in the report.
697fn list_kind(op: ListOp) -> &'static str {
698    match op {
699        ListOp::Exclusive => "EXCLUSIVE",
700        ListOp::Forbids => "FORBIDS",
701        ListOp::OneOf => "ONEOF",
702        ListOp::AtLeast => "ATLEAST",
703    }
704}
705
706/// Stable `subject|predicate|object` string for an atom key (the unit from which
707/// clause/fact/body signatures are built).
708fn key_sig(k: &AtomKey) -> String {
709    alloc::format!(
710        "{}|{}|{}",
711        k.subject,
712        k.predicate,
713        k.object.as_deref().unwrap_or("")
714    )
715}
716
717/// Canonical, order-independent signature of a clause's literals (for dedup).
718fn clause_sig(lits: &[RawLit]) -> String {
719    let mut parts: Vec<String> = lits
720        .iter()
721        .map(|l| alloc::format!("{}|{}", key_sig(&l.key), l.negated as u8))
722        .collect();
723    parts.sort();
724    parts.dedup();
725    parts.join(";")
726}
727
728/// Canonical body string for a named construct, hashed for redefinition checks.
729fn canonical_body(name: &str, body: &Body, is_rule: bool) -> String {
730    let mut s = String::new();
731    let _ = write!(s, "{}|{}|", if is_rule { "RULE" } else { "PREMISE" }, name);
732    match body {
733        Body::List { op, atoms } => {
734            let _ = write!(s, "LIST|{}|", list_kind(*op));
735            let mut keys: Vec<String> = atoms
736                .iter()
737                .map(|a| key_sig(&AtomKey::from_atom(&a.data)))
738                .collect();
739            keys.sort();
740            s.push_str(&keys.join(";"));
741        }
742        Body::Impl {
743            antecedent,
744            consequent,
745        } => {
746            s.push_str("IMPL|ANTE|");
747            s.push_str(&lit_sigs(antecedent));
748            s.push_str("|CONS|");
749            s.push_str(&lit_sigs(consequent));
750        }
751    }
752    s
753}
754
755/// Sorted `key|negated` signature of a literal list (order-independent), used
756/// inside [`canonical_body`] so reordering a body does not look like a redefinition.
757fn lit_sigs(lits: &[elenchus_parser::Located<Literal>]) -> String {
758    let mut parts: Vec<String> = lits
759        .iter()
760        .map(|l| {
761            alloc::format!(
762                "{}|{}",
763                key_sig(&AtomKey::from_atom(&l.data.atom)),
764                l.data.negated as u8
765            )
766        })
767        .collect();
768    parts.sort();
769    parts.join(";")
770}
771
772#[cfg(test)]
773mod tests {
774    use super::*;
775
776    fn key(subject: &str, predicate: &str, object: Option<&str>) -> AtomKey {
777        AtomKey {
778            subject: subject.to_string(),
779            predicate: predicate.to_string(),
780            object: object.map(|o| o.to_string()),
781        }
782    }
783
784    fn id(c: &Compiled, k: &AtomKey) -> AtomId {
785        c.atoms.iter().position(|a| a == k).unwrap() as AtomId
786    }
787
788    #[test]
789    fn exclusive_unfolds_pairwise() {
790        let src = "PREMISE e:\n    EXCLUSIVE\n        x a\n        x b\n        x c\n";
791        let c = compile_source("<t>", src).unwrap();
792        // C(3,2) = 3 clauses, each of 2 positive literals.
793        assert_eq!(c.clauses.len(), 3);
794        for cl in &c.clauses {
795            assert_eq!(cl.lits.len(), 2);
796            assert!(cl.lits.iter().all(|l| !l.negated));
797        }
798    }
799
800    #[test]
801    fn implication_negates_consequent() {
802        // WHEN x a THEN x b  ==  Impossible([x a, NOT x b])
803        let src = "PREMISE r:\n    WHEN x a\n    THEN x b\n";
804        let c = compile_source("<t>", src).unwrap();
805        assert_eq!(c.clauses.len(), 1);
806        let cl = &c.clauses[0];
807        assert_eq!(cl.lits.len(), 2);
808        let a = id(&c, &key("x", "a", None));
809        let b = id(&c, &key("x", "b", None));
810        assert!(cl.lits.contains(&Lit {
811            atom: a,
812            negated: false
813        }));
814        assert!(cl.lits.contains(&Lit {
815            atom: b,
816            negated: true
817        }));
818    }
819
820    #[test]
821    fn negated_consequent_flips_to_positive() {
822        // THEN NOT x b  →  NOT(NOT x b) = x b positive inside Impossible
823        let src = "PREMISE r:\n    WHEN x a\n    THEN NOT x b\n";
824        let c = compile_source("<t>", src).unwrap();
825        let b = id(&c, &key("x", "b", None));
826        assert!(c.clauses[0].lits.contains(&Lit {
827            atom: b,
828            negated: false
829        }));
830    }
831
832    #[test]
833    fn oneof_is_pairwise_plus_at_least_one() {
834        let src = "PREMISE o:\n    ONEOF\n        x a\n        x b\n";
835        let c = compile_source("<t>", src).unwrap();
836        // pairwise C(2,2)=1 + 1 at-least-one = 2 clauses
837        assert_eq!(c.clauses.len(), 2);
838        // the at-least-one clause is the all-negated one
839        assert!(c.clauses.iter().any(|cl| cl.lits.iter().all(|l| l.negated)));
840    }
841
842    #[test]
843    fn atleast_is_one_negated_clause() {
844        let src = "PREMISE a:\n    ATLEAST\n        x a\n        x b\n        x c\n";
845        let c = compile_source("<t>", src).unwrap();
846        assert_eq!(c.clauses.len(), 1);
847        assert_eq!(c.clauses[0].lits.len(), 3);
848        assert!(c.clauses[0].lits.iter().all(|l| l.negated));
849    }
850
851    #[test]
852    fn rules_are_separate_from_clauses() {
853        let src = "RULE needs:\n    WHEN x a\n    THEN x b\n";
854        let c = compile_source("<t>", src).unwrap();
855        assert_eq!(c.clauses.len(), 0);
856        assert_eq!(c.rules.len(), 1);
857        assert_eq!(c.rules[0].antecedent.len(), 1);
858        assert_eq!(c.rules[0].consequent.len(), 1);
859    }
860
861    #[test]
862    fn atoms_are_canonically_sorted() {
863        let src = "FACT z z\nFACT a a\nFACT m m\n";
864        let c = compile_source("<t>", src).unwrap();
865        let mut sorted = c.atoms.clone();
866        sorted.sort();
867        assert_eq!(c.atoms, sorted);
868    }
869
870    #[test]
871    fn duplicate_premise_is_idempotent() {
872        let src = "PREMISE e:\n    EXCLUSIVE\n        x a\n        x b\nPREMISE e:\n    EXCLUSIVE\n        x a\n        x b\n";
873        let c = compile_source("<t>", src).unwrap();
874        assert_eq!(c.clauses.len(), 1);
875    }
876
877    #[test]
878    fn redefinition_with_different_body_errors() {
879        let src = "PREMISE e:\n    EXCLUSIVE\n        x a\n        x b\nPREMISE e:\n    EXCLUSIVE\n        x a\n        x c\n";
880        let err = compile_source("<t>", src).unwrap_err();
881        assert_eq!(
882            err,
883            CompileError::PremiseRedefinition {
884                name: "e".to_string()
885            }
886        );
887    }
888
889    #[test]
890    fn duplicate_fact_is_idempotent() {
891        let c = compile_source("<t>", "FACT x a\nFACT x a\n").unwrap();
892        assert_eq!(c.facts.len(), 1);
893    }
894
895    #[test]
896    fn conflicting_facts_are_both_kept() {
897        // FACT X + NOT X is a CONFLICT for the solver, not a compile error.
898        let c = compile_source("<t>", "FACT x a\nNOT x a\n").unwrap();
899        assert_eq!(c.facts.len(), 2);
900    }
901
902    #[test]
903    fn import_is_recorded_pending() {
904        let c = compile_source("<t>", "IMPORT \"physics.vrf\"\nFACT x a\n").unwrap();
905        assert_eq!(c.pending_imports, vec!["physics.vrf".to_string()]);
906    }
907
908    #[test]
909    fn import_flat_merges_and_atoms_unify() {
910        // The library constrains `Engine.X has fuel`; the main file asserts it.
911        // After merge the atom must be ONE shared id (unification across files).
912        let mut r = MemoryResolver::new();
913        r.add(
914            "lib.vrf",
915            "PREMISE needs_fuel:\n    WHEN Engine.X has engine\n    THEN Engine.X has fuel\n",
916        );
917        r.add(
918            "main.vrf",
919            "IMPORT \"lib.vrf\"\nFACT Engine.X has engine\nFACT Engine.X has fuel\n",
920        );
921        let c = compile("main.vrf", &r).unwrap();
922        assert!(c.pending_imports.is_empty());
923        assert_eq!(c.clauses.len(), 1); // the imported premise
924        assert_eq!(c.facts.len(), 2);
925
926        // `Engine.X has fuel` from the FACT and from the imported premise share an id.
927        let fuel = key("Engine.X", "has", Some("fuel"));
928        let fuel_id = id(&c, &fuel);
929        assert!(c.facts.iter().any(|f| f.atom == fuel_id));
930        assert!(c.clauses[0].lits.iter().any(|l| l.atom == fuel_id));
931    }
932
933    #[test]
934    fn diamond_import_is_deduped() {
935        // main → a, b ; a → base ; b → base. base merged once.
936        let mut r = MemoryResolver::new();
937        r.add(
938            "base.vrf",
939            "PREMISE b:\n    EXCLUSIVE\n        x a\n        x b\n",
940        );
941        r.add("a.vrf", "IMPORT \"base.vrf\"\n");
942        r.add("c.vrf", "IMPORT \"base.vrf\"\n");
943        r.add("main.vrf", "IMPORT \"a.vrf\"\nIMPORT \"c.vrf\"\n");
944        let c = compile("main.vrf", &r).unwrap();
945        assert_eq!(c.clauses.len(), 1); // base's single clause, not two
946    }
947
948    #[test]
949    fn circular_import_errors() {
950        let mut r = MemoryResolver::new();
951        r.add("a.vrf", "IMPORT \"b.vrf\"\n");
952        r.add("b.vrf", "IMPORT \"a.vrf\"\n");
953        let err = compile("a.vrf", &r).unwrap_err();
954        assert!(matches!(err, CompileError::CircularImport(_)));
955    }
956
957    #[test]
958    fn missing_import_errors() {
959        let mut r = MemoryResolver::new();
960        r.add("main.vrf", "IMPORT \"ghost.vrf\"\n");
961        let err = compile("main.vrf", &r).unwrap_err();
962        assert!(matches!(err, CompileError::ImportNotFound(_)));
963    }
964
965    #[test]
966    fn same_name_across_domains_coexists() {
967        // Two files may legitimately reuse a premise NAME with different bodies
968        // (different domains). Names are per-source labels — both premises apply,
969        // and the report qualifies them by source. NOT a redefinition error.
970        let mut r = MemoryResolver::new();
971        r.add(
972            "physics.vrf",
973            "PREMISE safety:\n    EXCLUSIVE\n        x a\n        x b\n",
974        );
975        r.add(
976            "main.vrf",
977            "IMPORT \"physics.vrf\"\nPREMISE safety:\n    EXCLUSIVE\n        x a\n        x c\n",
978        );
979        let c = compile("main.vrf", &r).unwrap();
980        assert_eq!(c.clauses.len(), 2); // a-b from physics, a-c from main
981        assert!(c.clauses.iter().any(|cl| cl.origin.source == "physics.vrf"));
982        assert!(c.clauses.iter().any(|cl| cl.origin.source == "main.vrf"));
983    }
984
985    #[test]
986    fn two_libs_same_name_into_one_consumer() {
987        // A.vrf and B.vrf each define their OWN `x` (different bodies); C imports
988        // both. Both `x` coexist (per-source labels). Meanwhile the atom they
989        // share (S has a) unifies into ONE id — names are scoped, atoms are not.
990        let mut r = MemoryResolver::new();
991        r.add(
992            "A.vrf",
993            "PREMISE x:\n    EXCLUSIVE\n        S has a\n        S has b\n",
994        );
995        r.add(
996            "B.vrf",
997            "PREMISE x:\n    EXCLUSIVE\n        S has a\n        S has c\n",
998        );
999        r.add("C.vrf", "IMPORT \"A.vrf\"\nIMPORT \"B.vrf\"\n");
1000        let c = compile("C.vrf", &r).unwrap();
1001
1002        // both `x` premises contributed a clause, kept apart by source
1003        assert_eq!(c.clauses.len(), 2);
1004        assert!(
1005            c.clauses
1006                .iter()
1007                .any(|cl| cl.origin.source == "A.vrf" && cl.origin.premise.as_deref() == Some("x"))
1008        );
1009        assert!(
1010            c.clauses
1011                .iter()
1012                .any(|cl| cl.origin.source == "B.vrf" && cl.origin.premise.as_deref() == Some("x"))
1013        );
1014
1015        // the shared atom `S has a` is a single interned id used by both clauses
1016        let s_a = id(&c, &key("S", "has", Some("a")));
1017        assert!(
1018            c.clauses
1019                .iter()
1020                .filter(|cl| cl.lits.iter().any(|l| l.atom == s_a))
1021                .count()
1022                == 2,
1023            "both clauses must reference the same unified `S has a` atom"
1024        );
1025    }
1026
1027    #[test]
1028    fn redefinition_within_one_source_still_errors() {
1029        // But reusing a name with a different body *inside one source* is a mistake.
1030        let src = "PREMISE e:\n    EXCLUSIVE\n        x a\n        x b\nPREMISE e:\n    EXCLUSIVE\n        x a\n        x c\n";
1031        let err = compile_source("main.vrf", src).unwrap_err();
1032        assert_eq!(
1033            err,
1034            CompileError::PremiseRedefinition {
1035                name: "e".to_string()
1036            }
1037        );
1038    }
1039
1040    #[test]
1041    fn import_demo_examples_resolve() {
1042        let mut r = MemoryResolver::new();
1043        r.add(
1044            "physics.vrf",
1045            include_str!("../../../docs/examples/physics.vrf"),
1046        );
1047        r.add(
1048            "import-demo.vrf",
1049            include_str!("../../../docs/examples/import-demo.vrf"),
1050        );
1051        let c = compile("import-demo.vrf", &r).unwrap();
1052        assert!(c.pending_imports.is_empty());
1053        // physics.vrf: one_path (EXCLUSIVE, 1 clause) + speed_order (impl, 1 clause)
1054        assert_eq!(c.clauses.len(), 2);
1055        // over_200 / over_100 unify between the facts and the imported premise.
1056        let over_100 = id(&c, &key("Motor", "over_100", None));
1057        assert!(c.facts.iter().any(|f| f.atom == over_100));
1058        assert!(
1059            c.clauses
1060                .iter()
1061                .any(|cl| cl.lits.iter().any(|l| l.atom == over_100))
1062        );
1063    }
1064
1065    #[test]
1066    fn creature_example_compiles() {
1067        let src = include_str!("../../../docs/examples/creature.vrf");
1068        let c = compile_source("creature.vrf", src).unwrap();
1069        assert_eq!(c.facts.len(), 2); // flying, warm_blood
1070        assert_eq!(c.rules.len(), 1); // needs_oxygen
1071        assert_eq!(c.checks.len(), 1);
1072        // fly_xor_swim (1) + wings_need_bone (THEN wing AND bone → 2) + no_dual_temp (1) = 4
1073        assert_eq!(c.clauses.len(), 4);
1074        assert_eq!(c.atoms.len(), 7);
1075    }
1076
1077    #[test]
1078    fn forbids_unfolds_pairwise() {
1079        let src = "PREMISE f:\n    FORBIDS\n        x a\n        x b\n        x c\n";
1080        let c = compile_source("<t>", src).unwrap();
1081        assert_eq!(c.clauses.len(), 3); // C(3,2), like EXCLUSIVE
1082        assert!(
1083            c.clauses
1084                .iter()
1085                .all(|cl| cl.lits.len() == 2 && cl.lits.iter().all(|l| !l.negated))
1086        );
1087    }
1088
1089    #[test]
1090    fn rule_with_multiple_consequents() {
1091        let src = "RULE r:\n    WHEN x a\n    THEN x b\n    AND  x c\n";
1092        let c = compile_source("<t>", src).unwrap();
1093        assert_eq!(c.rules.len(), 1);
1094        assert_eq!(c.rules[0].consequent.len(), 2);
1095    }
1096
1097    #[test]
1098    fn negated_antecedent_literal_keeps_polarity() {
1099        // WHEN NOT x a THEN x b  ==  Impossible([NOT x a, NOT x b])
1100        let src = "PREMISE a:\n    WHEN NOT x a\n    THEN x b\n";
1101        let c = compile_source("<t>", src).unwrap();
1102        let xa = id(&c, &key("x", "a", None));
1103        assert!(c.clauses[0].lits.contains(&Lit {
1104            atom: xa,
1105            negated: true
1106        }));
1107    }
1108
1109    #[test]
1110    fn rule_keeps_consequent_negation() {
1111        let src = "RULE r:\n    WHEN x a\n    THEN NOT x b\n";
1112        let c = compile_source("<t>", src).unwrap();
1113        assert!(c.rules[0].consequent[0].negated);
1114    }
1115
1116    #[test]
1117    fn compilation_is_deterministic() {
1118        let src = "PREMISE e:\n    EXCLUSIVE\n        z z\n        a a\n        m m\nFACT q q\n";
1119        assert_eq!(
1120            compile_source("<t>", src).unwrap(),
1121            compile_source("<t>", src).unwrap()
1122        );
1123    }
1124
1125    #[test]
1126    fn empty_program_compiles_to_empty_ir() {
1127        let c = compile_source("<t>", "// nothing here\n").unwrap();
1128        assert!(c.atoms.is_empty() && c.clauses.is_empty() && c.facts.is_empty());
1129    }
1130
1131    #[test]
1132    fn same_clause_from_two_named_premises_is_deduped() {
1133        // Different names, identical logical content → one clause, no redefinition.
1134        let src = "PREMISE e1:\n    EXCLUSIVE\n        x a\n        x b\nPREMISE e2:\n    EXCLUSIVE\n        x a\n        x b\n";
1135        let c = compile_source("<t>", src).unwrap();
1136        assert_eq!(c.clauses.len(), 1);
1137    }
1138
1139    #[test]
1140    fn object_distinguishes_atom_identity() {
1141        // `x p a` and `x p b` differ only by object → two distinct atoms.
1142        let c = compile_source("<t>", "FACT x p a\nFACT x p b\n").unwrap();
1143        assert_eq!(c.atoms.len(), 2);
1144    }
1145}
elenchus_compiler/lib.rs

elenchus_compiler/
lib.rs