Skip to main content

oxiz_proof/
minimize.rs

1//! Proof minimization via hash-cons deduplication and iterative re-trimming.
2//!
3//! This module provides a [`ProofMinimizer`] that computes a *minimal* proof DAG
4//! by deduplicating semantically-identical lemma nodes across all branches and
5//! then iterating cone reduction until a fixed point is reached.
6//!
7//! ## Distinction from `compress.rs`
8//!
9//! `compress.rs` removes *structurally redundant* nodes (unreachable steps, trivial
10//! identity rewrites). `minimize.rs` goes further: it performs **hash-cons
11//! deduplication** — collapsing any two nodes that share the same rule, conclusion
12//! text, argument list, and premise count into a single canonical node, then
13//! re-trims the proof until no further removal is possible.
14
15use crate::compress::get_dependency_cone;
16use crate::proof::{Proof, ProofNodeId, ProofStep};
17use rustc_hash::FxHashMap;
18use std::collections::hash_map::DefaultHasher;
19use std::hash::{Hash, Hasher};
20
21// ---------------------------------------------------------------------------
22// Configuration
23// ---------------------------------------------------------------------------
24
25/// Configuration for [`ProofMinimizer`].
26#[derive(Debug, Clone)]
27pub struct MinimizeConfig {
28    /// Maximum number of minimization passes (dedup + trim loop).
29    /// Defaults to 4.
30    pub max_passes: usize,
31    /// Whether to perform hash-cons deduplication.
32    /// When `false`, only structural cone reduction is applied.
33    /// Defaults to `true`.
34    pub enable_dedup: bool,
35}
36
37impl Default for MinimizeConfig {
38    fn default() -> Self {
39        Self {
40            max_passes: 4,
41            enable_dedup: true,
42        }
43    }
44}
45
46impl MinimizeConfig {
47    /// Create a new configuration with all options at their defaults.
48    pub fn new() -> Self {
49        Self::default()
50    }
51
52    /// Disable hash-cons deduplication (only cone reduction runs).
53    pub fn without_dedup(mut self) -> Self {
54        self.enable_dedup = false;
55        self
56    }
57
58    /// Set the maximum number of passes.
59    pub fn with_max_passes(mut self, passes: usize) -> Self {
60        self.max_passes = passes;
61        self
62    }
63}
64
65// ---------------------------------------------------------------------------
66// Result
67// ---------------------------------------------------------------------------
68
69/// Statistics returned by [`ProofMinimizer::minimize`].
70#[derive(Debug, Clone, Default)]
71pub struct MinimizeResult {
72    /// Number of minimization passes that actually ran.
73    pub passes: usize,
74    /// Total node count removed across all passes.
75    pub nodes_removed: usize,
76    /// Total duplicate nodes collapsed (may span multiple passes).
77    pub duplicates_collapsed: usize,
78}
79
80// ---------------------------------------------------------------------------
81// Hash key for candidate duplicates
82// ---------------------------------------------------------------------------
83
84/// An opaque u64 that identifies a node's *semantic fingerprint*:
85/// derived from its rule variant, conclusion text, argument list, and
86/// premise count. Two nodes are candidates for deduplication iff they
87/// produce the same `ConclusionHash`.
88#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
89struct ConclusionHash(u64);
90
91impl ConclusionHash {
92    /// Compute the hash for an [`Axiom`] node.
93    fn for_axiom(conclusion: &str) -> Self {
94        let mut h = DefaultHasher::new();
95        0u8.hash(&mut h); // variant tag = 0 for Axiom
96        conclusion.hash(&mut h);
97        Self(h.finish())
98    }
99
100    /// Compute the hash for an [`Inference`] node.
101    fn for_inference(rule: &str, conclusion: &str, args: &[String], premise_count: usize) -> Self {
102        let mut h = DefaultHasher::new();
103        1u8.hash(&mut h); // variant tag = 1 for Inference
104        rule.hash(&mut h);
105        conclusion.hash(&mut h);
106        args.hash(&mut h);
107        premise_count.hash(&mut h);
108        Self(h.finish())
109    }
110}
111
112// ---------------------------------------------------------------------------
113// Equality check (full structural equality on the node step)
114// ---------------------------------------------------------------------------
115
116/// Full structural equality check between two [`ProofNode`]s after the
117/// hash-cons candidate match. Because two different inference rules might
118/// accidentally collide in the hash (though unlikely), we verify every field.
119fn nodes_are_equal(proof: &Proof, lhs: ProofNodeId, rhs: ProofNodeId) -> bool {
120    match (proof.get_node(lhs), proof.get_node(rhs)) {
121        (Some(l), Some(r)) => match (&l.step, &r.step) {
122            (ProofStep::Axiom { conclusion: cl }, ProofStep::Axiom { conclusion: cr }) => cl == cr,
123            (
124                ProofStep::Inference {
125                    rule: rl,
126                    premises: pl,
127                    conclusion: cl,
128                    args: al,
129                },
130                ProofStep::Inference {
131                    rule: rr,
132                    premises: pr,
133                    conclusion: cr,
134                    args: ar,
135                },
136            ) => rl == rr && cl == cr && al.as_slice() == ar.as_slice() && pl.len() == pr.len(),
137            _ => false,
138        },
139        _ => false,
140    }
141}
142
143// ---------------------------------------------------------------------------
144// Core minimization helpers
145// ---------------------------------------------------------------------------
146
147/// Rebuild `proof` retaining only the nodes in the dependency cone of
148/// `root_id`, rewriting premise references through `id_remap`.
149///
150/// Returns the rebuilt proof and the number of nodes removed.
151fn rebuild_cone(
152    proof: &Proof,
153    root_id: ProofNodeId,
154    id_remap: &FxHashMap<ProofNodeId, ProofNodeId>,
155) -> (Proof, usize) {
156    let nodes_before = proof.len();
157
158    // Compute the set of nodes needed (after remapping), starting from root.
159    // We first resolve the root through the remap, then compute its cone.
160    let effective_root = resolve(root_id, id_remap);
161    let cone_set: std::collections::HashSet<ProofNodeId> =
162        get_dependency_cone(proof, effective_root)
163            .into_iter()
164            .collect();
165
166    let mut new_proof = Proof::new();
167    let mut local_remap: FxHashMap<ProofNodeId, ProofNodeId> = FxHashMap::default();
168
169    // Walk nodes in original order so prerequisites are inserted before
170    // dependents (Proof stores them topologically since add_inference uses
171    // existing IDs as premises, which must already exist).
172    for node in proof.nodes() {
173        // Skip nodes outside the cone.
174        if !cone_set.contains(&node.id) {
175            continue;
176        }
177
178        let new_id = match &node.step {
179            ProofStep::Axiom { conclusion } => new_proof.add_axiom(conclusion.clone()),
180            ProofStep::Inference {
181                rule,
182                premises,
183                conclusion,
184                args,
185            } => {
186                let new_premises: Vec<ProofNodeId> = premises
187                    .iter()
188                    .map(|p| {
189                        let resolved = resolve(*p, id_remap);
190                        *local_remap.get(&resolved).unwrap_or(&resolved)
191                    })
192                    .collect();
193
194                new_proof.add_inference_with_args(
195                    rule.clone(),
196                    new_premises,
197                    args.to_vec(),
198                    conclusion.clone(),
199                )
200            }
201        };
202        local_remap.insert(node.id, new_id);
203    }
204
205    let nodes_after = new_proof.len();
206    let removed = nodes_before.saturating_sub(nodes_after);
207    (new_proof, removed)
208}
209
210/// Follow the remap chain to find the canonical node for `id`.
211fn resolve(id: ProofNodeId, remap: &FxHashMap<ProofNodeId, ProofNodeId>) -> ProofNodeId {
212    let mut current = id;
213    // Guard against pathological cycles (should never occur in a DAG, but
214    // be defensive). Limit iterations to avoid infinite loops.
215    for _ in 0..1024 {
216        match remap.get(&current) {
217            Some(&next) if next != current => current = next,
218            _ => break,
219        }
220    }
221    current
222}
223
224/// Perform one deduplication pass over `proof`.
225///
226/// Returns an `id_remap` table mapping duplicate node IDs to their canonical
227/// representative, and a count of how many nodes were declared duplicate.
228fn dedup_pass(proof: &Proof) -> (FxHashMap<ProofNodeId, ProofNodeId>, usize) {
229    // Map from ConclusionHash → first-seen canonical node id.
230    let mut canon: FxHashMap<ConclusionHash, ProofNodeId> = FxHashMap::default();
231    let mut remap: FxHashMap<ProofNodeId, ProofNodeId> = FxHashMap::default();
232    let mut collapsed = 0usize;
233
234    for node in proof.nodes() {
235        let hash = match &node.step {
236            ProofStep::Axiom { conclusion } => ConclusionHash::for_axiom(conclusion),
237            ProofStep::Inference {
238                rule,
239                premises,
240                conclusion,
241                args,
242            } => ConclusionHash::for_inference(rule, conclusion, args.as_slice(), premises.len()),
243        };
244
245        match canon.get(&hash).copied() {
246            Some(existing) if nodes_are_equal(proof, existing, node.id) => {
247                // This node is a true duplicate of `existing`.
248                remap.insert(node.id, existing);
249                collapsed += 1;
250            }
251            Some(_) => {
252                // Hash collision but not structurally equal — keep this node
253                // as a separate canonical (the first one stays canonical, this
254                // one remains unmapped).
255            }
256            None => {
257                canon.insert(hash, node.id);
258            }
259        }
260    }
261
262    (remap, collapsed)
263}
264
265// ---------------------------------------------------------------------------
266// Public API
267// ---------------------------------------------------------------------------
268
269/// Proof minimizer that performs hash-cons deduplication followed by
270/// iterative dependency-cone reduction until a fixed point.
271pub struct ProofMinimizer {
272    config: MinimizeConfig,
273}
274
275impl ProofMinimizer {
276    /// Create a new minimizer with the given configuration.
277    pub fn new(config: MinimizeConfig) -> Self {
278        Self { config }
279    }
280
281    /// Create a minimizer with default configuration.
282    pub fn with_defaults() -> Self {
283        Self::new(MinimizeConfig::default())
284    }
285
286    /// Minimize `proof` in-place.
287    ///
288    /// Returns a [`MinimizeResult`] describing what was done.
289    pub fn minimize(&self, proof: &mut Proof) -> MinimizeResult {
290        let mut result = MinimizeResult::default();
291
292        // Fast exit for empty proofs.
293        if proof.is_empty() {
294            return result;
295        }
296
297        for pass in 0..self.config.max_passes {
298            let size_before = proof.len();
299            let mut pass_collapsed = 0usize;
300
301            // Step 1: hash-cons deduplication (unless disabled).
302            if self.config.enable_dedup {
303                let (remap, collapsed) = dedup_pass(proof);
304                pass_collapsed = collapsed;
305
306                if !remap.is_empty() {
307                    // Rebuild the proof with the remap applied, trimmed to root.
308                    if let Some(root_id) = proof.root() {
309                        let (new_proof, _) = rebuild_cone(proof, root_id, &remap);
310                        *proof = new_proof;
311                    }
312                }
313            }
314
315            // Step 2: cone reduction (trim nodes unreachable from root).
316            if let Some(root_id) = proof.root() {
317                let empty_remap = FxHashMap::default();
318                let (new_proof, _) = rebuild_cone(proof, root_id, &empty_remap);
319                *proof = new_proof;
320            }
321
322            let size_after = proof.len();
323            let removed_this_pass = size_before.saturating_sub(size_after);
324
325            result.passes = pass + 1;
326            result.nodes_removed += removed_this_pass;
327            result.duplicates_collapsed += pass_collapsed;
328
329            // Fixed-point check: stop if nothing was removed this pass.
330            if removed_this_pass == 0 && pass_collapsed == 0 {
331                break;
332            }
333        }
334
335        result
336    }
337}
338
339impl Default for ProofMinimizer {
340    fn default() -> Self {
341        Self::with_defaults()
342    }
343}
344
345// ---------------------------------------------------------------------------
346// Tests
347// ---------------------------------------------------------------------------
348
349#[cfg(test)]
350mod tests {
351    use super::*;
352    use crate::proof::Proof;
353
354    // ------------------------------------------------------------------
355    // Helper: build a proof with two axiom-nodes whose conclusions are
356    // identical through the `update_conclusion` backdoor.
357    //
358    // Because Proof::add_axiom deduplicates by conclusion at construction
359    // time, we construct them with distinct conclusions first, then rename
360    // one to match the other.
361    // ------------------------------------------------------------------
362    fn proof_with_duplicate_axioms() -> (Proof, ProofNodeId, ProofNodeId) {
363        let mut proof = Proof::new();
364        let a1 = proof.add_axiom("p"); // id=0, conclusion="p"
365        let a2 = proof.add_axiom("q"); // id=1, conclusion="q"
366        // Rename a2 to also conclude "p" — now two nodes share "p".
367        proof.update_conclusion(a2, "p");
368        // Add an inference over both so both are in the root's cone.
369        let root = proof.add_inference("merge", vec![a1, a2], "merged_p");
370        (proof, a1, root)
371    }
372
373    // (a) A proof with duplicate axiom nodes must shrink after minimization.
374    #[test]
375    fn test_duplicate_axiom_proof_shrinks() {
376        let (mut proof, _a1, _root) = proof_with_duplicate_axioms();
377        let size_before = proof.len();
378
379        let minimizer = ProofMinimizer::with_defaults();
380        let result = minimizer.minimize(&mut proof);
381
382        assert!(
383            result.nodes_removed > 0 || proof.len() < size_before,
384            "minimizer must remove at least one duplicate node"
385        );
386    }
387
388    // (b) Minimizing an already-minimal proof yields no removals, and a
389    //     second minimize produces the same result.
390    #[test]
391    fn test_idempotent_on_minimal() {
392        let mut proof = Proof::new();
393        let a1 = proof.add_axiom("x");
394        let a2 = proof.add_axiom("y");
395        proof.add_inference("and", vec![a1, a2], "x_and_y");
396
397        let minimizer = ProofMinimizer::with_defaults();
398        let result1 = minimizer.minimize(&mut proof);
399        let size_after_first = proof.len();
400
401        let result2 = minimizer.minimize(&mut proof);
402        let size_after_second = proof.len();
403
404        assert_eq!(
405            result1.nodes_removed, 0,
406            "a minimal proof should have nothing removed"
407        );
408        assert_eq!(
409            size_after_first, size_after_second,
410            "second minimize should produce the same size"
411        );
412        assert_eq!(
413            result2.nodes_removed, 0,
414            "second minimize should report no removals"
415        );
416    }
417
418    // (c) Minimization must preserve the root conclusion.
419    #[test]
420    fn test_preserves_conclusion() {
421        let (mut proof, _a1, _root) = proof_with_duplicate_axioms();
422        let original_conclusion = proof
423            .root_node()
424            .map(|n| n.conclusion().to_string())
425            .expect("proof must have a root node");
426
427        let minimizer = ProofMinimizer::with_defaults();
428        let _result = minimizer.minimize(&mut proof);
429
430        let after_conclusion = proof
431            .root_node()
432            .map(|n| n.conclusion().to_string())
433            .expect("proof must still have a root node after minimize");
434
435        assert_eq!(
436            original_conclusion, after_conclusion,
437            "root conclusion must be preserved across minimization"
438        );
439    }
440
441    // (d) A chain of duplicate lemmas requires multiple passes to fully
442    //     resolve; `passes > 1` and the final result is stable.
443    #[test]
444    fn test_iterates_to_fixed_point() {
445        // Build: a0="p", a1="q" (renamed to "p"), layer1 infers from both,
446        // then a2="r", a3="s" (renamed to "r"), layer2 infers from both.
447        // The root uses layer1 and layer2.
448        let mut proof = Proof::new();
449
450        // Layer 1 duplicates
451        let a0 = proof.add_axiom("p_orig");
452        let a1 = proof.add_axiom("p_dup_raw");
453        proof.update_conclusion(a1, "p_orig"); // duplicate of a0
454        let layer1 = proof.add_inference("l1", vec![a0, a1], "layer1_out");
455
456        // Layer 2 duplicates — different conclusion namespace
457        let a2 = proof.add_axiom("r_orig");
458        let a3 = proof.add_axiom("r_dup_raw");
459        proof.update_conclusion(a3, "r_orig"); // duplicate of a2
460        let layer2 = proof.add_inference("l2", vec![a2, a3], "layer2_out");
461
462        // Root uses both layers
463        proof.add_inference("root_rule", vec![layer1, layer2], "final_root");
464
465        let minimizer = ProofMinimizer::new(MinimizeConfig {
466            max_passes: 4,
467            enable_dedup: true,
468        });
469        let result = minimizer.minimize(&mut proof);
470
471        // Should have removed duplicate nodes.
472        assert!(
473            result.nodes_removed > 0,
474            "minimizer must remove duplicates; nodes_removed={}",
475            result.nodes_removed
476        );
477
478        // After minimizing, a second pass must be stable (fixed-point).
479        let size_stable = proof.len();
480        let result2 = minimizer.minimize(&mut proof);
481        assert_eq!(
482            proof.len(),
483            size_stable,
484            "proof size must not change after fixed-point"
485        );
486        assert_eq!(
487            result2.nodes_removed, 0,
488            "no further removals expected at fixed-point"
489        );
490
491        // Check that passes > 1 only when the proof actually required it.
492        // (With this construction we always run at least 1 pass.)
493        assert!(result.passes >= 1, "at least one pass must be recorded");
494    }
495
496    // (e) enable_dedup=false disables the explicit dedup pass but cone
497    //     reduction still runs. A genuinely minimal proof (no unreachable
498    //     nodes, all distinct conclusions) must not shrink.
499    #[test]
500    fn test_disable_dedup_preserves_size() {
501        // Build a minimal proof with all distinct conclusions and all nodes
502        // reachable from root — nothing to trim, nothing to dedup.
503        let mut proof = Proof::new();
504        let a1 = proof.add_axiom("distinct_x");
505        let a2 = proof.add_axiom("distinct_y");
506        let a3 = proof.add_axiom("distinct_z");
507        let i1 = proof.add_inference("and", vec![a1, a2], "and_xy");
508        proof.add_inference("combine", vec![i1, a3], "root_out");
509        let size_before = proof.len(); // all 5 nodes reachable
510
511        let minimizer = ProofMinimizer::new(MinimizeConfig {
512            max_passes: 4,
513            enable_dedup: false,
514        });
515        let result = minimizer.minimize(&mut proof);
516
517        // No nodes should be removed from a fully-connected, duplicate-free proof.
518        assert_eq!(
519            result.nodes_removed, 0,
520            "no nodes should be removed from a minimal, fully-connected proof"
521        );
522        assert_eq!(
523            proof.len(),
524            size_before,
525            "disabling dedup on a minimal proof must not shrink it"
526        );
527    }
528
529    // (f) Empty proof must not panic.
530    #[test]
531    fn test_empty_proof_safe() {
532        let mut proof = Proof::new();
533        let minimizer = ProofMinimizer::with_defaults();
534        let result = minimizer.minimize(&mut proof);
535        assert_eq!(result.nodes_removed, 0);
536        assert_eq!(result.passes, 0);
537        assert!(proof.is_empty());
538    }
539}