Skip to main content

oxiz_proof/
fingerprint.rs

1//! Proof fingerprinting for fast similarity detection.
2//!
3//! This module generates compact fingerprints of proofs that enable
4//! fast similarity detection and proof reuse.
5
6use crate::proof::{Proof, ProofStep};
7use rustc_hash::{FxHashMap, FxHashSet};
8use std::collections::hash_map::DefaultHasher;
9use std::fmt;
10use std::hash::{Hash, Hasher};
11
12/// A compact fingerprint of a proof.
13#[derive(Debug, Clone, PartialEq, Eq, Hash)]
14#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
15pub struct ProofFingerprint {
16    /// Primary hash (structural)
17    pub structural_hash: u64,
18    /// Rule sequence hash
19    pub rule_hash: u64,
20    /// Size features
21    pub size_features: SizeFeatures,
22    /// Bloom filter for fast membership testing
23    pub bloom_filter: Vec<u64>,
24}
25
26/// Size-based features of a proof.
27#[derive(Debug, Clone, PartialEq, Eq, Hash)]
28#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
29pub struct SizeFeatures {
30    /// Number of nodes
31    pub num_nodes: usize,
32    /// Maximum depth
33    pub max_depth: usize,
34    /// Number of axioms
35    pub num_axioms: usize,
36    /// Number of unique rules
37    pub num_unique_rules: usize,
38}
39
40impl fmt::Display for ProofFingerprint {
41    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42        write!(
43            f,
44            "Fingerprint[struct:{:016x}, rules:{:016x}, nodes:{}, depth:{}]",
45            self.structural_hash,
46            self.rule_hash,
47            self.size_features.num_nodes,
48            self.size_features.max_depth
49        )
50    }
51}
52
53/// Fingerprint generator for proofs.
54pub struct FingerprintGenerator {
55    /// Bloom filter size (number of bits)
56    bloom_size: usize,
57    /// Number of hash functions for bloom filter
58    num_hash_functions: usize,
59}
60
61impl Default for FingerprintGenerator {
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl FingerprintGenerator {
68    /// Create a new fingerprint generator with default settings.
69    pub fn new() -> Self {
70        Self {
71            bloom_size: 256,
72            num_hash_functions: 3,
73        }
74    }
75
76    /// Set the bloom filter size.
77    pub fn with_bloom_size(mut self, size: usize) -> Self {
78        self.bloom_size = size;
79        self
80    }
81
82    /// Set the number of hash functions.
83    pub fn with_hash_functions(mut self, num: usize) -> Self {
84        self.num_hash_functions = num;
85        self
86    }
87
88    /// Generate a fingerprint for a proof.
89    pub fn generate(&self, proof: &Proof) -> ProofFingerprint {
90        let structural_hash = self.compute_structural_hash(proof);
91        let rule_hash = self.compute_rule_hash(proof);
92        let size_features = self.extract_size_features(proof);
93        let bloom_filter = self.create_bloom_filter(proof);
94
95        ProofFingerprint {
96            structural_hash,
97            rule_hash,
98            size_features,
99            bloom_filter,
100        }
101    }
102
103    /// Check if two fingerprints are likely similar.
104    pub fn are_similar(&self, fp1: &ProofFingerprint, fp2: &ProofFingerprint) -> bool {
105        // Check exact structural match
106        if fp1.structural_hash == fp2.structural_hash {
107            return true;
108        }
109
110        // Check size similarity
111        if !self.size_features_similar(&fp1.size_features, &fp2.size_features) {
112            return false;
113        }
114
115        // Check bloom filter overlap
116        self.bloom_overlap(&fp1.bloom_filter, &fp2.bloom_filter) > 0.7
117    }
118
119    /// Compute similarity score between two fingerprints (0.0 - 1.0).
120    pub fn similarity_score(&self, fp1: &ProofFingerprint, fp2: &ProofFingerprint) -> f64 {
121        if fp1 == fp2 {
122            return 1.0;
123        }
124
125        let mut score = 0.0;
126
127        // Structural similarity (40% weight)
128        if fp1.structural_hash == fp2.structural_hash {
129            score += 0.4;
130        }
131
132        // Rule similarity (30% weight)
133        if fp1.rule_hash == fp2.rule_hash {
134            score += 0.3;
135        }
136
137        // Size similarity (10% weight)
138        let size_sim = self.compute_size_similarity(&fp1.size_features, &fp2.size_features);
139        score += 0.1 * size_sim;
140
141        // Bloom filter overlap (20% weight)
142        let bloom_sim = self.bloom_overlap(&fp1.bloom_filter, &fp2.bloom_filter);
143        score += 0.2 * bloom_sim;
144
145        score
146    }
147
148    // Helper: Compute structural hash of a proof
149    fn compute_structural_hash(&self, proof: &Proof) -> u64 {
150        let mut hasher = DefaultHasher::new();
151
152        // Hash the proof structure
153        for node in proof.nodes() {
154            match &node.step {
155                ProofStep::Axiom { conclusion } => {
156                    "axiom".hash(&mut hasher);
157                    conclusion.hash(&mut hasher);
158                }
159                ProofStep::Inference { rule, premises, .. } => {
160                    "inference".hash(&mut hasher);
161                    rule.hash(&mut hasher);
162                    premises.len().hash(&mut hasher);
163                }
164            }
165        }
166
167        hasher.finish()
168    }
169
170    // Helper: Compute hash of rule sequence
171    fn compute_rule_hash(&self, proof: &Proof) -> u64 {
172        let mut hasher = DefaultHasher::new();
173        let mut rules = Vec::new();
174
175        for node in proof.nodes() {
176            if let ProofStep::Inference { rule, .. } = &node.step {
177                rules.push(rule.clone());
178            }
179        }
180
181        rules.sort();
182        rules.dedup();
183        rules.hash(&mut hasher);
184
185        hasher.finish()
186    }
187
188    // Helper: Extract size features
189    fn extract_size_features(&self, proof: &Proof) -> SizeFeatures {
190        let num_nodes = proof.len();
191        let max_depth = proof.nodes().iter().map(|n| n.depth).max().unwrap_or(0);
192        let num_axioms = proof
193            .nodes()
194            .iter()
195            .filter(|n| matches!(n.step, ProofStep::Axiom { .. }))
196            .count();
197
198        let mut unique_rules = FxHashSet::default();
199        for node in proof.nodes() {
200            if let ProofStep::Inference { rule, .. } = &node.step {
201                unique_rules.insert(rule.clone());
202            }
203        }
204
205        SizeFeatures {
206            num_nodes,
207            max_depth: max_depth as usize,
208            num_axioms,
209            num_unique_rules: unique_rules.len(),
210        }
211    }
212
213    // Helper: Create bloom filter for proof elements
214    fn create_bloom_filter(&self, proof: &Proof) -> Vec<u64> {
215        let num_words = self.bloom_size.div_ceil(64);
216        let mut bloom = vec![0u64; num_words];
217
218        for node in proof.nodes() {
219            // Add node conclusion to bloom filter
220            self.bloom_add(&mut bloom, node.conclusion());
221
222            // Add rule to bloom filter
223            if let ProofStep::Inference { rule, .. } = &node.step {
224                self.bloom_add(&mut bloom, rule);
225            }
226        }
227
228        bloom
229    }
230
231    // Helper: Add element to bloom filter
232    fn bloom_add(&self, bloom: &mut [u64], element: &str) {
233        for i in 0..self.num_hash_functions {
234            let hash = self.hash_with_seed(element, i as u64);
235            let bit_index = (hash as usize) % (bloom.len() * 64);
236            let word_index = bit_index / 64;
237            let bit_offset = bit_index % 64;
238            bloom[word_index] |= 1u64 << bit_offset;
239        }
240    }
241
242    // Helper: Hash with seed
243    fn hash_with_seed(&self, element: &str, seed: u64) -> u64 {
244        let mut hasher = DefaultHasher::new();
245        seed.hash(&mut hasher);
246        element.hash(&mut hasher);
247        hasher.finish()
248    }
249
250    // Helper: Check if size features are similar
251    fn size_features_similar(&self, sf1: &SizeFeatures, sf2: &SizeFeatures) -> bool {
252        // Allow 20% difference in size
253        let size_ratio = (sf1.num_nodes as f64) / (sf2.num_nodes.max(1) as f64);
254        if !(0.8..=1.2).contains(&size_ratio) {
255            return false;
256        }
257
258        // Allow 30% difference in depth
259        let depth_ratio = (sf1.max_depth as f64) / (sf2.max_depth.max(1) as f64);
260        if !(0.7..=1.3).contains(&depth_ratio) {
261            return false;
262        }
263
264        true
265    }
266
267    // Helper: Compute size similarity score
268    fn compute_size_similarity(&self, sf1: &SizeFeatures, sf2: &SizeFeatures) -> f64 {
269        let size_sim = 1.0
270            - ((sf1.num_nodes as f64 - sf2.num_nodes as f64).abs()
271                / sf1.num_nodes.max(sf2.num_nodes).max(1) as f64);
272        let depth_sim = 1.0
273            - ((sf1.max_depth as f64 - sf2.max_depth as f64).abs()
274                / sf1.max_depth.max(sf2.max_depth).max(1) as f64);
275        let axiom_sim = 1.0
276            - ((sf1.num_axioms as f64 - sf2.num_axioms as f64).abs()
277                / sf1.num_axioms.max(sf2.num_axioms).max(1) as f64);
278
279        (size_sim + depth_sim + axiom_sim) / 3.0
280    }
281
282    // Helper: Compute bloom filter overlap
283    fn bloom_overlap(&self, bloom1: &[u64], bloom2: &[u64]) -> f64 {
284        if bloom1.len() != bloom2.len() {
285            return 0.0;
286        }
287
288        let mut intersection_bits = 0;
289        let mut union_bits = 0;
290
291        for (&b1, &b2) in bloom1.iter().zip(bloom2.iter()) {
292            intersection_bits += (b1 & b2).count_ones();
293            union_bits += (b1 | b2).count_ones();
294        }
295
296        if union_bits == 0 {
297            return 1.0;
298        }
299
300        (intersection_bits as f64) / (union_bits as f64)
301    }
302}
303
304/// Database for storing and querying proof fingerprints.
305pub struct FingerprintDatabase {
306    /// Generator for creating fingerprints
307    generator: FingerprintGenerator,
308    /// Stored fingerprints with associated IDs
309    fingerprints: FxHashMap<String, ProofFingerprint>,
310}
311
312impl Default for FingerprintDatabase {
313    fn default() -> Self {
314        Self::new()
315    }
316}
317
318impl FingerprintDatabase {
319    /// Create a new fingerprint database.
320    pub fn new() -> Self {
321        Self {
322            generator: FingerprintGenerator::new(),
323            fingerprints: FxHashMap::default(),
324        }
325    }
326
327    /// Create with a custom generator.
328    pub fn with_generator(generator: FingerprintGenerator) -> Self {
329        Self {
330            generator,
331            fingerprints: FxHashMap::default(),
332        }
333    }
334
335    /// Add a proof to the database.
336    pub fn add_proof(&mut self, id: String, proof: &Proof) {
337        let fingerprint = self.generator.generate(proof);
338        self.fingerprints.insert(id, fingerprint);
339    }
340
341    /// Find similar proofs in the database.
342    pub fn find_similar(&self, proof: &Proof, threshold: f64) -> Vec<(String, f64)> {
343        let query_fp = self.generator.generate(proof);
344        let mut results = Vec::new();
345
346        for (id, fp) in &self.fingerprints {
347            let score = self.generator.similarity_score(&query_fp, fp);
348            if score >= threshold {
349                results.push((id.clone(), score));
350            }
351        }
352
353        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
354        results
355    }
356
357    /// Get the number of stored fingerprints.
358    pub fn len(&self) -> usize {
359        self.fingerprints.len()
360    }
361
362    /// Check if the database is empty.
363    pub fn is_empty(&self) -> bool {
364        self.fingerprints.is_empty()
365    }
366
367    /// Clear the database.
368    pub fn clear(&mut self) {
369        self.fingerprints.clear();
370    }
371}
372
373#[cfg(test)]
374mod tests {
375    use super::*;
376
377    #[test]
378    fn test_fingerprint_generator_new() {
379        let generator = FingerprintGenerator::new();
380        assert_eq!(generator.bloom_size, 256);
381        assert_eq!(generator.num_hash_functions, 3);
382    }
383
384    #[test]
385    fn test_fingerprint_generator_with_settings() {
386        let generator = FingerprintGenerator::new()
387            .with_bloom_size(512)
388            .with_hash_functions(5);
389        assert_eq!(generator.bloom_size, 512);
390        assert_eq!(generator.num_hash_functions, 5);
391    }
392
393    #[test]
394    fn test_generate_fingerprint_empty_proof() {
395        let generator = FingerprintGenerator::new();
396        let proof = Proof::new();
397        let fp = generator.generate(&proof);
398        assert_eq!(fp.size_features.num_nodes, 0);
399        assert_eq!(fp.size_features.max_depth, 0);
400        assert_eq!(fp.size_features.num_axioms, 0);
401    }
402
403    #[test]
404    fn test_fingerprint_display() {
405        let fp = ProofFingerprint {
406            structural_hash: 0x1234567890abcdef,
407            rule_hash: 0xfedcba0987654321,
408            size_features: SizeFeatures {
409                num_nodes: 10,
410                max_depth: 5,
411                num_axioms: 2,
412                num_unique_rules: 3,
413            },
414            bloom_filter: vec![0, 0, 0, 0],
415        };
416        let display = fp.to_string();
417        assert!(display.contains("1234567890abcdef"));
418        assert!(display.contains("nodes:10"));
419        assert!(display.contains("depth:5"));
420    }
421
422    #[test]
423    fn test_identical_proofs_same_fingerprint() {
424        let generator = FingerprintGenerator::new();
425        let mut proof = Proof::new();
426        proof.add_axiom("x = x");
427
428        let fp1 = generator.generate(&proof);
429        let fp2 = generator.generate(&proof);
430
431        assert_eq!(fp1, fp2);
432    }
433
434    #[test]
435    fn test_similarity_score_identical() {
436        let generator = FingerprintGenerator::new();
437        let proof = Proof::new();
438        let fp = generator.generate(&proof);
439
440        let score = generator.similarity_score(&fp, &fp);
441        assert_eq!(score, 1.0);
442    }
443
444    #[test]
445    fn test_size_features_similar() {
446        let generator = FingerprintGenerator::new();
447        let sf1 = SizeFeatures {
448            num_nodes: 100,
449            max_depth: 10,
450            num_axioms: 5,
451            num_unique_rules: 3,
452        };
453        let sf2 = SizeFeatures {
454            num_nodes: 105,
455            max_depth: 11,
456            num_axioms: 5,
457            num_unique_rules: 3,
458        };
459        assert!(generator.size_features_similar(&sf1, &sf2));
460    }
461
462    #[test]
463    fn test_size_features_dissimilar() {
464        let generator = FingerprintGenerator::new();
465        let sf1 = SizeFeatures {
466            num_nodes: 100,
467            max_depth: 10,
468            num_axioms: 5,
469            num_unique_rules: 3,
470        };
471        let sf2 = SizeFeatures {
472            num_nodes: 200,
473            max_depth: 10,
474            num_axioms: 5,
475            num_unique_rules: 3,
476        };
477        assert!(!generator.size_features_similar(&sf1, &sf2));
478    }
479
480    #[test]
481    fn test_fingerprint_database_new() {
482        let db = FingerprintDatabase::new();
483        assert_eq!(db.len(), 0);
484        assert!(db.is_empty());
485    }
486
487    #[test]
488    fn test_fingerprint_database_add() {
489        let mut db = FingerprintDatabase::new();
490        let proof = Proof::new();
491        db.add_proof("proof1".to_string(), &proof);
492        assert_eq!(db.len(), 1);
493        assert!(!db.is_empty());
494    }
495
496    #[test]
497    fn test_fingerprint_database_find_similar() {
498        let mut db = FingerprintDatabase::new();
499        let proof1 = Proof::new();
500        let proof2 = Proof::new();
501
502        db.add_proof("proof1".to_string(), &proof1);
503
504        let similar = db.find_similar(&proof2, 0.9);
505        assert_eq!(similar.len(), 1);
506        assert_eq!(similar[0].0, "proof1");
507        assert_eq!(similar[0].1, 1.0);
508    }
509
510    #[test]
511    fn test_fingerprint_database_clear() {
512        let mut db = FingerprintDatabase::new();
513        let proof = Proof::new();
514        db.add_proof("proof1".to_string(), &proof);
515        db.clear();
516        assert!(db.is_empty());
517    }
518}