1use crate::proof::{Proof, ProofStep};
7use rustc_hash::{FxHashMap, FxHashSet};
8use std::collections::hash_map::DefaultHasher;
9use std::fmt;
10use std::hash::{Hash, Hasher};
11
12#[derive(Debug, Clone, PartialEq, Eq, Hash)]
14#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
15pub struct ProofFingerprint {
16 pub structural_hash: u64,
18 pub rule_hash: u64,
20 pub size_features: SizeFeatures,
22 pub bloom_filter: Vec<u64>,
24}
25
26#[derive(Debug, Clone, PartialEq, Eq, Hash)]
28#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
29pub struct SizeFeatures {
30 pub num_nodes: usize,
32 pub max_depth: usize,
34 pub num_axioms: usize,
36 pub num_unique_rules: usize,
38}
39
40impl fmt::Display for ProofFingerprint {
41 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
42 write!(
43 f,
44 "Fingerprint[struct:{:016x}, rules:{:016x}, nodes:{}, depth:{}]",
45 self.structural_hash,
46 self.rule_hash,
47 self.size_features.num_nodes,
48 self.size_features.max_depth
49 )
50 }
51}
52
53pub struct FingerprintGenerator {
55 bloom_size: usize,
57 num_hash_functions: usize,
59}
60
61impl Default for FingerprintGenerator {
62 fn default() -> Self {
63 Self::new()
64 }
65}
66
67impl FingerprintGenerator {
68 pub fn new() -> Self {
70 Self {
71 bloom_size: 256,
72 num_hash_functions: 3,
73 }
74 }
75
76 pub fn with_bloom_size(mut self, size: usize) -> Self {
78 self.bloom_size = size;
79 self
80 }
81
82 pub fn with_hash_functions(mut self, num: usize) -> Self {
84 self.num_hash_functions = num;
85 self
86 }
87
88 pub fn generate(&self, proof: &Proof) -> ProofFingerprint {
90 let structural_hash = self.compute_structural_hash(proof);
91 let rule_hash = self.compute_rule_hash(proof);
92 let size_features = self.extract_size_features(proof);
93 let bloom_filter = self.create_bloom_filter(proof);
94
95 ProofFingerprint {
96 structural_hash,
97 rule_hash,
98 size_features,
99 bloom_filter,
100 }
101 }
102
103 pub fn are_similar(&self, fp1: &ProofFingerprint, fp2: &ProofFingerprint) -> bool {
105 if fp1.structural_hash == fp2.structural_hash {
107 return true;
108 }
109
110 if !self.size_features_similar(&fp1.size_features, &fp2.size_features) {
112 return false;
113 }
114
115 self.bloom_overlap(&fp1.bloom_filter, &fp2.bloom_filter) > 0.7
117 }
118
119 pub fn similarity_score(&self, fp1: &ProofFingerprint, fp2: &ProofFingerprint) -> f64 {
121 if fp1 == fp2 {
122 return 1.0;
123 }
124
125 let mut score = 0.0;
126
127 if fp1.structural_hash == fp2.structural_hash {
129 score += 0.4;
130 }
131
132 if fp1.rule_hash == fp2.rule_hash {
134 score += 0.3;
135 }
136
137 let size_sim = self.compute_size_similarity(&fp1.size_features, &fp2.size_features);
139 score += 0.1 * size_sim;
140
141 let bloom_sim = self.bloom_overlap(&fp1.bloom_filter, &fp2.bloom_filter);
143 score += 0.2 * bloom_sim;
144
145 score
146 }
147
148 fn compute_structural_hash(&self, proof: &Proof) -> u64 {
150 let mut hasher = DefaultHasher::new();
151
152 for node in proof.nodes() {
154 match &node.step {
155 ProofStep::Axiom { conclusion } => {
156 "axiom".hash(&mut hasher);
157 conclusion.hash(&mut hasher);
158 }
159 ProofStep::Inference { rule, premises, .. } => {
160 "inference".hash(&mut hasher);
161 rule.hash(&mut hasher);
162 premises.len().hash(&mut hasher);
163 }
164 }
165 }
166
167 hasher.finish()
168 }
169
170 fn compute_rule_hash(&self, proof: &Proof) -> u64 {
172 let mut hasher = DefaultHasher::new();
173 let mut rules = Vec::new();
174
175 for node in proof.nodes() {
176 if let ProofStep::Inference { rule, .. } = &node.step {
177 rules.push(rule.clone());
178 }
179 }
180
181 rules.sort();
182 rules.dedup();
183 rules.hash(&mut hasher);
184
185 hasher.finish()
186 }
187
188 fn extract_size_features(&self, proof: &Proof) -> SizeFeatures {
190 let num_nodes = proof.len();
191 let max_depth = proof.nodes().iter().map(|n| n.depth).max().unwrap_or(0);
192 let num_axioms = proof
193 .nodes()
194 .iter()
195 .filter(|n| matches!(n.step, ProofStep::Axiom { .. }))
196 .count();
197
198 let mut unique_rules = FxHashSet::default();
199 for node in proof.nodes() {
200 if let ProofStep::Inference { rule, .. } = &node.step {
201 unique_rules.insert(rule.clone());
202 }
203 }
204
205 SizeFeatures {
206 num_nodes,
207 max_depth: max_depth as usize,
208 num_axioms,
209 num_unique_rules: unique_rules.len(),
210 }
211 }
212
213 fn create_bloom_filter(&self, proof: &Proof) -> Vec<u64> {
215 let num_words = self.bloom_size.div_ceil(64);
216 let mut bloom = vec![0u64; num_words];
217
218 for node in proof.nodes() {
219 self.bloom_add(&mut bloom, node.conclusion());
221
222 if let ProofStep::Inference { rule, .. } = &node.step {
224 self.bloom_add(&mut bloom, rule);
225 }
226 }
227
228 bloom
229 }
230
231 fn bloom_add(&self, bloom: &mut [u64], element: &str) {
233 for i in 0..self.num_hash_functions {
234 let hash = self.hash_with_seed(element, i as u64);
235 let bit_index = (hash as usize) % (bloom.len() * 64);
236 let word_index = bit_index / 64;
237 let bit_offset = bit_index % 64;
238 bloom[word_index] |= 1u64 << bit_offset;
239 }
240 }
241
242 fn hash_with_seed(&self, element: &str, seed: u64) -> u64 {
244 let mut hasher = DefaultHasher::new();
245 seed.hash(&mut hasher);
246 element.hash(&mut hasher);
247 hasher.finish()
248 }
249
250 fn size_features_similar(&self, sf1: &SizeFeatures, sf2: &SizeFeatures) -> bool {
252 let size_ratio = (sf1.num_nodes as f64) / (sf2.num_nodes.max(1) as f64);
254 if !(0.8..=1.2).contains(&size_ratio) {
255 return false;
256 }
257
258 let depth_ratio = (sf1.max_depth as f64) / (sf2.max_depth.max(1) as f64);
260 if !(0.7..=1.3).contains(&depth_ratio) {
261 return false;
262 }
263
264 true
265 }
266
267 fn compute_size_similarity(&self, sf1: &SizeFeatures, sf2: &SizeFeatures) -> f64 {
269 let size_sim = 1.0
270 - ((sf1.num_nodes as f64 - sf2.num_nodes as f64).abs()
271 / sf1.num_nodes.max(sf2.num_nodes).max(1) as f64);
272 let depth_sim = 1.0
273 - ((sf1.max_depth as f64 - sf2.max_depth as f64).abs()
274 / sf1.max_depth.max(sf2.max_depth).max(1) as f64);
275 let axiom_sim = 1.0
276 - ((sf1.num_axioms as f64 - sf2.num_axioms as f64).abs()
277 / sf1.num_axioms.max(sf2.num_axioms).max(1) as f64);
278
279 (size_sim + depth_sim + axiom_sim) / 3.0
280 }
281
282 fn bloom_overlap(&self, bloom1: &[u64], bloom2: &[u64]) -> f64 {
284 if bloom1.len() != bloom2.len() {
285 return 0.0;
286 }
287
288 let mut intersection_bits = 0;
289 let mut union_bits = 0;
290
291 for (&b1, &b2) in bloom1.iter().zip(bloom2.iter()) {
292 intersection_bits += (b1 & b2).count_ones();
293 union_bits += (b1 | b2).count_ones();
294 }
295
296 if union_bits == 0 {
297 return 1.0;
298 }
299
300 (intersection_bits as f64) / (union_bits as f64)
301 }
302}
303
304pub struct FingerprintDatabase {
306 generator: FingerprintGenerator,
308 fingerprints: FxHashMap<String, ProofFingerprint>,
310}
311
312impl Default for FingerprintDatabase {
313 fn default() -> Self {
314 Self::new()
315 }
316}
317
318impl FingerprintDatabase {
319 pub fn new() -> Self {
321 Self {
322 generator: FingerprintGenerator::new(),
323 fingerprints: FxHashMap::default(),
324 }
325 }
326
327 pub fn with_generator(generator: FingerprintGenerator) -> Self {
329 Self {
330 generator,
331 fingerprints: FxHashMap::default(),
332 }
333 }
334
335 pub fn add_proof(&mut self, id: String, proof: &Proof) {
337 let fingerprint = self.generator.generate(proof);
338 self.fingerprints.insert(id, fingerprint);
339 }
340
341 pub fn find_similar(&self, proof: &Proof, threshold: f64) -> Vec<(String, f64)> {
343 let query_fp = self.generator.generate(proof);
344 let mut results = Vec::new();
345
346 for (id, fp) in &self.fingerprints {
347 let score = self.generator.similarity_score(&query_fp, fp);
348 if score >= threshold {
349 results.push((id.clone(), score));
350 }
351 }
352
353 results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
354 results
355 }
356
357 pub fn len(&self) -> usize {
359 self.fingerprints.len()
360 }
361
362 pub fn is_empty(&self) -> bool {
364 self.fingerprints.is_empty()
365 }
366
367 pub fn clear(&mut self) {
369 self.fingerprints.clear();
370 }
371}
372
373#[cfg(test)]
374mod tests {
375 use super::*;
376
377 #[test]
378 fn test_fingerprint_generator_new() {
379 let generator = FingerprintGenerator::new();
380 assert_eq!(generator.bloom_size, 256);
381 assert_eq!(generator.num_hash_functions, 3);
382 }
383
384 #[test]
385 fn test_fingerprint_generator_with_settings() {
386 let generator = FingerprintGenerator::new()
387 .with_bloom_size(512)
388 .with_hash_functions(5);
389 assert_eq!(generator.bloom_size, 512);
390 assert_eq!(generator.num_hash_functions, 5);
391 }
392
393 #[test]
394 fn test_generate_fingerprint_empty_proof() {
395 let generator = FingerprintGenerator::new();
396 let proof = Proof::new();
397 let fp = generator.generate(&proof);
398 assert_eq!(fp.size_features.num_nodes, 0);
399 assert_eq!(fp.size_features.max_depth, 0);
400 assert_eq!(fp.size_features.num_axioms, 0);
401 }
402
403 #[test]
404 fn test_fingerprint_display() {
405 let fp = ProofFingerprint {
406 structural_hash: 0x1234567890abcdef,
407 rule_hash: 0xfedcba0987654321,
408 size_features: SizeFeatures {
409 num_nodes: 10,
410 max_depth: 5,
411 num_axioms: 2,
412 num_unique_rules: 3,
413 },
414 bloom_filter: vec![0, 0, 0, 0],
415 };
416 let display = fp.to_string();
417 assert!(display.contains("1234567890abcdef"));
418 assert!(display.contains("nodes:10"));
419 assert!(display.contains("depth:5"));
420 }
421
422 #[test]
423 fn test_identical_proofs_same_fingerprint() {
424 let generator = FingerprintGenerator::new();
425 let mut proof = Proof::new();
426 proof.add_axiom("x = x");
427
428 let fp1 = generator.generate(&proof);
429 let fp2 = generator.generate(&proof);
430
431 assert_eq!(fp1, fp2);
432 }
433
434 #[test]
435 fn test_similarity_score_identical() {
436 let generator = FingerprintGenerator::new();
437 let proof = Proof::new();
438 let fp = generator.generate(&proof);
439
440 let score = generator.similarity_score(&fp, &fp);
441 assert_eq!(score, 1.0);
442 }
443
444 #[test]
445 fn test_size_features_similar() {
446 let generator = FingerprintGenerator::new();
447 let sf1 = SizeFeatures {
448 num_nodes: 100,
449 max_depth: 10,
450 num_axioms: 5,
451 num_unique_rules: 3,
452 };
453 let sf2 = SizeFeatures {
454 num_nodes: 105,
455 max_depth: 11,
456 num_axioms: 5,
457 num_unique_rules: 3,
458 };
459 assert!(generator.size_features_similar(&sf1, &sf2));
460 }
461
462 #[test]
463 fn test_size_features_dissimilar() {
464 let generator = FingerprintGenerator::new();
465 let sf1 = SizeFeatures {
466 num_nodes: 100,
467 max_depth: 10,
468 num_axioms: 5,
469 num_unique_rules: 3,
470 };
471 let sf2 = SizeFeatures {
472 num_nodes: 200,
473 max_depth: 10,
474 num_axioms: 5,
475 num_unique_rules: 3,
476 };
477 assert!(!generator.size_features_similar(&sf1, &sf2));
478 }
479
480 #[test]
481 fn test_fingerprint_database_new() {
482 let db = FingerprintDatabase::new();
483 assert_eq!(db.len(), 0);
484 assert!(db.is_empty());
485 }
486
487 #[test]
488 fn test_fingerprint_database_add() {
489 let mut db = FingerprintDatabase::new();
490 let proof = Proof::new();
491 db.add_proof("proof1".to_string(), &proof);
492 assert_eq!(db.len(), 1);
493 assert!(!db.is_empty());
494 }
495
496 #[test]
497 fn test_fingerprint_database_find_similar() {
498 let mut db = FingerprintDatabase::new();
499 let proof1 = Proof::new();
500 let proof2 = Proof::new();
501
502 db.add_proof("proof1".to_string(), &proof1);
503
504 let similar = db.find_similar(&proof2, 0.9);
505 assert_eq!(similar.len(), 1);
506 assert_eq!(similar[0].0, "proof1");
507 assert_eq!(similar[0].1, 1.0);
508 }
509
510 #[test]
511 fn test_fingerprint_database_clear() {
512 let mut db = FingerprintDatabase::new();
513 let proof = Proof::new();
514 db.add_proof("proof1".to_string(), &proof);
515 db.clear();
516 assert!(db.is_empty());
517 }
518}