1use std::collections::{HashMap, HashSet};
14
/// A `(subject, predicate, object)` statement annotated with the source that
/// asserted it, the confidence of that assertion and an optional timestamp.
#[derive(Debug, Clone, PartialEq)]
pub struct ProvenancedTriple {
    /// Subject of the statement.
    pub subject: String,
    /// Predicate (relation) of the statement.
    pub predicate: String,
    /// Object of the statement.
    pub object: String,
    /// Identifier of the source that asserted the statement.
    pub source_id: String,
    /// Confidence of the assertion; [`ProvenancedTriple::new`] keeps it in `[0.0, 1.0]`.
    pub confidence: f64,
    /// Timestamp of the assertion (larger means more recent); `0` unless set
    /// through [`ProvenancedTriple::with_timestamp`].
    pub timestamp: u64,
}

impl ProvenancedTriple {
    /// Builds a triple with timestamp `0`.
    ///
    /// `confidence` is clamped to `[0.0, 1.0]`. A NaN confidence is stored as
    /// `0.0`: `f64::clamp` would pass NaN through, which would then poison any
    /// average it takes part in and make the triple unequal to itself.
    pub fn new(
        subject: impl Into<String>,
        predicate: impl Into<String>,
        object: impl Into<String>,
        source_id: impl Into<String>,
        confidence: f64,
    ) -> Self {
        let confidence = if confidence.is_nan() {
            0.0
        } else {
            confidence.clamp(0.0, 1.0)
        };
        Self {
            subject: subject.into(),
            predicate: predicate.into(),
            object: object.into(),
            source_id: source_id.into(),
            confidence,
            timestamp: 0,
        }
    }

    /// Returns the triple with its timestamp replaced by `ts` (builder style).
    pub fn with_timestamp(mut self, ts: u64) -> Self {
        self.timestamp = ts;
        self
    }

    /// Returns an owned `(subject, predicate, object)` key identifying the
    /// statement independently of its source, confidence and timestamp.
    pub fn triple_key(&self) -> (String, String, String) {
        (
            self.subject.clone(),
            self.predicate.clone(),
            self.object.clone(),
        )
    }
}
70
/// Quality profile of a registered source, as stored by
/// `KnowledgeFusion::register_source`.
#[derive(Debug, Clone)]
pub struct SourceQuality {
    /// Identifier of the source this profile belongs to.
    pub source_id: String,
    /// Accuracy score, stored clamped to `[0.0, 1.0]`.
    pub accuracy: f64,
    /// Completeness score, stored clamped to `[0.0, 1.0]`.
    pub completeness: f64,
    /// Timeliness score, stored clamped to `[0.0, 1.0]`.
    pub timeliness: f64,
    /// Weighted sum of the accuracy, completeness and timeliness scores using
    /// the weights from `FusionConfig`, stored clamped to `[0.0, 1.0]`.
    pub overall: f64,
}
85
/// Rule used to turn the confidences of all input triples sharing one
/// `(subject, predicate, object)` key into a single fused confidence.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConflictStrategy {
    /// Score by support: the number of supporting triples divided by the larger
    /// of the registered-source count and the number of supporting triples.
    Voting,
    /// Use the confidence of the supporting triple with the greatest timestamp.
    Recency,
    /// Score using the `overall` quality of the registered sources that
    /// support the triple.
    Authority,
    /// Use the arithmetic mean of the supporting triples' confidences.
    AverageConfidence,
}
98
99#[derive(Debug, Clone)]
101pub struct FusionConfig {
102 pub conflict_strategy: ConflictStrategy,
104 pub min_confidence: f64,
106 pub accuracy_weight: f64,
108 pub completeness_weight: f64,
110 pub timeliness_weight: f64,
112 pub entity_alignment_threshold: f64,
114}
115
116impl Default for FusionConfig {
117 fn default() -> Self {
118 Self {
119 conflict_strategy: ConflictStrategy::Voting,
120 min_confidence: 0.3,
121 accuracy_weight: 0.5,
122 completeness_weight: 0.3,
123 timeliness_weight: 0.2,
124 entity_alignment_threshold: 0.8,
125 }
126 }
127}
128
/// A distinct `(subject, predicate, object)` statement produced by fusion.
#[derive(Debug, Clone)]
pub struct FusedTriple {
    /// Subject of the statement.
    pub subject: String,
    /// Predicate (relation) of the statement.
    pub predicate: String,
    /// Object of the statement.
    pub object: String,
    /// Confidence computed by the configured conflict strategy.
    pub confidence: f64,
    /// Source identifiers of the input triples that asserted the statement,
    /// one entry per supporting input triple.
    pub sources: Vec<String>,
    /// Number of input triples that asserted the statement.
    pub support_count: usize,
}
145
/// Summary figures describing one fusion run.
#[derive(Debug, Clone)]
pub struct FusionStats {
    /// Number of triples passed to the fusion run.
    pub input_triple_count: usize,
    /// Number of distinct `(subject, predicate, object)` keys in the input.
    pub unique_triple_keys: usize,
    /// Number of fused triples that met the minimum confidence.
    pub output_triple_count: usize,
    /// Number of `(subject, predicate)` pairs that have more than one object
    /// among the output triples.
    pub conflicts_resolved: usize,
    /// Number of distinct source identifiers in the input.
    pub source_count: usize,
    /// Mean confidence of the output triples; `0.0` when there are none.
    pub mean_confidence: f64,
    /// Number of aligned entity pairs; `fuse` always reports `0` here.
    pub aligned_entity_pairs: usize,
}
164
/// Outcome of a fusion run.
#[derive(Debug, Clone)]
pub struct FusionResult {
    /// Fused triples that met the minimum confidence.
    pub triples: Vec<FusedTriple>,
    /// Summary figures for the run.
    pub stats: FusionStats,
    /// For every distinct `(subject, predicate, object)` key in the input
    /// (including keys whose fused triple was filtered out), the source
    /// identifiers of the input triples that asserted it.
    pub provenance: HashMap<(String, String, String), Vec<String>>,
}
175
/// Fusion engine that merges provenance-annotated triples from several sources.
pub struct KnowledgeFusion {
    /// Parameters controlling conflict resolution, filtering and alignment.
    config: FusionConfig,
    /// Quality profiles of registered sources, keyed by source identifier.
    sources: HashMap<String, SourceQuality>,
    /// Number of completed `fuse` calls.
    total_fusions: u64,
}
186
187impl KnowledgeFusion {
188 pub fn new(config: FusionConfig) -> Self {
190 Self {
191 config,
192 sources: HashMap::new(),
193 total_fusions: 0,
194 }
195 }
196
197 pub fn register_source(
199 &mut self,
200 source_id: impl Into<String>,
201 accuracy: f64,
202 completeness: f64,
203 timeliness: f64,
204 ) {
205 let source_id = source_id.into();
206 let overall = self.config.accuracy_weight * accuracy
207 + self.config.completeness_weight * completeness
208 + self.config.timeliness_weight * timeliness;
209 self.sources.insert(
210 source_id.clone(),
211 SourceQuality {
212 source_id,
213 accuracy: accuracy.clamp(0.0, 1.0),
214 completeness: completeness.clamp(0.0, 1.0),
215 timeliness: timeliness.clamp(0.0, 1.0),
216 overall: overall.clamp(0.0, 1.0),
217 },
218 );
219 }
220
221 pub fn fuse(&mut self, triples: &[ProvenancedTriple]) -> FusionResult {
223 let mut groups: HashMap<(String, String, String), Vec<&ProvenancedTriple>> = HashMap::new();
225 for t in triples {
226 groups.entry(t.triple_key()).or_default().push(t);
227 }
228
229 let unique_keys = groups.len();
230 let source_ids: HashSet<&str> = triples.iter().map(|t| t.source_id.as_str()).collect();
231 let source_count = source_ids.len();
232
233 let mut fused_triples: Vec<FusedTriple> = Vec::new();
234 let mut provenance_map: HashMap<(String, String, String), Vec<String>> = HashMap::new();
235 let mut conflicts_resolved = 0;
236
237 for (key, group) in &groups {
238 let sources_for_key: Vec<String> = group.iter().map(|t| t.source_id.clone()).collect();
239 provenance_map.insert(key.clone(), sources_for_key.clone());
240
241 let fused_confidence = self.resolve_confidence(group);
247
248 if fused_confidence >= self.config.min_confidence {
249 let support = group.len();
250 fused_triples.push(FusedTriple {
251 subject: key.0.clone(),
252 predicate: key.1.clone(),
253 object: key.2.clone(),
254 confidence: fused_confidence,
255 sources: sources_for_key,
256 support_count: support,
257 });
258 }
259 }
260
261 let mut sp_map: HashMap<(String, String), Vec<String>> = HashMap::new();
263 for ft in &fused_triples {
264 sp_map
265 .entry((ft.subject.clone(), ft.predicate.clone()))
266 .or_default()
267 .push(ft.object.clone());
268 }
269
270 let resolved_triples = fused_triples.clone();
272 for ((_s, _p), objects) in &sp_map {
273 if objects.len() > 1 {
274 conflicts_resolved += 1;
275 }
278 }
279
280 let mean_confidence = if resolved_triples.is_empty() {
281 0.0
282 } else {
283 resolved_triples.iter().map(|t| t.confidence).sum::<f64>()
284 / resolved_triples.len() as f64
285 };
286
287 self.total_fusions += 1;
288
289 FusionResult {
290 triples: resolved_triples,
291 stats: FusionStats {
292 input_triple_count: triples.len(),
293 unique_triple_keys: unique_keys,
294 output_triple_count: fused_triples.len(),
295 conflicts_resolved,
296 source_count,
297 mean_confidence,
298 aligned_entity_pairs: 0,
299 },
300 provenance: provenance_map,
301 }
302 }
303
304 pub fn fuse_incremental(
306 &mut self,
307 existing: &[FusedTriple],
308 new_triples: &[ProvenancedTriple],
309 ) -> FusionResult {
310 let mut all: Vec<ProvenancedTriple> = Vec::new();
312 for ft in existing {
313 let source_id = ft
314 .sources
315 .first()
316 .cloned()
317 .unwrap_or_else(|| "unknown".to_string());
318 all.push(ProvenancedTriple::new(
319 &ft.subject,
320 &ft.predicate,
321 &ft.object,
322 source_id,
323 ft.confidence,
324 ));
325 }
326 all.extend(new_triples.iter().cloned());
327 self.fuse(&all)
328 }
329
330 pub fn align_entities(&self, triples: &[ProvenancedTriple]) -> Vec<(String, String)> {
332 let mut entities_by_source: HashMap<&str, HashSet<&str>> = HashMap::new();
334 for t in triples {
335 entities_by_source
336 .entry(&t.source_id)
337 .or_default()
338 .insert(&t.subject);
339 }
340
341 let source_ids: Vec<&&str> = entities_by_source.keys().collect();
342 let mut alignments: Vec<(String, String)> = Vec::new();
343
344 for i in 0..source_ids.len() {
345 for j in (i + 1)..source_ids.len() {
346 let entities_a = &entities_by_source[source_ids[i]];
347 let entities_b = &entities_by_source[source_ids[j]];
348 for &ea in entities_a {
349 for &eb in entities_b {
350 let sim = normalized_levenshtein(ea, eb);
351 if sim >= self.config.entity_alignment_threshold && ea != eb {
352 alignments.push((ea.to_string(), eb.to_string()));
353 }
354 }
355 }
356 }
357 }
358 alignments
359 }
360
361 pub fn source_quality(&self, source_id: &str) -> Option<&SourceQuality> {
363 self.sources.get(source_id)
364 }
365
366 pub fn total_fusions(&self) -> u64 {
368 self.total_fusions
369 }
370
371 pub fn registered_source_count(&self) -> usize {
373 self.sources.len()
374 }
375
376 fn resolve_confidence(&self, group: &[&ProvenancedTriple]) -> f64 {
380 match self.config.conflict_strategy {
381 ConflictStrategy::Voting => {
382 let max_possible = self.sources.len().max(group.len()) as f64;
384 if max_possible == 0.0 {
385 group.iter().map(|t| t.confidence).sum::<f64>() / group.len().max(1) as f64
386 } else {
387 group.len() as f64 / max_possible
388 }
389 }
390 ConflictStrategy::Recency => {
391 group
393 .iter()
394 .max_by_key(|t| t.timestamp)
395 .map(|t| t.confidence)
396 .unwrap_or(0.0)
397 }
398 ConflictStrategy::Authority => {
399 group
401 .iter()
402 .filter_map(|t| {
403 self.sources
404 .get(&t.source_id)
405 .map(|sq| sq.overall * t.confidence)
406 })
407 .fold(0.0_f64, f64::max)
408 .max(
409 group.iter().map(|t| t.confidence).fold(0.0_f64, f64::max),
411 )
412 }
413 ConflictStrategy::AverageConfidence => {
414 let sum: f64 = group.iter().map(|t| t.confidence).sum();
415 sum / group.len().max(1) as f64
416 }
417 }
418 }
419}
420
/// Returns the similarity of `a` and `b` in `[0.0, 1.0]`, defined as
/// `1 - distance / max_len`, where `distance` is the Levenshtein distance and
/// `max_len` the length of the longer string.
///
/// Both the distance and the length are measured in `char`s. Normalising a
/// char-based distance by the byte length (`str::len`) would overstate the
/// similarity of non-ASCII strings. Identical strings (including two empty
/// strings) have similarity `1.0`.
fn normalized_levenshtein(a: &str, b: &str) -> f64 {
    if a == b {
        return 1.0;
    }
    let max_len = a.chars().count().max(b.chars().count());
    if max_len == 0 {
        return 1.0;
    }
    let dist = levenshtein_distance(a, b);
    1.0 - (dist as f64 / max_len as f64)
}

/// Returns the Levenshtein (edit) distance between `a` and `b`, counting
/// single-`char` insertions, deletions and substitutions.
///
/// Uses the two-row dynamic-programming formulation, so memory use is
/// proportional to the length of `b`.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();
    let n = b_chars.len();

    // `prev[j]` is the distance between the processed prefix of `a` and the
    // first `j` chars of `b`; initially the prefix of `a` is empty.
    let mut prev: Vec<usize> = (0..=n).collect();
    let mut curr: Vec<usize> = vec![0; n + 1];

    for (i, &ca) in a_chars.iter().enumerate() {
        curr[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let cost = if ca == cb { 0 } else { 1 };
            let deletion = prev[j + 1] + 1;
            let insertion = curr[j] + 1;
            let substitution = prev[j] + cost;
            curr[j + 1] = deletion.min(insertion).min(substitution);
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[n]
}
462
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used for floating-point comparisons.
    const TOLERANCE: f64 = 1e-10;

    /// Returns `true` when `actual` is within [`TOLERANCE`] of `expected`.
    fn approx(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < TOLERANCE
    }

    /// Engine with the default configuration.
    fn engine() -> KnowledgeFusion {
        KnowledgeFusion::new(FusionConfig::default())
    }

    /// Engine with the given strategy and confidence floor; other settings default.
    fn engine_with(strategy: ConflictStrategy, min_confidence: f64) -> KnowledgeFusion {
        let config = FusionConfig {
            conflict_strategy: strategy,
            min_confidence,
            ..FusionConfig::default()
        };
        KnowledgeFusion::new(config)
    }

    /// Three triples from two sources, two of which state the same fact.
    fn fixture() -> Vec<ProvenancedTriple> {
        [
            ("Alice", "knows", "Bob", "src1", 0.9),
            ("Alice", "knows", "Bob", "src2", 0.8),
            ("Bob", "likes", "Music", "src1", 0.7),
        ]
        .iter()
        .map(|&(s, p, o, src, c)| ProvenancedTriple::new(s, p, o, src, c))
        .collect()
    }

    /// A previously fused `A p B` triple supported by source `s1`.
    fn previously_fused() -> Vec<FusedTriple> {
        vec![FusedTriple {
            subject: "A".into(),
            predicate: "p".into(),
            object: "B".into(),
            confidence: 0.8,
            sources: vec!["s1".into()],
            support_count: 1,
        }]
    }

    // --- ProvenancedTriple ---

    #[test]
    fn test_provenanced_triple_creation() {
        let triple = ProvenancedTriple::new("A", "B", "C", "src", 0.5);
        assert_eq!(triple.subject, "A");
        assert_eq!(triple.predicate, "B");
        assert_eq!(triple.object, "C");
        assert_eq!(triple.source_id, "src");
        assert!(approx(triple.confidence, 0.5));
    }

    #[test]
    fn test_confidence_clamped() {
        let triple = ProvenancedTriple::new("A", "B", "C", "src", 1.5);
        assert!(approx(triple.confidence, 1.0));
    }

    #[test]
    fn test_triple_key() {
        let expected = ("A".to_string(), "B".to_string(), "C".to_string());
        let triple = ProvenancedTriple::new("A", "B", "C", "src", 0.5);
        assert_eq!(triple.triple_key(), expected);
    }

    #[test]
    fn test_with_timestamp() {
        let triple = ProvenancedTriple::new("A", "B", "C", "src", 0.5).with_timestamp(1000);
        assert_eq!(triple.timestamp, 1000);
    }

    // --- Source registration ---

    #[test]
    fn test_register_source() {
        let mut fusion = engine();
        fusion.register_source("src1", 0.9, 0.8, 0.7);
        assert_eq!(fusion.registered_source_count(), 1);
    }

    #[test]
    fn test_source_quality_retrieval() {
        let mut fusion = engine();
        fusion.register_source("src1", 0.9, 0.8, 0.7);
        let quality = fusion.source_quality("src1").expect("should exist");
        assert!(approx(quality.accuracy, 0.9));
        assert!(approx(quality.completeness, 0.8));
    }

    #[test]
    fn test_source_quality_overall() {
        let mut fusion = engine();
        fusion.register_source("src1", 1.0, 1.0, 1.0);
        let quality = fusion.source_quality("src1").expect("should exist");
        assert!(approx(quality.overall, 1.0));
    }

    #[test]
    fn test_unknown_source_returns_none() {
        assert!(engine().source_quality("nonexistent").is_none());
    }

    // --- Basic fusion ---

    #[test]
    fn test_fuse_deduplicates() {
        let input = fixture();
        let outcome = engine().fuse(&input);
        assert_eq!(outcome.stats.unique_triple_keys, 2);
    }

    #[test]
    fn test_fuse_input_count() {
        let outcome = engine().fuse(&fixture());
        assert_eq!(outcome.stats.input_triple_count, 3);
    }

    #[test]
    fn test_fuse_source_count() {
        let outcome = engine().fuse(&fixture());
        assert_eq!(outcome.stats.source_count, 2);
    }

    #[test]
    fn test_fused_triple_has_support_count() {
        let outcome = engine().fuse(&fixture());
        let alice_bob = outcome
            .triples
            .iter()
            .find(|t| t.subject == "Alice" && t.object == "Bob");
        assert!(alice_bob.is_some());
        assert_eq!(alice_bob.map_or(0, |t| t.support_count), 2);
    }

    #[test]
    fn test_fused_triple_has_sources() {
        let outcome = engine().fuse(&fixture());
        let alice_bob = outcome
            .triples
            .iter()
            .find(|t| t.subject == "Alice" && t.object == "Bob")
            .expect("should find fused triple");
        for expected in ["src1", "src2"].iter() {
            assert!(alice_bob.sources.contains(&expected.to_string()));
        }
    }

    // --- Provenance ---

    #[test]
    fn test_provenance_map_populated() {
        let outcome = engine().fuse(&fixture());
        let key = ("Alice".to_string(), "knows".to_string(), "Bob".to_string());
        let recorded = outcome.provenance.get(&key).expect("should have provenance");
        assert_eq!(recorded.len(), 2);
    }

    // --- Conflict strategies ---

    #[test]
    fn test_voting_strategy() {
        let mut fusion = engine_with(ConflictStrategy::Voting, 0.0);
        let outcome = fusion.fuse(&fixture());
        assert!(!outcome.triples.is_empty());
    }

    #[test]
    fn test_recency_strategy() {
        let mut fusion = engine_with(ConflictStrategy::Recency, 0.0);
        let older = ProvenancedTriple::new("A", "p", "B", "s1", 0.5).with_timestamp(100);
        let newer = ProvenancedTriple::new("A", "p", "B", "s2", 0.9).with_timestamp(200);
        let outcome = fusion.fuse(&[older, newer]);
        assert!(approx(outcome.triples[0].confidence, 0.9));
    }

    #[test]
    fn test_authority_strategy() {
        let mut fusion = engine_with(ConflictStrategy::Authority, 0.0);
        fusion.register_source("high_quality", 1.0, 1.0, 1.0);
        fusion.register_source("low_quality", 0.1, 0.1, 0.1);
        let input = vec![
            ProvenancedTriple::new("A", "p", "B", "high_quality", 0.8),
            ProvenancedTriple::new("A", "p", "B", "low_quality", 0.8),
        ];
        let outcome = fusion.fuse(&input);
        assert!(!outcome.triples.is_empty());
        assert!(outcome.triples[0].confidence > 0.0);
    }

    #[test]
    fn test_average_confidence_strategy() {
        let mut fusion = engine_with(ConflictStrategy::AverageConfidence, 0.0);
        let input = vec![
            ProvenancedTriple::new("A", "p", "B", "s1", 0.6),
            ProvenancedTriple::new("A", "p", "B", "s2", 0.8),
        ];
        let outcome = fusion.fuse(&input);
        assert!(approx(outcome.triples[0].confidence, 0.7));
    }

    // --- Confidence floor ---

    #[test]
    fn test_min_confidence_filters_low() {
        let mut fusion = engine_with(ConflictStrategy::AverageConfidence, 0.8);
        let input = vec![ProvenancedTriple::new("A", "p", "B", "s1", 0.3)];
        let outcome = fusion.fuse(&input);
        assert!(
            outcome.triples.is_empty(),
            "low confidence should be filtered"
        );
    }

    // --- Incremental fusion ---

    #[test]
    fn test_incremental_fusion() {
        let mut fusion = engine_with(ConflictStrategy::AverageConfidence, 0.0);
        let existing = previously_fused();
        let additions = vec![
            ProvenancedTriple::new("A", "p", "B", "s2", 0.9),
            ProvenancedTriple::new("C", "q", "D", "s2", 0.7),
        ];

        let outcome = fusion.fuse_incremental(&existing, &additions);
        assert!(outcome.triples.len() >= 2, "should have at least 2 triples");
    }

    #[test]
    fn test_incremental_increases_support() {
        let mut fusion = engine_with(ConflictStrategy::AverageConfidence, 0.0);
        let existing = previously_fused();
        let additions = vec![ProvenancedTriple::new("A", "p", "B", "s2", 0.9)];

        let outcome = fusion.fuse_incremental(&existing, &additions);
        let merged = outcome
            .triples
            .iter()
            .find(|t| t.subject == "A" && t.object == "B")
            .expect("should exist");
        assert_eq!(merged.support_count, 2);
    }

    // --- Entity alignment ---

    #[test]
    fn test_align_entities_similar_names() {
        let fusion = KnowledgeFusion::new(FusionConfig {
            entity_alignment_threshold: 0.8,
            ..FusionConfig::default()
        });
        let input = vec![
            ProvenancedTriple::new("Alice_Smith", "knows", "Bob", "s1", 0.9),
            ProvenancedTriple::new("Alice_Smit", "knows", "Carol", "s2", 0.8),
        ];
        let pairs = fusion.align_entities(&input);
        assert!(!pairs.is_empty(), "should detect similar entity names");
    }

    #[test]
    fn test_align_entities_exact_same_not_aligned() {
        let input = vec![
            ProvenancedTriple::new("Alice", "knows", "Bob", "s1", 0.9),
            ProvenancedTriple::new("Alice", "likes", "Carol", "s2", 0.8),
        ];
        let pairs = engine().align_entities(&input);
        assert!(
            pairs.is_empty(),
            "exact same names should not produce alignment"
        );
    }

    #[test]
    fn test_align_entities_completely_different() {
        let input = vec![
            ProvenancedTriple::new("Alice", "knows", "Bob", "s1", 0.9),
            ProvenancedTriple::new("Xyz123", "likes", "Carol", "s2", 0.8),
        ];
        let pairs = engine().align_entities(&input);
        assert!(pairs.is_empty());
    }

    // --- Fusion counter ---

    #[test]
    fn test_total_fusions_initially_zero() {
        assert_eq!(engine().total_fusions(), 0);
    }

    #[test]
    fn test_total_fusions_increments() {
        let mut fusion = engine();
        for _ in 0..2 {
            fusion.fuse(&fixture());
        }
        assert_eq!(fusion.total_fusions(), 2);
    }

    // --- Edge cases ---

    #[test]
    fn test_fuse_empty() {
        let outcome = engine().fuse(&[]);
        assert!(outcome.triples.is_empty());
        assert_eq!(outcome.stats.input_triple_count, 0);
    }

    // --- Configuration ---

    #[test]
    fn test_config_default_values() {
        let defaults = FusionConfig::default();
        assert_eq!(defaults.conflict_strategy, ConflictStrategy::Voting);
        assert!(approx(defaults.min_confidence, 0.3));
        assert!(approx(defaults.entity_alignment_threshold, 0.8));
    }

    // --- Statistics ---

    #[test]
    fn test_fusion_stats_mean_confidence() {
        let mut fusion = engine_with(ConflictStrategy::AverageConfidence, 0.0);
        let input = vec![
            ProvenancedTriple::new("A", "p", "B", "s1", 0.6),
            ProvenancedTriple::new("C", "q", "D", "s1", 0.8),
        ];
        let outcome = fusion.fuse(&input);
        assert!(outcome.stats.mean_confidence > 0.0);
    }

    #[test]
    fn test_single_source_fusion() {
        let mut fusion = engine_with(ConflictStrategy::AverageConfidence, 0.0);
        let input = vec![ProvenancedTriple::new("A", "p", "B", "s1", 0.9)];
        let outcome = fusion.fuse(&input);
        assert_eq!(outcome.triples.len(), 1);
        assert_eq!(outcome.stats.source_count, 1);
    }
}