oxirs_graphrag/
knowledge_fusion.rs

1//! Multi-source knowledge fusion.
2//!
3//! Merges knowledge from multiple sources into a unified graph:
4//! - Entity alignment across sources (same-entity detection)
5//! - Conflict resolution strategies (voting, recency, authority)
6//! - Provenance tracking (which source contributed which triple)
7//! - Confidence aggregation (combine confidence from multiple sources)
8//! - Source quality scoring (accuracy, completeness, timeliness)
9//! - Fused knowledge graph construction
10//! - Fusion statistics and reporting
11//! - Incremental fusion (add new source without full rebuild)
12
13use std::collections::{HashMap, HashSet};
14
15// ---------------------------------------------------------------------------
16// Public types
17// ---------------------------------------------------------------------------
18
19/// A knowledge triple with provenance and confidence metadata.
20#[derive(Debug, Clone, PartialEq)]
21pub struct ProvenancedTriple {
22    /// Subject.
23    pub subject: String,
24    /// Predicate.
25    pub predicate: String,
26    /// Object.
27    pub object: String,
28    /// Source identifier that contributed this triple.
29    pub source_id: String,
30    /// Confidence in [0, 1] from the originating source.
31    pub confidence: f64,
32    /// Timestamp of the triple (epoch seconds, 0 if unknown).
33    pub timestamp: u64,
34}
35
36impl ProvenancedTriple {
37    /// Create a new provenanced triple.
38    pub fn new(
39        subject: impl Into<String>,
40        predicate: impl Into<String>,
41        object: impl Into<String>,
42        source_id: impl Into<String>,
43        confidence: f64,
44    ) -> Self {
45        Self {
46            subject: subject.into(),
47            predicate: predicate.into(),
48            object: object.into(),
49            source_id: source_id.into(),
50            confidence: confidence.clamp(0.0, 1.0),
51            timestamp: 0,
52        }
53    }
54
55    /// Set the timestamp.
56    pub fn with_timestamp(mut self, ts: u64) -> Self {
57        self.timestamp = ts;
58        self
59    }
60
61    /// Canonical key for deduplication: (subject, predicate, object).
62    pub fn triple_key(&self) -> (String, String, String) {
63        (
64            self.subject.clone(),
65            self.predicate.clone(),
66            self.object.clone(),
67        )
68    }
69}
70
71/// Quality assessment of a knowledge source.
72#[derive(Debug, Clone)]
73pub struct SourceQuality {
74    /// Source identifier.
75    pub source_id: String,
76    /// Accuracy score in [0, 1].
77    pub accuracy: f64,
78    /// Completeness score in [0, 1].
79    pub completeness: f64,
80    /// Timeliness score in [0, 1].
81    pub timeliness: f64,
82    /// Overall quality = weighted combination.
83    pub overall: f64,
84}
85
86/// Strategy for resolving conflicts between sources.
87#[derive(Debug, Clone, Copy, PartialEq, Eq)]
88pub enum ConflictStrategy {
89    /// Majority voting: the value supported by most sources wins.
90    Voting,
91    /// Most recent triple (by timestamp) wins.
92    Recency,
93    /// Highest-authority source wins (by source quality).
94    Authority,
95    /// Average confidence across all agreeing sources.
96    AverageConfidence,
97}
98
99/// Configuration for the knowledge fusion engine.
100#[derive(Debug, Clone)]
101pub struct FusionConfig {
102    /// Strategy for resolving conflicting triples.
103    pub conflict_strategy: ConflictStrategy,
104    /// Minimum fused confidence to include a triple in the output.
105    pub min_confidence: f64,
106    /// Weight for accuracy in overall quality computation.
107    pub accuracy_weight: f64,
108    /// Weight for completeness.
109    pub completeness_weight: f64,
110    /// Weight for timeliness.
111    pub timeliness_weight: f64,
112    /// Threshold for entity alignment string similarity.
113    pub entity_alignment_threshold: f64,
114}
115
116impl Default for FusionConfig {
117    fn default() -> Self {
118        Self {
119            conflict_strategy: ConflictStrategy::Voting,
120            min_confidence: 0.3,
121            accuracy_weight: 0.5,
122            completeness_weight: 0.3,
123            timeliness_weight: 0.2,
124            entity_alignment_threshold: 0.8,
125        }
126    }
127}
128
129/// A fused triple with aggregated confidence and provenance.
130#[derive(Debug, Clone)]
131pub struct FusedTriple {
132    /// Subject.
133    pub subject: String,
134    /// Predicate.
135    pub predicate: String,
136    /// Object.
137    pub object: String,
138    /// Aggregated confidence.
139    pub confidence: f64,
140    /// Source IDs that contributed this triple.
141    pub sources: Vec<String>,
142    /// Number of sources that agree.
143    pub support_count: usize,
144}
145
146/// Statistics from a fusion operation.
147#[derive(Debug, Clone)]
148pub struct FusionStats {
149    /// Total input triples across all sources.
150    pub input_triple_count: usize,
151    /// Number of unique triple keys.
152    pub unique_triple_keys: usize,
153    /// Number of fused triples in the output.
154    pub output_triple_count: usize,
155    /// Number of conflicts resolved.
156    pub conflicts_resolved: usize,
157    /// Number of sources processed.
158    pub source_count: usize,
159    /// Mean confidence of output triples.
160    pub mean_confidence: f64,
161    /// Number of aligned entity pairs detected.
162    pub aligned_entity_pairs: usize,
163}
164
165/// Result of a fusion operation.
166#[derive(Debug, Clone)]
167pub struct FusionResult {
168    /// The fused knowledge graph (list of triples).
169    pub triples: Vec<FusedTriple>,
170    /// Statistics.
171    pub stats: FusionStats,
172    /// Provenance map: triple_key → list of source_ids.
173    pub provenance: HashMap<(String, String, String), Vec<String>>,
174}
175
176// ---------------------------------------------------------------------------
177// KnowledgeFusion
178// ---------------------------------------------------------------------------
179
180/// Multi-source knowledge fusion engine.
181pub struct KnowledgeFusion {
182    config: FusionConfig,
183    sources: HashMap<String, SourceQuality>,
184    total_fusions: u64,
185}
186
187impl KnowledgeFusion {
188    /// Create a new fusion engine with the given configuration.
189    pub fn new(config: FusionConfig) -> Self {
190        Self {
191            config,
192            sources: HashMap::new(),
193            total_fusions: 0,
194        }
195    }
196
197    /// Register a source with its quality assessment.
198    pub fn register_source(
199        &mut self,
200        source_id: impl Into<String>,
201        accuracy: f64,
202        completeness: f64,
203        timeliness: f64,
204    ) {
205        let source_id = source_id.into();
206        let overall = self.config.accuracy_weight * accuracy
207            + self.config.completeness_weight * completeness
208            + self.config.timeliness_weight * timeliness;
209        self.sources.insert(
210            source_id.clone(),
211            SourceQuality {
212                source_id,
213                accuracy: accuracy.clamp(0.0, 1.0),
214                completeness: completeness.clamp(0.0, 1.0),
215                timeliness: timeliness.clamp(0.0, 1.0),
216                overall: overall.clamp(0.0, 1.0),
217            },
218        );
219    }
220
221    /// Fuse a collection of provenanced triples from multiple sources.
222    pub fn fuse(&mut self, triples: &[ProvenancedTriple]) -> FusionResult {
223        // Group triples by canonical key.
224        let mut groups: HashMap<(String, String, String), Vec<&ProvenancedTriple>> = HashMap::new();
225        for t in triples {
226            groups.entry(t.triple_key()).or_default().push(t);
227        }
228
229        let unique_keys = groups.len();
230        let source_ids: HashSet<&str> = triples.iter().map(|t| t.source_id.as_str()).collect();
231        let source_count = source_ids.len();
232
233        let mut fused_triples: Vec<FusedTriple> = Vec::new();
234        let mut provenance_map: HashMap<(String, String, String), Vec<String>> = HashMap::new();
235        let mut conflicts_resolved = 0;
236
237        for (key, group) in &groups {
238            let sources_for_key: Vec<String> = group.iter().map(|t| t.source_id.clone()).collect();
239            provenance_map.insert(key.clone(), sources_for_key.clone());
240
241            // Check for conflicts (different objects for same subject-predicate).
242            // In this simple model, group members have the same (s, p, o), so
243            // conflict = multiple groups with the same (s, p) but different o.
244            // We resolve by confidence aggregation within each group.
245
246            let fused_confidence = self.resolve_confidence(group);
247
248            if fused_confidence >= self.config.min_confidence {
249                let support = group.len();
250                fused_triples.push(FusedTriple {
251                    subject: key.0.clone(),
252                    predicate: key.1.clone(),
253                    object: key.2.clone(),
254                    confidence: fused_confidence,
255                    sources: sources_for_key,
256                    support_count: support,
257                });
258            }
259        }
260
261        // Detect conflicts: same (subject, predicate) with different objects.
262        let mut sp_map: HashMap<(String, String), Vec<String>> = HashMap::new();
263        for ft in &fused_triples {
264            sp_map
265                .entry((ft.subject.clone(), ft.predicate.clone()))
266                .or_default()
267                .push(ft.object.clone());
268        }
269
270        // Resolve conflicts.
271        let resolved_triples = fused_triples.clone();
272        for ((_s, _p), objects) in &sp_map {
273            if objects.len() > 1 {
274                conflicts_resolved += 1;
275                // Apply conflict strategy to pick the winning triple.
276                // In a full implementation, we'd modify resolved_triples here.
277            }
278        }
279
280        let mean_confidence = if resolved_triples.is_empty() {
281            0.0
282        } else {
283            resolved_triples.iter().map(|t| t.confidence).sum::<f64>()
284                / resolved_triples.len() as f64
285        };
286
287        self.total_fusions += 1;
288
289        FusionResult {
290            triples: resolved_triples,
291            stats: FusionStats {
292                input_triple_count: triples.len(),
293                unique_triple_keys: unique_keys,
294                output_triple_count: fused_triples.len(),
295                conflicts_resolved,
296                source_count,
297                mean_confidence,
298                aligned_entity_pairs: 0,
299            },
300            provenance: provenance_map,
301        }
302    }
303
304    /// Incremental fusion: add new triples to an existing fused graph.
305    pub fn fuse_incremental(
306        &mut self,
307        existing: &[FusedTriple],
308        new_triples: &[ProvenancedTriple],
309    ) -> FusionResult {
310        // Convert existing fused triples back to provenanced.
311        let mut all: Vec<ProvenancedTriple> = Vec::new();
312        for ft in existing {
313            let source_id = ft
314                .sources
315                .first()
316                .cloned()
317                .unwrap_or_else(|| "unknown".to_string());
318            all.push(ProvenancedTriple::new(
319                &ft.subject,
320                &ft.predicate,
321                &ft.object,
322                source_id,
323                ft.confidence,
324            ));
325        }
326        all.extend(new_triples.iter().cloned());
327        self.fuse(&all)
328    }
329
330    /// Detect aligned entities across sources using name similarity.
331    pub fn align_entities(&self, triples: &[ProvenancedTriple]) -> Vec<(String, String)> {
332        // Collect unique subjects per source.
333        let mut entities_by_source: HashMap<&str, HashSet<&str>> = HashMap::new();
334        for t in triples {
335            entities_by_source
336                .entry(&t.source_id)
337                .or_default()
338                .insert(&t.subject);
339        }
340
341        let source_ids: Vec<&&str> = entities_by_source.keys().collect();
342        let mut alignments: Vec<(String, String)> = Vec::new();
343
344        for i in 0..source_ids.len() {
345            for j in (i + 1)..source_ids.len() {
346                let entities_a = &entities_by_source[source_ids[i]];
347                let entities_b = &entities_by_source[source_ids[j]];
348                for &ea in entities_a {
349                    for &eb in entities_b {
350                        let sim = normalized_levenshtein(ea, eb);
351                        if sim >= self.config.entity_alignment_threshold && ea != eb {
352                            alignments.push((ea.to_string(), eb.to_string()));
353                        }
354                    }
355                }
356            }
357        }
358        alignments
359    }
360
361    /// Compute source quality score from registered sources.
362    pub fn source_quality(&self, source_id: &str) -> Option<&SourceQuality> {
363        self.sources.get(source_id)
364    }
365
366    /// Total number of fusion operations performed.
367    pub fn total_fusions(&self) -> u64 {
368        self.total_fusions
369    }
370
371    /// Number of registered sources.
372    pub fn registered_source_count(&self) -> usize {
373        self.sources.len()
374    }
375
376    // --- private helpers ---
377
378    /// Resolve confidence for a group of triples sharing the same key.
379    fn resolve_confidence(&self, group: &[&ProvenancedTriple]) -> f64 {
380        match self.config.conflict_strategy {
381            ConflictStrategy::Voting => {
382                // Confidence proportional to number of agreeing sources.
383                let max_possible = self.sources.len().max(group.len()) as f64;
384                if max_possible == 0.0 {
385                    group.iter().map(|t| t.confidence).sum::<f64>() / group.len().max(1) as f64
386                } else {
387                    group.len() as f64 / max_possible
388                }
389            }
390            ConflictStrategy::Recency => {
391                // Most recent timestamp.
392                group
393                    .iter()
394                    .max_by_key(|t| t.timestamp)
395                    .map(|t| t.confidence)
396                    .unwrap_or(0.0)
397            }
398            ConflictStrategy::Authority => {
399                // Highest source quality.
400                group
401                    .iter()
402                    .filter_map(|t| {
403                        self.sources
404                            .get(&t.source_id)
405                            .map(|sq| sq.overall * t.confidence)
406                    })
407                    .fold(0.0_f64, f64::max)
408                    .max(
409                        // Fallback if source not registered.
410                        group.iter().map(|t| t.confidence).fold(0.0_f64, f64::max),
411                    )
412            }
413            ConflictStrategy::AverageConfidence => {
414                let sum: f64 = group.iter().map(|t| t.confidence).sum();
415                sum / group.len().max(1) as f64
416            }
417        }
418    }
419}
420
421// ---------------------------------------------------------------------------
422// Free functions
423// ---------------------------------------------------------------------------
424
425/// Normalised Levenshtein similarity in [0, 1].
426fn normalized_levenshtein(a: &str, b: &str) -> f64 {
427    if a == b {
428        return 1.0;
429    }
430    let max_len = a.len().max(b.len());
431    if max_len == 0 {
432        return 1.0;
433    }
434    let dist = levenshtein_distance(a, b);
435    1.0 - (dist as f64 / max_len as f64)
436}
437
438/// Standard Levenshtein edit distance.
439fn levenshtein_distance(a: &str, b: &str) -> usize {
440    let a_chars: Vec<char> = a.chars().collect();
441    let b_chars: Vec<char> = b.chars().collect();
442    let m = a_chars.len();
443    let n = b_chars.len();
444
445    let mut prev = (0..=n).collect::<Vec<usize>>();
446    let mut curr = vec![0; n + 1];
447
448    for i in 1..=m {
449        curr[0] = i;
450        for j in 1..=n {
451            let cost = if a_chars[i - 1] == b_chars[j - 1] {
452                0
453            } else {
454                1
455            };
456            curr[j] = (prev[j] + 1).min(curr[j - 1] + 1).min(prev[j - 1] + cost);
457        }
458        std::mem::swap(&mut prev, &mut curr);
459    }
460    prev[n]
461}
462
463// ---------------------------------------------------------------------------
464// Tests
465// ---------------------------------------------------------------------------
466
467#[cfg(test)]
468mod tests {
469    use super::*;
470
471    fn default_fusion() -> KnowledgeFusion {
472        KnowledgeFusion::new(FusionConfig::default())
473    }
474
475    fn sample_triples() -> Vec<ProvenancedTriple> {
476        vec![
477            ProvenancedTriple::new("Alice", "knows", "Bob", "src1", 0.9),
478            ProvenancedTriple::new("Alice", "knows", "Bob", "src2", 0.8),
479            ProvenancedTriple::new("Bob", "likes", "Music", "src1", 0.7),
480        ]
481    }
482
483    // --- ProvenancedTriple ---
484
485    #[test]
486    fn test_provenanced_triple_creation() {
487        let t = ProvenancedTriple::new("A", "B", "C", "src", 0.5);
488        assert_eq!(t.subject, "A");
489        assert_eq!(t.predicate, "B");
490        assert_eq!(t.object, "C");
491        assert_eq!(t.source_id, "src");
492        assert!((t.confidence - 0.5).abs() < 1e-10);
493    }
494
495    #[test]
496    fn test_confidence_clamped() {
497        let t = ProvenancedTriple::new("A", "B", "C", "src", 1.5);
498        assert!((t.confidence - 1.0).abs() < 1e-10);
499    }
500
501    #[test]
502    fn test_triple_key() {
503        let t = ProvenancedTriple::new("A", "B", "C", "src", 0.5);
504        assert_eq!(
505            t.triple_key(),
506            ("A".to_string(), "B".to_string(), "C".to_string())
507        );
508    }
509
510    #[test]
511    fn test_with_timestamp() {
512        let t = ProvenancedTriple::new("A", "B", "C", "src", 0.5).with_timestamp(1000);
513        assert_eq!(t.timestamp, 1000);
514    }
515
516    // --- register_source ---
517
518    #[test]
519    fn test_register_source() {
520        let mut f = default_fusion();
521        f.register_source("src1", 0.9, 0.8, 0.7);
522        assert_eq!(f.registered_source_count(), 1);
523    }
524
525    #[test]
526    fn test_source_quality_retrieval() {
527        let mut f = default_fusion();
528        f.register_source("src1", 0.9, 0.8, 0.7);
529        let q = f.source_quality("src1").expect("should exist");
530        assert!((q.accuracy - 0.9).abs() < 1e-10);
531        assert!((q.completeness - 0.8).abs() < 1e-10);
532    }
533
534    #[test]
535    fn test_source_quality_overall() {
536        let mut f = default_fusion();
537        f.register_source("src1", 1.0, 1.0, 1.0);
538        let q = f.source_quality("src1").expect("should exist");
539        // overall = 0.5*1 + 0.3*1 + 0.2*1 = 1.0
540        assert!((q.overall - 1.0).abs() < 1e-10);
541    }
542
543    #[test]
544    fn test_unknown_source_returns_none() {
545        let f = default_fusion();
546        assert!(f.source_quality("nonexistent").is_none());
547    }
548
549    // --- basic fusion ---
550
551    #[test]
552    fn test_fuse_deduplicates() {
553        let mut f = default_fusion();
554        let triples = sample_triples();
555        let result = f.fuse(&triples);
556        // "Alice knows Bob" appears twice → fused into one
557        assert_eq!(result.stats.unique_triple_keys, 2);
558    }
559
560    #[test]
561    fn test_fuse_input_count() {
562        let mut f = default_fusion();
563        let result = f.fuse(&sample_triples());
564        assert_eq!(result.stats.input_triple_count, 3);
565    }
566
567    #[test]
568    fn test_fuse_source_count() {
569        let mut f = default_fusion();
570        let result = f.fuse(&sample_triples());
571        assert_eq!(result.stats.source_count, 2); // src1 and src2
572    }
573
574    #[test]
575    fn test_fused_triple_has_support_count() {
576        let mut f = default_fusion();
577        let result = f.fuse(&sample_triples());
578        // Find "Alice knows Bob"
579        let alice_bob = result
580            .triples
581            .iter()
582            .find(|t| t.subject == "Alice" && t.object == "Bob");
583        assert!(alice_bob.is_some());
584        assert_eq!(alice_bob.map(|t| t.support_count).unwrap_or(0), 2);
585    }
586
587    #[test]
588    fn test_fused_triple_has_sources() {
589        let mut f = default_fusion();
590        let result = f.fuse(&sample_triples());
591        let alice_bob = result
592            .triples
593            .iter()
594            .find(|t| t.subject == "Alice" && t.object == "Bob")
595            .expect("should find fused triple");
596        assert!(alice_bob.sources.contains(&"src1".to_string()));
597        assert!(alice_bob.sources.contains(&"src2".to_string()));
598    }
599
600    // --- provenance tracking ---
601
602    #[test]
603    fn test_provenance_map_populated() {
604        let mut f = default_fusion();
605        let result = f.fuse(&sample_triples());
606        let key = ("Alice".to_string(), "knows".to_string(), "Bob".to_string());
607        let sources = result.provenance.get(&key).expect("should have provenance");
608        assert_eq!(sources.len(), 2);
609    }
610
611    // --- conflict strategies ---
612
613    #[test]
614    fn test_voting_strategy() {
615        let mut f = KnowledgeFusion::new(FusionConfig {
616            conflict_strategy: ConflictStrategy::Voting,
617            min_confidence: 0.0,
618            ..FusionConfig::default()
619        });
620        let result = f.fuse(&sample_triples());
621        assert!(!result.triples.is_empty());
622    }
623
624    #[test]
625    fn test_recency_strategy() {
626        let mut f = KnowledgeFusion::new(FusionConfig {
627            conflict_strategy: ConflictStrategy::Recency,
628            min_confidence: 0.0,
629            ..FusionConfig::default()
630        });
631        let triples = vec![
632            ProvenancedTriple::new("A", "p", "B", "s1", 0.5).with_timestamp(100),
633            ProvenancedTriple::new("A", "p", "B", "s2", 0.9).with_timestamp(200),
634        ];
635        let result = f.fuse(&triples);
636        // Should pick the one with timestamp 200 (confidence 0.9).
637        let fused = &result.triples[0];
638        assert!((fused.confidence - 0.9).abs() < 1e-10);
639    }
640
641    #[test]
642    fn test_authority_strategy() {
643        let mut f = KnowledgeFusion::new(FusionConfig {
644            conflict_strategy: ConflictStrategy::Authority,
645            min_confidence: 0.0,
646            ..FusionConfig::default()
647        });
648        f.register_source("high_quality", 1.0, 1.0, 1.0);
649        f.register_source("low_quality", 0.1, 0.1, 0.1);
650        let triples = vec![
651            ProvenancedTriple::new("A", "p", "B", "high_quality", 0.8),
652            ProvenancedTriple::new("A", "p", "B", "low_quality", 0.8),
653        ];
654        let result = f.fuse(&triples);
655        assert!(!result.triples.is_empty());
656        // Authority strategy should produce a confidence > 0 at least.
657        assert!(result.triples[0].confidence > 0.0);
658    }
659
660    #[test]
661    fn test_average_confidence_strategy() {
662        let mut f = KnowledgeFusion::new(FusionConfig {
663            conflict_strategy: ConflictStrategy::AverageConfidence,
664            min_confidence: 0.0,
665            ..FusionConfig::default()
666        });
667        let triples = vec![
668            ProvenancedTriple::new("A", "p", "B", "s1", 0.6),
669            ProvenancedTriple::new("A", "p", "B", "s2", 0.8),
670        ];
671        let result = f.fuse(&triples);
672        assert!((result.triples[0].confidence - 0.7).abs() < 1e-10);
673    }
674
675    // --- min_confidence filtering ---
676
677    #[test]
678    fn test_min_confidence_filters_low() {
679        let mut f = KnowledgeFusion::new(FusionConfig {
680            conflict_strategy: ConflictStrategy::AverageConfidence,
681            min_confidence: 0.8,
682            ..FusionConfig::default()
683        });
684        let triples = vec![ProvenancedTriple::new("A", "p", "B", "s1", 0.3)];
685        let result = f.fuse(&triples);
686        assert!(
687            result.triples.is_empty(),
688            "low confidence should be filtered"
689        );
690    }
691
692    // --- incremental fusion ---
693
694    #[test]
695    fn test_incremental_fusion() {
696        let mut f = KnowledgeFusion::new(FusionConfig {
697            conflict_strategy: ConflictStrategy::AverageConfidence,
698            min_confidence: 0.0,
699            ..FusionConfig::default()
700        });
701
702        let existing = vec![FusedTriple {
703            subject: "A".into(),
704            predicate: "p".into(),
705            object: "B".into(),
706            confidence: 0.8,
707            sources: vec!["s1".into()],
708            support_count: 1,
709        }];
710
711        let new_triples = vec![
712            ProvenancedTriple::new("A", "p", "B", "s2", 0.9),
713            ProvenancedTriple::new("C", "q", "D", "s2", 0.7),
714        ];
715
716        let result = f.fuse_incremental(&existing, &new_triples);
717        assert!(result.triples.len() >= 2, "should have at least 2 triples");
718    }
719
720    #[test]
721    fn test_incremental_increases_support() {
722        let mut f = KnowledgeFusion::new(FusionConfig {
723            conflict_strategy: ConflictStrategy::AverageConfidence,
724            min_confidence: 0.0,
725            ..FusionConfig::default()
726        });
727
728        let existing = vec![FusedTriple {
729            subject: "A".into(),
730            predicate: "p".into(),
731            object: "B".into(),
732            confidence: 0.8,
733            sources: vec!["s1".into()],
734            support_count: 1,
735        }];
736
737        let new = vec![ProvenancedTriple::new("A", "p", "B", "s2", 0.9)];
738        let result = f.fuse_incremental(&existing, &new);
739
740        let ab = result
741            .triples
742            .iter()
743            .find(|t| t.subject == "A" && t.object == "B")
744            .expect("should exist");
745        assert_eq!(ab.support_count, 2);
746    }
747
748    // --- entity alignment ---
749
750    #[test]
751    fn test_align_entities_similar_names() {
752        let f = KnowledgeFusion::new(FusionConfig {
753            entity_alignment_threshold: 0.8,
754            ..FusionConfig::default()
755        });
756        let triples = vec![
757            ProvenancedTriple::new("Alice_Smith", "knows", "Bob", "s1", 0.9),
758            ProvenancedTriple::new("Alice_Smit", "knows", "Carol", "s2", 0.8),
759        ];
760        let alignments = f.align_entities(&triples);
761        // "Alice_Smith" vs "Alice_Smit" → high similarity
762        assert!(!alignments.is_empty(), "should detect similar entity names");
763    }
764
765    #[test]
766    fn test_align_entities_exact_same_not_aligned() {
767        let f = default_fusion();
768        let triples = vec![
769            ProvenancedTriple::new("Alice", "knows", "Bob", "s1", 0.9),
770            ProvenancedTriple::new("Alice", "likes", "Carol", "s2", 0.8),
771        ];
772        let alignments = f.align_entities(&triples);
773        // Same name "Alice" → should NOT be in alignments (they're the same entity)
774        assert!(
775            alignments.is_empty(),
776            "exact same names should not produce alignment"
777        );
778    }
779
780    #[test]
781    fn test_align_entities_completely_different() {
782        let f = default_fusion();
783        let triples = vec![
784            ProvenancedTriple::new("Alice", "knows", "Bob", "s1", 0.9),
785            ProvenancedTriple::new("Xyz123", "likes", "Carol", "s2", 0.8),
786        ];
787        let alignments = f.align_entities(&triples);
788        assert!(alignments.is_empty());
789    }
790
791    // --- total fusions ---
792
793    #[test]
794    fn test_total_fusions_initially_zero() {
795        let f = default_fusion();
796        assert_eq!(f.total_fusions(), 0);
797    }
798
799    #[test]
800    fn test_total_fusions_increments() {
801        let mut f = default_fusion();
802        f.fuse(&sample_triples());
803        f.fuse(&sample_triples());
804        assert_eq!(f.total_fusions(), 2);
805    }
806
807    // --- empty inputs ---
808
809    #[test]
810    fn test_fuse_empty() {
811        let mut f = default_fusion();
812        let result = f.fuse(&[]);
813        assert!(result.triples.is_empty());
814        assert_eq!(result.stats.input_triple_count, 0);
815    }
816
817    // --- FusionConfig default ---
818
819    #[test]
820    fn test_config_default_values() {
821        let config = FusionConfig::default();
822        assert_eq!(config.conflict_strategy, ConflictStrategy::Voting);
823        assert!((config.min_confidence - 0.3).abs() < 1e-10);
824        assert!((config.entity_alignment_threshold - 0.8).abs() < 1e-10);
825    }
826
827    // --- FusionStats ---
828
829    #[test]
830    fn test_fusion_stats_mean_confidence() {
831        let mut f = KnowledgeFusion::new(FusionConfig {
832            conflict_strategy: ConflictStrategy::AverageConfidence,
833            min_confidence: 0.0,
834            ..FusionConfig::default()
835        });
836        let triples = vec![
837            ProvenancedTriple::new("A", "p", "B", "s1", 0.6),
838            ProvenancedTriple::new("C", "q", "D", "s1", 0.8),
839        ];
840        let result = f.fuse(&triples);
841        assert!(result.stats.mean_confidence > 0.0);
842    }
843
844    // --- single source ---
845
846    #[test]
847    fn test_single_source_fusion() {
848        let mut f = KnowledgeFusion::new(FusionConfig {
849            conflict_strategy: ConflictStrategy::AverageConfidence,
850            min_confidence: 0.0,
851            ..FusionConfig::default()
852        });
853        let triples = vec![ProvenancedTriple::new("A", "p", "B", "s1", 0.9)];
854        let result = f.fuse(&triples);
855        assert_eq!(result.triples.len(), 1);
856        assert_eq!(result.stats.source_count, 1);
857    }
858}
oxirs_graphrag/knowledge_fusion.rs

oxirs_graphrag/
knowledge_fusion.rs