use fxhash::{FxHashMap, FxHashSet};
use crate::core::triple_store::TripleStore;
#[derive(Clone, Debug)]
pub struct KnowledgeGap {
pub entity: String,
pub missing_relation: String,
pub peers_with_relation: Vec<String>,
pub peer_coverage: f64,
}
pub struct GapDetector;
impl GapDetector {
pub fn detect(
triple_store: &TripleStore,
min_shared_relations: usize,
min_peer_coverage: f64,
) -> Vec<KnowledgeGap> {
let snapshot = triple_store.snapshot();
let mut entity_rels: FxHashMap<String, FxHashSet<String>> = FxHashMap::default();
for t in &snapshot {
entity_rels
.entry(t.subject_id.clone())
.or_default()
.insert(t.relation_id.clone());
}
let mut rel_subjects: FxHashMap<String, FxHashSet<String>> = FxHashMap::default();
for t in &snapshot {
rel_subjects
.entry(t.relation_id.clone())
.or_default()
.insert(t.subject_id.clone());
}
let entities: Vec<(String, FxHashSet<String>)> = entity_rels.into_iter().collect();
let mut gaps = Vec::new();
for (entity, rels) in &entities {
let mut peer_candidates: FxHashMap<String, usize> = FxHashMap::default();
for rel in rels {
if let Some(subjects) = rel_subjects.get(rel) {
for peer in subjects {
if peer != entity {
*peer_candidates.entry(peer.clone()).or_insert(0) += 1;
}
}
}
}
let peers: Vec<String> = peer_candidates
.into_iter()
.filter(|&(_, count)| count >= min_shared_relations)
.map(|(id, _)| id)
.collect();
if peers.is_empty() {
continue;
}
let mut peer_rel_counts: FxHashMap<String, usize> = FxHashMap::default();
for peer in &peers {
if let Some((_, peer_rels)) = entities.iter().find(|(id, _)| id == peer) {
for rel in peer_rels {
*peer_rel_counts.entry(rel.clone()).or_insert(0) += 1;
}
}
}
let peer_count = peers.len();
for (rel, count) in &peer_rel_counts {
if rels.contains(rel) {
continue;
}
let coverage = *count as f64 / peer_count as f64;
if coverage >= min_peer_coverage {
let peers_with: Vec<String> = peers
.iter()
.filter(|p| {
entities
.iter()
.find(|(id, _)| id == *p)
.is_some_and(|(_, pr)| pr.contains(rel))
})
.cloned()
.collect();
gaps.push(KnowledgeGap {
entity: entity.clone(),
missing_relation: rel.clone(),
peers_with_relation: peers_with,
peer_coverage: coverage,
});
}
}
}
gaps.sort_by(|a, b| {
b.peer_coverage
.partial_cmp(&a.peer_coverage)
.unwrap_or(std::cmp::Ordering::Equal)
});
gaps
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_detect_gaps() {
let store = TripleStore::new();
store.add("paris", "capital_of", "france", "c1");
store.add("paris", "located_in", "europe", "c2");
store.add("berlin", "capital_of", "germany", "c3");
store.add("berlin", "located_in", "europe", "c4");
store.add("tokyo", "capital_of", "japan", "c5");
let gaps = GapDetector::detect(&store, 1, 0.5);
let tokyo_gap = gaps
.iter()
.find(|g| g.entity == "tokyo" && g.missing_relation == "located_in");
assert!(tokyo_gap.is_some(), "Tokyo should be missing located_in");
let gap = tokyo_gap.unwrap();
assert!(gap.peer_coverage >= 0.5);
assert!(!gap.peers_with_relation.is_empty());
}
#[test]
fn test_no_gaps_when_complete() {
let store = TripleStore::new();
store.add("a", "r1", "x", "c1");
store.add("b", "r1", "y", "c2");
let gaps = GapDetector::detect(&store, 1, 0.5);
assert!(gaps.is_empty());
}
#[test]
fn test_no_gaps_empty_store() {
let store = TripleStore::new();
let gaps = GapDetector::detect(&store, 1, 0.5);
assert!(gaps.is_empty());
}
#[test]
fn test_high_coverage_threshold() {
let store = TripleStore::new();
store.add("a", "r1", "x", "c1");
store.add("a", "r2", "x", "c2");
store.add("b", "r1", "y", "c3");
let gaps = GapDetector::detect(&store, 1, 1.0);
let b_gap = gaps
.iter()
.find(|g| g.entity == "b" && g.missing_relation == "r2");
assert!(b_gap.is_some());
}
}