use crate::memory_core::store::kg::KnowledgeGraph;
use std::collections::HashMap;
#[derive(Debug, Clone)]
pub struct KnowledgeGap {
pub entities: Vec<String>,
pub internal_density: f32,
pub external_bridges: usize,
pub suggested_exploration: String,
}
pub fn find_communities(kg: &KnowledgeGraph) -> Vec<KnowledgeGap> {
let (nodes, edges) = match kg.snapshot_undirected() {
Ok(snapshot) => snapshot,
Err(_) => return Vec::new(),
};
if nodes.is_empty() {
return Vec::new();
}
let communities = louvain_partition(nodes.len(), &edges);
let mut node_to_community: Vec<usize> = vec![0; nodes.len()];
for (cid, members) in communities.iter().enumerate() {
for &m in members {
node_to_community[m] = cid;
}
}
let mut internal_edges: Vec<usize> = vec![0; communities.len()];
let mut bridge_edges: Vec<usize> = vec![0; communities.len()];
for &(u, v) in &edges {
let cu = node_to_community[u];
let cv = node_to_community[v];
if cu == cv {
internal_edges[cu] += 1;
} else {
bridge_edges[cu] += 1;
bridge_edges[cv] += 1;
}
}
let mut degree: Vec<usize> = vec![0; nodes.len()];
for &(u, v) in &edges {
degree[u] += 1;
degree[v] += 1;
}
let mut gaps: Vec<KnowledgeGap> = Vec::new();
for (cid, members) in communities.iter().enumerate() {
let n = members.len();
let possible = if n >= 2 { n * (n - 1) / 2 } else { 0 };
let density = if possible == 0 {
0.0
} else {
internal_edges[cid] as f32 / possible as f32
};
let bridges = bridge_edges[cid];
if density < 0.2 && bridges <= n {
let entities: Vec<String> = members.iter().map(|&i| nodes[i].clone()).collect();
let rep_idx = members
.iter()
.copied()
.max_by_key(|&i| degree[i])
.unwrap_or(members[0]);
let representative = nodes[rep_idx].clone();
gaps.push(KnowledgeGap {
entities,
internal_density: density,
external_bridges: bridges,
suggested_exploration: format!(
"Explore connections between {representative} and related concepts"
),
});
}
}
gaps
}
pub fn partition(kg: &KnowledgeGraph) -> Vec<Vec<String>> {
let (nodes, edges) = match kg.snapshot_undirected() {
Ok(snapshot) => snapshot,
Err(_) => return Vec::new(),
};
if nodes.is_empty() {
return Vec::new();
}
let communities = louvain_partition(nodes.len(), &edges);
communities
.into_iter()
.map(|members| members.into_iter().map(|i| nodes[i].clone()).collect())
.collect()
}
fn louvain_partition(num_nodes: usize, edges: &[(usize, usize)]) -> Vec<Vec<usize>> {
if num_nodes == 0 {
return Vec::new();
}
let mut g = WeightedGraph::from_unit_edges(num_nodes, edges);
let mut community_of_original: Vec<usize> = (0..num_nodes).collect();
loop {
let local_partition = phase1_local_move(&g);
let new_community_count = max_or_zero(&local_partition) + 1;
for c in community_of_original.iter_mut() {
*c = local_partition[*c];
}
if new_community_count == g.num_nodes() {
break;
}
g = g.collapse(&local_partition, new_community_count);
if new_community_count <= 1 {
break;
}
}
let mut label_remap: HashMap<usize, usize> = HashMap::new();
let mut out: Vec<Vec<usize>> = Vec::new();
for (node, &raw_label) in community_of_original.iter().enumerate() {
let cid = *label_remap.entry(raw_label).or_insert_with(|| {
out.push(Vec::new());
out.len() - 1
});
out[cid].push(node);
}
out
}
struct WeightedGraph {
n: usize,
two_m: f64,
degree: Vec<f64>,
adjacency: Vec<HashMap<usize, f64>>,
}
impl WeightedGraph {
fn from_unit_edges(n: usize, edges: &[(usize, usize)]) -> Self {
let mut adjacency: Vec<HashMap<usize, f64>> = vec![HashMap::new(); n];
for &(u, v) in edges {
*adjacency[u].entry(v).or_insert(0.0) += 1.0;
if u != v {
*adjacency[v].entry(u).or_insert(0.0) += 1.0;
}
}
let mut degree = vec![0.0; n];
let mut two_m = 0.0;
for (u, neighbours) in adjacency.iter().enumerate() {
for (&v, &w) in neighbours {
degree[u] += w;
if u == v {
degree[u] += w;
two_m += 2.0 * w;
} else {
two_m += w;
}
}
}
let mut two_m_clean = 0.0;
for d in °ree {
two_m_clean += *d;
}
WeightedGraph {
n,
two_m: two_m_clean.max(f64::EPSILON),
degree,
adjacency,
}
}
fn num_nodes(&self) -> usize {
self.n
}
fn collapse(&self, community_of: &[usize], num_communities: usize) -> WeightedGraph {
let mut adjacency: Vec<HashMap<usize, f64>> = vec![HashMap::new(); num_communities];
for u in 0..self.n {
let cu = community_of[u];
for (&v, &w) in &self.adjacency[u] {
let cv = community_of[v];
if u > v {
continue;
}
*adjacency[cu].entry(cv).or_insert(0.0) += w;
if cu != cv {
*adjacency[cv].entry(cu).or_insert(0.0) += w;
}
}
}
let mut degree = vec![0.0; num_communities];
for (c, neighbours) in adjacency.iter().enumerate() {
for (&d, &w) in neighbours {
degree[c] += w;
if c == d {
degree[c] += w;
}
}
}
let mut two_m_clean = 0.0;
for d in °ree {
two_m_clean += *d;
}
WeightedGraph {
n: num_communities,
two_m: two_m_clean.max(f64::EPSILON),
degree,
adjacency,
}
}
}
fn phase1_local_move(g: &WeightedGraph) -> Vec<usize> {
let n = g.num_nodes();
if n == 0 {
return Vec::new();
}
let mut community: Vec<usize> = (0..n).collect();
let mut sigma_tot: Vec<f64> = g.degree.clone();
let two_m = g.two_m;
if two_m <= f64::EPSILON {
return community;
}
let max_passes = 32usize;
for _ in 0..max_passes {
let mut moved = false;
for u in 0..n {
let cu = community[u];
let k_u = g.degree[u];
let mut k_in_by_c: HashMap<usize, f64> = HashMap::new();
for (&v, &w) in &g.adjacency[u] {
if v == u {
continue;
}
*k_in_by_c.entry(community[v]).or_insert(0.0) += w;
}
let self_loop = g.adjacency[u].get(&u).copied().unwrap_or(0.0);
sigma_tot[cu] -= k_u;
let k_in_cu = k_in_by_c.get(&cu).copied().unwrap_or(0.0);
let mut best_c = cu;
let mut best_gain = 0.0_f64;
for (&c, &k_in_c) in &k_in_by_c {
let gain = k_in_c / two_m - sigma_tot[c] * k_u / (two_m * two_m);
if gain > best_gain + 1e-12 {
best_gain = gain;
best_c = c;
}
}
let stay_gain = k_in_cu / two_m - sigma_tot[cu] * k_u / (two_m * two_m);
if best_gain <= stay_gain + 1e-12 {
best_c = cu;
}
community[u] = best_c;
sigma_tot[best_c] += k_u;
if best_c != cu {
moved = true;
}
let _ = self_loop;
}
if !moved {
break;
}
}
let mut remap: HashMap<usize, usize> = HashMap::new();
let mut next_id = 0usize;
for c in community.iter_mut() {
*c = *remap.entry(*c).or_insert_with(|| {
let id = next_id;
next_id += 1;
id
});
}
community
}
fn max_or_zero(xs: &[usize]) -> usize {
xs.iter().copied().max().unwrap_or(0)
}