Skip to main content

codelens_engine/
community.rs

1//! Louvain-style community detection on the import graph.
2//! Produces a high-level architecture overview by grouping tightly-coupled files.
3
4use crate::import_graph::FileNode;
5use anyhow::Result;
6use petgraph::graph::{NodeIndex, UnGraph};
7use serde::Serialize;
8use std::collections::HashMap;
9
10#[derive(Debug, Clone, Serialize)]
11pub struct Community {
12    pub id: usize,
13    pub files: Vec<String>,
14    pub size: usize,
15    /// Internal edge count / total possible edges (density)
16    pub density: f64,
17    /// Descriptive label derived from common path prefix
18    pub label: String,
19}
20
21#[derive(Debug, Clone, Serialize)]
22pub struct ArchitectureOverview {
23    pub communities: Vec<Community>,
24    pub total_files: usize,
25    pub total_edges: usize,
26    pub modularity: f64,
27}
28
29/// Build an architecture overview from the import graph using Louvain community detection.
30pub fn detect_communities(
31    graph: &HashMap<String, FileNode>,
32    min_community_size: usize,
33) -> Result<ArchitectureOverview> {
34    if graph.is_empty() {
35        return Ok(ArchitectureOverview {
36            communities: Vec::new(),
37            total_files: 0,
38            total_edges: 0,
39            modularity: 0.0,
40        });
41    }
42
43    // Build petgraph undirected graph
44    let mut pg = UnGraph::<String, ()>::new_undirected();
45    let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
46
47    for file in graph.keys() {
48        let idx = pg.add_node(file.clone());
49        node_map.insert(file.clone(), idx);
50    }
51
52    let mut edge_count = 0;
53    for (file, node) in graph {
54        if let Some(&src) = node_map.get(file) {
55            for imported in &node.imports {
56                if let Some(&dst) = node_map.get(imported)
57                    && src != dst
58                    && !pg.contains_edge(src, dst)
59                {
60                    pg.add_edge(src, dst, ());
61                    edge_count += 1;
62                }
63            }
64        }
65    }
66
67    // Louvain Phase 1: greedy modularity optimization
68    let n = pg.node_count();
69    let m = edge_count.max(1) as f64;
70
71    // Initialize: each node in its own community
72    let mut community: Vec<usize> = (0..n).collect();
73    let node_indices: Vec<NodeIndex> = pg.node_indices().collect();
74
75    // Degree of each node
76    let degree: Vec<f64> = node_indices
77        .iter()
78        .map(|&ni| pg.edges(ni).count() as f64)
79        .collect();
80
81    // Pre-compute community degree sums for O(1) lookup (instead of O(n) per query)
82    let mut comm_degree_sum: HashMap<usize, f64> = HashMap::new();
83    for i in 0..n {
84        *comm_degree_sum.entry(community[i]).or_default() += degree[i];
85    }
86
87    // Iterative improvement (simplified Louvain — single level)
88    let mut improved = true;
89    let mut iterations = 0;
90    while improved && iterations < 20 {
91        improved = false;
92        iterations += 1;
93
94        for i in 0..n {
95            let ni = node_indices[i];
96            let current_comm = community[i];
97
98            // Count edges to each neighboring community
99            let mut comm_edges: HashMap<usize, f64> = HashMap::new();
100            for neighbor in pg.neighbors(ni) {
101                let j = neighbor.index();
102                let c = community[j];
103                *comm_edges.entry(c).or_default() += 1.0;
104            }
105
106            // Find the community that gives the best modularity gain
107            let ki = degree[i];
108            let mut best_comm = current_comm;
109            let mut best_gain = 0.0_f64;
110
111            for (&c, &edges_to_c) in &comm_edges {
112                if c == current_comm {
113                    continue;
114                }
115                let sigma_c = comm_degree_sum.get(&c).copied().unwrap_or(0.0);
116                let gain = edges_to_c / m - (sigma_c * ki) / (2.0 * m * m);
117                if gain > best_gain {
118                    best_gain = gain;
119                    best_comm = c;
120                }
121            }
122
123            if best_comm != current_comm {
124                // Update degree sum cache: remove from old, add to new
125                *comm_degree_sum.entry(current_comm).or_default() -= ki;
126                *comm_degree_sum.entry(best_comm).or_default() += ki;
127                community[i] = best_comm;
128                improved = true;
129            }
130        }
131    }
132
133    // Collect communities
134    let mut comm_files: HashMap<usize, Vec<String>> = HashMap::new();
135    for (i, &c) in community.iter().enumerate() {
136        let file_name = pg[node_indices[i]].clone();
137        comm_files.entry(c).or_default().push(file_name);
138    }
139
140    // Calculate modularity Q
141    let modularity = calculate_modularity(&community, &node_indices, &pg, m);
142
143    // Build result
144    let mut communities: Vec<Community> = comm_files
145        .into_iter()
146        .filter(|(_, files)| files.len() >= min_community_size)
147        .map(|(id, mut files)| {
148            files.sort();
149            let size = files.len();
150            let label = common_path_prefix(&files);
151            let density = community_density(id, &community, &node_indices, &pg);
152            Community {
153                id,
154                files,
155                size,
156                density,
157                label,
158            }
159        })
160        .collect();
161
162    communities.sort_by(|a, b| b.size.cmp(&a.size));
163
164    // Re-number community IDs sequentially
165    for (i, comm) in communities.iter_mut().enumerate() {
166        comm.id = i;
167    }
168
169    Ok(ArchitectureOverview {
170        total_files: n,
171        total_edges: edge_count,
172        modularity,
173        communities,
174    })
175}
176
177fn calculate_modularity(
178    community: &[usize],
179    node_indices: &[NodeIndex],
180    pg: &UnGraph<String, ()>,
181    m: f64,
182) -> f64 {
183    let mut q = 0.0;
184    let n = community.len();
185    for i in 0..n {
186        for j in (i + 1)..n {
187            if community[i] != community[j] {
188                continue;
189            }
190            let ki = pg.edges(node_indices[i]).count() as f64;
191            let kj = pg.edges(node_indices[j]).count() as f64;
192            let aij = if pg.contains_edge(node_indices[i], node_indices[j]) {
193                1.0
194            } else {
195                0.0
196            };
197            q += aij - (ki * kj) / (2.0 * m);
198        }
199    }
200    q / (2.0 * m).max(1.0)
201}
202
203fn community_density(
204    comm_id: usize,
205    community: &[usize],
206    node_indices: &[NodeIndex],
207    pg: &UnGraph<String, ()>,
208) -> f64 {
209    let members: Vec<usize> = community
210        .iter()
211        .enumerate()
212        .filter(|(_, c)| **c == comm_id)
213        .map(|(i, _)| i)
214        .collect();
215    let n = members.len();
216    if n <= 1 {
217        return 1.0;
218    }
219    let mut internal_edges = 0;
220    for (a_idx, &i) in members.iter().enumerate() {
221        for &j in &members[a_idx + 1..] {
222            if pg.contains_edge(node_indices[i], node_indices[j]) {
223                internal_edges += 1;
224            }
225        }
226    }
227    let possible = n * (n - 1) / 2;
228    internal_edges as f64 / possible.max(1) as f64
229}
230
231fn common_path_prefix(files: &[String]) -> String {
232    if files.is_empty() {
233        return String::new();
234    }
235    if files.len() == 1 {
236        return files[0].rsplit('/').nth(1).unwrap_or(&files[0]).to_owned();
237    }
238
239    let parts: Vec<Vec<&str>> = files.iter().map(|f| f.split('/').collect()).collect();
240    let mut prefix = Vec::new();
241    let min_len = parts.iter().map(|p| p.len()).min().unwrap_or(0);
242
243    for i in 0..min_len {
244        let segment = parts[0][i];
245        if parts.iter().all(|p| p[i] == segment) {
246            prefix.push(segment);
247        } else {
248            break;
249        }
250    }
251
252    if prefix.is_empty() {
253        "root".to_owned()
254    } else {
255        prefix.join("/")
256    }
257}
258
259#[cfg(test)]
260mod tests {
261    use super::*;
262    use crate::import_graph::FileNode;
263    use std::collections::HashSet;
264
265    fn node(imports: &[&str], imported_by: &[&str]) -> FileNode {
266        FileNode {
267            imports: imports
268                .iter()
269                .map(|s| s.to_string())
270                .collect::<HashSet<_>>(),
271            imported_by: imported_by
272                .iter()
273                .map(|s| s.to_string())
274                .collect::<HashSet<_>>(),
275        }
276    }
277
278    #[test]
279    fn detects_two_communities() {
280        let mut graph = HashMap::new();
281        // Cluster A: a1 ↔ a2 ↔ a3
282        graph.insert("a1".into(), node(&["a2", "a3"], &["a2"]));
283        graph.insert("a2".into(), node(&["a1", "a3"], &["a1"]));
284        graph.insert("a3".into(), node(&["a1"], &["a1", "a2"]));
285        // Cluster B: b1 ↔ b2
286        graph.insert("b1".into(), node(&["b2"], &["b2"]));
287        graph.insert("b2".into(), node(&["b1"], &["b1"]));
288        // One cross-link
289        graph.insert("bridge".into(), node(&["a1", "b1"], &[]));
290
291        let result = detect_communities(&graph, 2).unwrap();
292        assert!(
293            result.communities.len() >= 2,
294            "expected >= 2 communities, got {}",
295            result.communities.len()
296        );
297        assert!(result.modularity > 0.0, "modularity should be positive");
298    }
299
300    #[test]
301    fn empty_graph_returns_empty() {
302        let graph = HashMap::new();
303        let result = detect_communities(&graph, 1).unwrap();
304        assert!(result.communities.is_empty());
305        assert_eq!(result.total_files, 0);
306    }
307}