1use crate::import_graph::FileNode;
5use anyhow::Result;
6use petgraph::graph::{NodeIndex, UnGraph};
7use serde::Serialize;
8use std::collections::HashMap;
9
10#[derive(Debug, Clone, Serialize)]
11pub struct Community {
12 pub id: usize,
13 pub files: Vec<String>,
14 pub size: usize,
15 pub density: f64,
17 pub label: String,
19}
20
21#[derive(Debug, Clone, Serialize)]
22pub struct ArchitectureOverview {
23 pub communities: Vec<Community>,
24 pub total_files: usize,
25 pub total_edges: usize,
26 pub modularity: f64,
27}
28
29pub fn detect_communities(
31 graph: &HashMap<String, FileNode>,
32 min_community_size: usize,
33) -> Result<ArchitectureOverview> {
34 if graph.is_empty() {
35 return Ok(ArchitectureOverview {
36 communities: Vec::new(),
37 total_files: 0,
38 total_edges: 0,
39 modularity: 0.0,
40 });
41 }
42
43 let mut pg = UnGraph::<String, ()>::new_undirected();
45 let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
46
47 for file in graph.keys() {
48 let idx = pg.add_node(file.clone());
49 node_map.insert(file.clone(), idx);
50 }
51
52 let mut edge_count = 0;
53 for (file, node) in graph {
54 if let Some(&src) = node_map.get(file) {
55 for imported in &node.imports {
56 if let Some(&dst) = node_map.get(imported)
57 && src != dst
58 && !pg.contains_edge(src, dst)
59 {
60 pg.add_edge(src, dst, ());
61 edge_count += 1;
62 }
63 }
64 }
65 }
66
67 let n = pg.node_count();
69 let m = edge_count.max(1) as f64;
70
71 let mut community: Vec<usize> = (0..n).collect();
73 let node_indices: Vec<NodeIndex> = pg.node_indices().collect();
74
75 let degree: Vec<f64> = node_indices
77 .iter()
78 .map(|&ni| pg.edges(ni).count() as f64)
79 .collect();
80
81 let mut comm_degree_sum: HashMap<usize, f64> = HashMap::new();
83 for i in 0..n {
84 *comm_degree_sum.entry(community[i]).or_default() += degree[i];
85 }
86
87 let mut improved = true;
89 let mut iterations = 0;
90 while improved && iterations < 20 {
91 improved = false;
92 iterations += 1;
93
94 for i in 0..n {
95 let ni = node_indices[i];
96 let current_comm = community[i];
97
98 let mut comm_edges: HashMap<usize, f64> = HashMap::new();
100 for neighbor in pg.neighbors(ni) {
101 let j = neighbor.index();
102 let c = community[j];
103 *comm_edges.entry(c).or_default() += 1.0;
104 }
105
106 let ki = degree[i];
108 let mut best_comm = current_comm;
109 let mut best_gain = 0.0_f64;
110
111 for (&c, &edges_to_c) in &comm_edges {
112 if c == current_comm {
113 continue;
114 }
115 let sigma_c = comm_degree_sum.get(&c).copied().unwrap_or(0.0);
116 let gain = edges_to_c / m - (sigma_c * ki) / (2.0 * m * m);
117 if gain > best_gain {
118 best_gain = gain;
119 best_comm = c;
120 }
121 }
122
123 if best_comm != current_comm {
124 *comm_degree_sum.entry(current_comm).or_default() -= ki;
126 *comm_degree_sum.entry(best_comm).or_default() += ki;
127 community[i] = best_comm;
128 improved = true;
129 }
130 }
131 }
132
133 let mut comm_files: HashMap<usize, Vec<String>> = HashMap::new();
135 for (i, &c) in community.iter().enumerate() {
136 let file_name = pg[node_indices[i]].clone();
137 comm_files.entry(c).or_default().push(file_name);
138 }
139
140 let modularity = calculate_modularity(&community, &node_indices, &pg, m);
142
143 let mut communities: Vec<Community> = comm_files
145 .into_iter()
146 .filter(|(_, files)| files.len() >= min_community_size)
147 .map(|(id, mut files)| {
148 files.sort();
149 let size = files.len();
150 let label = common_path_prefix(&files);
151 let density = community_density(id, &community, &node_indices, &pg);
152 Community {
153 id,
154 files,
155 size,
156 density,
157 label,
158 }
159 })
160 .collect();
161
162 communities.sort_by(|a, b| b.size.cmp(&a.size));
163
164 for (i, comm) in communities.iter_mut().enumerate() {
166 comm.id = i;
167 }
168
169 Ok(ArchitectureOverview {
170 total_files: n,
171 total_edges: edge_count,
172 modularity,
173 communities,
174 })
175}
176
177fn calculate_modularity(
178 community: &[usize],
179 node_indices: &[NodeIndex],
180 pg: &UnGraph<String, ()>,
181 m: f64,
182) -> f64 {
183 let mut q = 0.0;
184 let n = community.len();
185 for i in 0..n {
186 for j in (i + 1)..n {
187 if community[i] != community[j] {
188 continue;
189 }
190 let ki = pg.edges(node_indices[i]).count() as f64;
191 let kj = pg.edges(node_indices[j]).count() as f64;
192 let aij = if pg.contains_edge(node_indices[i], node_indices[j]) {
193 1.0
194 } else {
195 0.0
196 };
197 q += aij - (ki * kj) / (2.0 * m);
198 }
199 }
200 q / (2.0 * m).max(1.0)
201}
202
203fn community_density(
204 comm_id: usize,
205 community: &[usize],
206 node_indices: &[NodeIndex],
207 pg: &UnGraph<String, ()>,
208) -> f64 {
209 let members: Vec<usize> = community
210 .iter()
211 .enumerate()
212 .filter(|(_, c)| **c == comm_id)
213 .map(|(i, _)| i)
214 .collect();
215 let n = members.len();
216 if n <= 1 {
217 return 1.0;
218 }
219 let mut internal_edges = 0;
220 for (a_idx, &i) in members.iter().enumerate() {
221 for &j in &members[a_idx + 1..] {
222 if pg.contains_edge(node_indices[i], node_indices[j]) {
223 internal_edges += 1;
224 }
225 }
226 }
227 let possible = n * (n - 1) / 2;
228 internal_edges as f64 / possible.max(1) as f64
229}
230
231fn common_path_prefix(files: &[String]) -> String {
232 if files.is_empty() {
233 return String::new();
234 }
235 if files.len() == 1 {
236 return files[0].rsplit('/').nth(1).unwrap_or(&files[0]).to_owned();
237 }
238
239 let parts: Vec<Vec<&str>> = files.iter().map(|f| f.split('/').collect()).collect();
240 let mut prefix = Vec::new();
241 let min_len = parts.iter().map(|p| p.len()).min().unwrap_or(0);
242
243 for i in 0..min_len {
244 let segment = parts[0][i];
245 if parts.iter().all(|p| p[i] == segment) {
246 prefix.push(segment);
247 } else {
248 break;
249 }
250 }
251
252 if prefix.is_empty() {
253 "root".to_owned()
254 } else {
255 prefix.join("/")
256 }
257}
258
259#[cfg(test)]
260mod tests {
261 use super::*;
262 use crate::import_graph::FileNode;
263 use std::collections::HashSet;
264
265 fn node(imports: &[&str], imported_by: &[&str]) -> FileNode {
266 FileNode {
267 imports: imports
268 .iter()
269 .map(|s| s.to_string())
270 .collect::<HashSet<_>>(),
271 imported_by: imported_by
272 .iter()
273 .map(|s| s.to_string())
274 .collect::<HashSet<_>>(),
275 }
276 }
277
278 #[test]
279 fn detects_two_communities() {
280 let mut graph = HashMap::new();
281 graph.insert("a1".into(), node(&["a2", "a3"], &["a2"]));
283 graph.insert("a2".into(), node(&["a1", "a3"], &["a1"]));
284 graph.insert("a3".into(), node(&["a1"], &["a1", "a2"]));
285 graph.insert("b1".into(), node(&["b2"], &["b2"]));
287 graph.insert("b2".into(), node(&["b1"], &["b1"]));
288 graph.insert("bridge".into(), node(&["a1", "b1"], &[]));
290
291 let result = detect_communities(&graph, 2).unwrap();
292 assert!(
293 result.communities.len() >= 2,
294 "expected >= 2 communities, got {}",
295 result.communities.len()
296 );
297 assert!(result.modularity > 0.0, "modularity should be positive");
298 }
299
300 #[test]
301 fn empty_graph_returns_empty() {
302 let graph = HashMap::new();
303 let result = detect_communities(&graph, 1).unwrap();
304 assert!(result.communities.is_empty());
305 assert_eq!(result.total_files, 0);
306 }
307}