1use std::collections::{BTreeMap, HashMap, HashSet};
2
3#[derive(Debug, Clone)]
5pub struct Module {
6 pub name: String,
7 pub files: Vec<String>,
8 pub lcom4: usize,
11 pub tcc: f64,
14 pub cohesion: f64,
16}
17
18pub fn infer_modules(
26 file_imports: &[(String, String)],
27 all_files: &[String],
28 depth: Option<usize>,
29) -> Vec<Module> {
30 let adj = build_undirected_adj(file_imports, all_files);
31 let depth = depth.unwrap_or_else(|| auto_detect_depth(all_files));
32 let groups = group_by_directory(all_files, depth);
33
34 let mut modules = Vec::new();
35 for (dir, files) in groups {
36 adaptive_split(&dir, &files, &adj, &mut modules);
37 }
38
39 for m in &mut modules {
41 let mset: HashSet<&str> = m.files.iter().map(|s| s.as_str()).collect();
42 m.lcom4 = compute_lcom4(&mset, &adj);
43 m.tcc = compute_tcc(&mset, &adj);
44 m.cohesion = compute_cohesion(&mset, &adj);
45 }
46 modules
47}
48
49fn auto_detect_depth(files: &[String]) -> usize {
52 let max_d = files
53 .iter()
54 .map(|f| f.matches('/').count().saturating_sub(1)) .max()
56 .unwrap_or(0);
57 if max_d == 0 {
58 return 1;
59 }
60 let counts: Vec<usize> = (1..=max_d)
61 .map(|d| group_by_directory(files, d).len())
62 .collect();
63 let mut best = 0;
65 let mut max_ratio = 0.0_f64;
66 for i in 0..counts.len().saturating_sub(1) {
67 if counts[i] > 0 {
68 let ratio = (counts[i + 1] as f64 - counts[i] as f64) / counts[i] as f64;
69 if ratio > max_ratio {
70 max_ratio = ratio;
71 best = i;
72 }
73 }
74 }
75 best + 1 }
77
78fn group_by_directory(files: &[String], depth: usize) -> BTreeMap<String, Vec<String>> {
79 let mut groups: BTreeMap<String, Vec<String>> = BTreeMap::new();
80 for f in files {
81 let parts: Vec<&str> = f.split('/').collect();
82 let dir_parts = &parts[..parts.len().saturating_sub(1)]; let key = if dir_parts.len() >= depth {
84 dir_parts[..depth].join("/")
85 } else if dir_parts.is_empty() {
86 "(root)".to_string()
87 } else {
88 dir_parts.join("/")
89 };
90 groups.entry(key).or_default().push(f.clone());
91 }
92 groups
93}
94
95fn adaptive_split(
98 dir: &str,
99 files: &[String],
100 adj: &HashMap<String, HashSet<String>>,
101 out: &mut Vec<Module>,
102) {
103 if files.len() < 3 {
104 out.push(make_module(dir, files));
105 return;
106 }
107
108 let mset: HashSet<&str> = files.iter().map(|s| s.as_str()).collect();
109
110 if try_lcom4_split(dir, files, &mset, adj, out) {
111 return;
112 }
113 if try_icr_split(dir, files, adj, out) {
114 return;
115 }
116 out.push(make_module(dir, files));
117}
118
119fn try_lcom4_split(
120 dir: &str,
121 files: &[String],
122 mset: &HashSet<&str>,
123 adj: &HashMap<String, HashSet<String>>,
124 out: &mut Vec<Module>,
125) -> bool {
126 if compute_lcom4(mset, adj) <= 1 {
127 return false;
128 }
129 let subs = group_by_next_level(dir, files);
130 if subs.len() <= 1 {
131 return false;
132 }
133 for (sd, sf) in &subs {
134 adaptive_split(sd, sf, adj, out);
135 }
136 true
137}
138
139fn try_icr_split(
140 dir: &str,
141 files: &[String],
142 adj: &HashMap<String, HashSet<String>>,
143 out: &mut Vec<Module>,
144) -> bool {
145 if files.len() <= 30 {
146 return false;
147 }
148 let subs = group_by_next_level(dir, files);
149 if subs.len() <= 1 || inter_child_ratio(&subs, adj) >= 0.30 {
150 return false;
151 }
152 for (sd, sf) in &subs {
153 adaptive_split(sd, sf, adj, out);
154 }
155 true
156}
157
158fn group_by_next_level(dir: &str, files: &[String]) -> BTreeMap<String, Vec<String>> {
159 let mut subs: BTreeMap<String, Vec<String>> = BTreeMap::new();
160 let prefix = if dir == "(root)" { "" } else { dir };
161 for f in files {
162 let rest = if prefix.is_empty() {
163 f.as_str()
164 } else {
165 f.strip_prefix(prefix)
166 .and_then(|s| s.strip_prefix('/'))
167 .unwrap_or(f)
168 };
169 let parts: Vec<&str> = rest.split('/').collect();
170 let key = if parts.len() > 1 {
171 if prefix.is_empty() {
172 parts[0].to_string()
173 } else {
174 format!("{prefix}/{}", parts[0])
175 }
176 } else {
177 format!("{dir}/*")
179 };
180 subs.entry(key).or_default().push(f.clone());
181 }
182 subs
183}
184
185fn make_module(dir: &str, files: &[String]) -> Module {
186 Module {
187 name: dir.to_string(),
188 files: files.to_vec(),
189 lcom4: 0,
190 tcc: 0.0,
191 cohesion: 0.0,
192 }
193}
194
195fn build_undirected_adj(
198 edges: &[(String, String)],
199 all_files: &[String],
200) -> HashMap<String, HashSet<String>> {
201 let file_set: HashSet<&str> = all_files.iter().map(|s| s.as_str()).collect();
202 let mut adj: HashMap<String, HashSet<String>> = HashMap::new();
203 for f in all_files {
204 adj.entry(f.clone()).or_default();
205 }
206 for (s, d) in edges {
207 if file_set.contains(s.as_str()) && file_set.contains(d.as_str()) {
208 adj.entry(s.clone()).or_default().insert(d.clone());
209 adj.entry(d.clone()).or_default().insert(s.clone());
210 }
211 }
212 adj
213}
214
215fn compute_lcom4(members: &HashSet<&str>, adj: &HashMap<String, HashSet<String>>) -> usize {
216 let mut visited: HashSet<&str> = HashSet::new();
217 let mut count = 0;
218 for &f in members {
219 if visited.contains(f) {
220 continue;
221 }
222 count += 1;
223 bfs_visit(f, members, adj, &mut visited);
224 }
225 count
226}
227
228fn bfs_visit<'a>(
229 start: &'a str,
230 members: &HashSet<&str>,
231 adj: &'a HashMap<String, HashSet<String>>,
232 visited: &mut HashSet<&'a str>,
233) {
234 let mut stack = vec![start];
235 while let Some(cur) = stack.pop() {
236 if !visited.insert(cur) {
237 continue;
238 }
239 let Some(neighbors) = adj.get(cur) else {
240 continue;
241 };
242 for n in neighbors {
243 if members.contains(n.as_str()) && !visited.contains(n.as_str()) {
244 stack.push(n);
245 }
246 }
247 }
248}
249
250fn compute_tcc(members: &HashSet<&str>, adj: &HashMap<String, HashSet<String>>) -> f64 {
251 let n = members.len();
252 if n < 2 {
253 return 1.0;
254 }
255 if n > 200 {
257 return -1.0;
258 }
259 let list: Vec<&str> = members.iter().copied().collect();
260 let targets: HashMap<&str, HashSet<&str>> = list
262 .iter()
263 .map(|&f| {
264 let t: HashSet<&str> = adj
265 .get(f)
266 .map(|ns| {
267 ns.iter()
268 .filter(|n| members.contains(n.as_str()))
269 .map(|n| n.as_str())
270 .collect()
271 })
272 .unwrap_or_default();
273 (f, t)
274 })
275 .collect();
276
277 let mut connected = 0usize;
278 for i in 0..n {
279 for j in (i + 1)..n {
280 let a = list[i];
281 let b = list[j];
282 if targets[a].contains(b)
284 || targets[b].contains(a)
285 || !targets[a].is_disjoint(&targets[b])
286 {
287 connected += 1;
288 }
289 }
290 }
291 connected as f64 / (n * (n - 1) / 2) as f64
292}
293
294fn compute_cohesion(members: &HashSet<&str>, adj: &HashMap<String, HashSet<String>>) -> f64 {
295 let mut sum = 0.0;
296 let mut count = 0;
297 for &f in members {
298 if let Some(neighbors) = adj.get(f) {
299 let total = neighbors.len();
300 if total == 0 {
301 continue;
302 }
303 let internal = neighbors
304 .iter()
305 .filter(|n| members.contains(n.as_str()))
306 .count();
307 sum += internal as f64 / total as f64;
308 count += 1;
309 }
310 }
311 if count > 0 { sum / count as f64 } else { 0.0 }
312}
313
314fn inter_child_ratio(
315 subs: &BTreeMap<String, Vec<String>>,
316 adj: &HashMap<String, HashSet<String>>,
317) -> f64 {
318 let file_to_sub: HashMap<&str, &str> = subs
319 .iter()
320 .flat_map(|(sd, files)| files.iter().map(move |f| (f.as_str(), sd.as_str())))
321 .collect();
322 let all_files: HashSet<&str> = file_to_sub.keys().copied().collect();
323 let (mut intra, mut inter) = (0usize, 0usize);
324 for &f in &all_files {
325 let Some(neighbors) = adj.get(f) else {
326 continue;
327 };
328 let f_sub = file_to_sub[f];
329 for n in neighbors {
330 let ns = n.as_str();
331 if !all_files.contains(ns) {
332 continue;
333 }
334 if f_sub == file_to_sub[ns] {
335 intra += 1;
336 } else {
337 inter += 1;
338 }
339 }
340 }
341 let total = intra + inter;
342 if total > 0 {
343 inter as f64 / total as f64
344 } else {
345 1.0
346 }
347}
348
349#[cfg(test)]
350mod tests {
351 use super::*;
352
353 #[test]
354 fn auto_depth_simple() {
355 let files: Vec<String> = vec![
356 "src/core/a.rs",
357 "src/core/b.rs",
358 "src/util/c.rs",
359 "src/util/d.rs",
360 "src/util/sub/e.rs",
361 ]
362 .into_iter()
363 .map(String::from)
364 .collect();
365 let d = auto_detect_depth(&files);
366 assert!(d >= 1 && d <= 2);
367 }
368
369 #[test]
370 fn lcom4_two_components() {
371 let files: Vec<String> = vec!["a.rs", "b.rs", "c.rs", "d.rs"]
372 .into_iter()
373 .map(String::from)
374 .collect();
375 let edges = vec![
377 ("a.rs".into(), "b.rs".into()),
378 ("c.rs".into(), "d.rs".into()),
379 ];
380 let modules = infer_modules(&edges, &files, Some(1));
381 assert!(modules.iter().any(|m| m.lcom4 >= 1));
383 }
384
385 #[test]
386 fn directory_grouping() {
387 let files: Vec<String> = vec!["src/core/x.rs", "src/core/y.rs", "src/util/z.rs"]
388 .into_iter()
389 .map(String::from)
390 .collect();
391 let modules = infer_modules(&[], &files, Some(1));
392 let core_mod = modules
393 .iter()
394 .find(|m| m.files.contains(&"src/core/x.rs".to_string()));
395 assert!(core_mod.is_some());
396 assert!(
397 core_mod
398 .unwrap()
399 .files
400 .contains(&"src/core/y.rs".to_string())
401 );
402 }
403}