use crate::detectors::base::{Detector, DetectorConfig, DetectorScope};
use crate::models::{Finding, Severity};
use anyhow::Result;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tracing::debug;
pub struct CommunityMisplacementDetector {
config: DetectorConfig,
}
detector_constructors! {
CommunityMisplacementDetector { }
}
fn top_level_dir(path: &str) -> &str {
match path.find('/') {
Some(pos) => &path[..pos],
None => "",
}
}
fn package_scope(path: &str) -> &str {
if let Some(rest) = path.strip_prefix("packages/") {
if let Some(slash) = rest.find('/') {
return &path[..("packages/".len() + slash)];
}
return path;
}
if path.starts_with("repotoire-cli/") || path == "repotoire-cli" {
return "repotoire-cli";
}
if path.starts_with("repotoire/web/") || path == "repotoire/web" {
return "repotoire/web";
}
top_level_dir(path)
}
impl Detector for CommunityMisplacementDetector {
fn name(&self) -> &'static str {
"CommunityMisplacementDetector"
}
fn description(&self) -> &'static str {
"Detects files whose directory doesn't match their Louvain community"
}
fn category(&self) -> &'static str {
"architecture"
}
fn config(&self) -> Option<&DetectorConfig> {
Some(&self.config)
}
fn detector_scope(&self) -> DetectorScope {
DetectorScope::GraphWide
}
fn is_deterministic(&self) -> bool {
true
}
fn detect(
&self,
ctx: &crate::detectors::analysis_context::AnalysisContext,
) -> Result<Vec<Finding>> {
let graph = ctx.graph;
let gi = graph.interner();
let min_community_size: usize = self.config.get_option_or("min_community_size", 5);
let max_outlier_ratio: f64 = self.config.get_option_or("max_outlier_ratio", 0.2);
let mut communities: HashMap<usize, Vec<crate::graph::node_index::NodeIndex>> =
HashMap::new();
for &idx in graph.functions_idx() {
if let Some(community_id) = graph.primitives().community.get(&idx).copied() {
communities.entry(community_id).or_default().push(idx);
}
}
if communities.is_empty() {
return Ok(vec![]);
}
debug!(
"CommunityMisplacementDetector: examining {} communities",
communities.len()
);
let mut findings = Vec::new();
for (&community_id, members) in &communities {
let community_size = members.len();
if community_size < min_community_size {
continue;
}
let mut dir_counts: HashMap<&str, usize> = HashMap::new();
let mut member_paths: Vec<(&str, &str)> = Vec::new();
for &idx in members {
if let Some(node) = graph.node_idx(idx) {
let path = node.path(gi);
let dir = top_level_dir(path);
*dir_counts.entry(dir).or_insert(0) += 1;
member_paths.push((path, dir));
}
}
let dominant_dir = match dir_counts.iter().max_by_key(|&(_, count)| *count) {
Some((dir, _)) => *dir,
None => continue,
};
let dominant_pkg = member_paths
.iter()
.find(|&&(_, d)| d == dominant_dir)
.map(|&(p, _)| package_scope(p))
.unwrap_or(dominant_dir);
for &(file_path, file_dir) in &member_paths {
if file_dir == dominant_dir {
continue;
}
let file_pkg = package_scope(file_path);
if file_pkg != dominant_pkg {
continue;
}
let same_dir_count = dir_counts.get(file_dir).copied().unwrap_or(0);
let ratio = same_dir_count as f64 / community_size as f64;
if ratio > max_outlier_ratio {
continue; }
let severity = if !file_dir.is_empty() && !dominant_dir.is_empty() {
Severity::Medium
} else {
Severity::Low
};
let filename = file_path.rsplit('/').next().unwrap_or(file_path);
findings.push(Finding {
id: String::new(),
detector: "community-misplacement".to_string(),
severity,
confidence: Some(0.75),
deterministic: true,
title: format!(
"Community misplacement: {} clusters with {}/ module",
filename, dominant_dir
),
description: format!(
"`{}` clusters with community #{} ({} members, dominant directory: `{}/`). \
Consider relocating or extracting the shared dependency.",
file_path, community_id, community_size, dominant_dir
),
affected_files: vec![PathBuf::from(file_path)],
suggested_fix: Some(
"Consider one of: (1) Move the file to the dominant community directory, \
(2) Extract shared logic into a common module, \
(3) Re-evaluate the module boundary if the coupling is intentional."
.to_string(),
),
category: Some("architecture".to_string()),
why_it_matters: Some(
"Files that operationally belong to a different module than their \
directory suggests create confusion and make the codebase harder to \
navigate. Aligning directory structure with actual coupling improves \
discoverability and maintainability."
.to_string(),
),
..Default::default()
});
}
}
findings.sort_by(|a, b| {
a.affected_files[0]
.cmp(&b.affected_files[0])
.then(b.severity.cmp(&a.severity))
});
findings.dedup_by(|a, b| a.affected_files[0] == b.affected_files[0]);
findings.sort_by_key(|f| std::cmp::Reverse(f.severity));
debug!(
"CommunityMisplacementDetector found {} findings",
findings.len()
);
Ok(findings)
}
}
impl crate::detectors::RegisteredDetector for CommunityMisplacementDetector {
fn create(init: &crate::detectors::DetectorInit) -> Arc<dyn Detector> {
Arc::new(Self::with_config(
init.config_for("CommunityMisplacementDetector"),
))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::{CodeEdge, CodeNode, GraphBuilder};
#[test]
fn test_no_findings_empty_communities() {
let mut builder = GraphBuilder::new();
let f1 = builder.add_node(CodeNode::function("f1", "src/a.py"));
let f2 = builder.add_node(CodeNode::function("f2", "src/b.py"));
builder.add_edge(f1, f2, CodeEdge::calls());
let graph = builder.freeze();
let detector = CommunityMisplacementDetector::new();
let ctx = crate::detectors::analysis_context::AnalysisContext::test(&graph);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should have no findings without community data"
);
}
#[test]
fn test_no_findings_small_community() {
let mut builder = GraphBuilder::new();
let f1 = builder.add_node(CodeNode::function("f1", "src/a.py"));
let f2 = builder.add_node(CodeNode::function("f2", "lib/b.py"));
builder.add_edge(f1, f2, CodeEdge::calls());
let graph = builder.freeze();
let detector = CommunityMisplacementDetector::new();
let ctx = crate::detectors::analysis_context::AnalysisContext::test(&graph);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(
findings.is_empty(),
"Should have no findings for communities below min size"
);
}
#[test]
fn test_scope_and_category() {
let detector = CommunityMisplacementDetector::new();
assert_eq!(detector.detector_scope(), DetectorScope::GraphWide);
assert_eq!(detector.category(), "architecture");
assert!(detector.is_deterministic());
assert_eq!(detector.name(), "CommunityMisplacementDetector");
}
#[test]
fn test_top_level_dir_extraction() {
assert_eq!(top_level_dir("src/api/handlers.rs"), "src");
assert_eq!(top_level_dir("lib/utils.py"), "lib");
assert_eq!(top_level_dir("utils.py"), "");
assert_eq!(top_level_dir("a/b/c/d.rs"), "a");
}
#[test]
fn test_package_scope_extraction() {
assert_eq!(
package_scope("packages/vscode-repotoire/src/extension.ts"),
"packages/vscode-repotoire"
);
assert_eq!(
package_scope("packages/web-app/index.html"),
"packages/web-app"
);
assert_eq!(package_scope("repotoire-cli/src/main.rs"), "repotoire-cli");
assert_eq!(package_scope("repotoire/web/src/app.tsx"), "repotoire/web");
assert_eq!(package_scope("src/api/handlers.rs"), "src");
assert_eq!(package_scope("lib/utils.py"), "lib");
assert_eq!(package_scope("utils.py"), "");
}
#[test]
fn test_detector_slug() {
let mut builder = GraphBuilder::new();
let f1 = builder.add_node(CodeNode::function("f1", "a.py"));
let f2 = builder.add_node(CodeNode::function("f2", "b.py"));
builder.add_edge(f1, f2, CodeEdge::calls());
let graph = builder.freeze();
let detector = CommunityMisplacementDetector::new();
let ctx = crate::detectors::analysis_context::AnalysisContext::test(&graph);
let findings = detector.detect(&ctx).expect("detection should succeed");
assert!(findings.is_empty());
}
#[test]
fn test_positive_misplacement_with_co_change() {
let mut builder = GraphBuilder::new();
let auth_fns: Vec<_> = (0..6)
.map(|i| {
let fname = format!("auth_fn_{i}");
let path = format!("src/auth/mod{i}.py");
let f = builder.add_node(CodeNode::function(&fname, &path));
let file = builder.add_node(CodeNode::file(&path));
builder.add_edge(file, f, CodeEdge::contains());
(f, path)
})
.collect();
let db_fns: Vec<_> = (0..2)
.map(|i| {
let fname = format!("db_fn_{i}");
let path = format!("src/db/query{i}.py");
let f = builder.add_node(CodeNode::function(&fname, &path));
let file = builder.add_node(CodeNode::file(&path));
builder.add_edge(file, f, CodeEdge::contains());
(f, path)
})
.collect();
let utils_fn = builder.add_node(CodeNode::function("utils_helper", "src/utils/helper.py"));
let utils_file = builder.add_node(CodeNode::file("src/utils/helper.py"));
builder.add_edge(utils_file, utils_fn, CodeEdge::contains());
for i in 0..5 {
builder.add_edge(auth_fns[i].0, auth_fns[i + 1].0, CodeEdge::calls());
}
builder.add_edge(db_fns[0].0, db_fns[1].0, CodeEdge::calls());
builder.add_edge(utils_fn, auth_fns[0].0, CodeEdge::calls());
let now = chrono::Utc::now();
let config = crate::git::co_change::CoChangeConfig {
min_weight: 0.01,
..Default::default()
};
let mut commits = Vec::new();
for auth in &auth_fns {
for _ in 0..5 {
commits.push((now, vec!["src/utils/helper.py".to_string(), auth.1.clone()]));
}
}
for i in 0..5 {
for _ in 0..3 {
commits.push((now, vec![auth_fns[i].1.clone(), auth_fns[i + 1].1.clone()]));
}
}
let co_change = crate::git::co_change::CoChangeMatrix::from_commits(&commits, &config, now);
let graph = builder.freeze_with_co_change(&co_change);
let detector = CommunityMisplacementDetector::new();
let ctx = crate::detectors::analysis_context::AnalysisContext::test(&graph);
let findings = detector.detect(&ctx).expect("detection should succeed");
if !findings.is_empty() {
assert_eq!(findings[0].detector, "community-misplacement");
assert!(
findings[0].affected_files.len() == 1,
"Each finding should affect exactly one file"
);
}
}
}