use std::collections::BTreeMap;
use std::path::Path;
use petgraph::Direction;
use petgraph::stable_graph::NodeIndex;
use petgraph::visit::EdgeRef;
use crate::graph::{CodeGraph, edge::EdgeKind, node::GraphNode};
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct ClusterResult {
pub label: String,
pub member_count: usize,
pub top_symbols: Vec<String>,
}
pub fn find_clusters(
graph: &CodeGraph,
root: &Path,
scope: Option<&Path>,
max_iterations: usize,
) -> Vec<ClusterResult> {
let abs_scope: Option<std::path::PathBuf> = scope.map(|s| {
if s.is_absolute() {
s.to_path_buf()
} else {
root.join(s)
}
});
let in_scope = |path: &Path| -> bool {
match &abs_scope {
None => true,
Some(scope_path) => path.starts_with(scope_path),
}
};
let mut labels: BTreeMap<NodeIndex, String> = BTreeMap::new();
for file_idx in graph.file_index.values() {
let file_path = match &graph.graph[*file_idx] {
GraphNode::File(fi) => fi.path.clone(),
_ => continue,
};
if !in_scope(&file_path) {
continue;
}
let label = directory_label(&file_path, root);
for edge_ref in graph.graph.edges_directed(*file_idx, Direction::Outgoing) {
if matches!(edge_ref.weight(), EdgeKind::Contains) {
let sym_idx = edge_ref.target();
if matches!(graph.graph[sym_idx], GraphNode::Symbol(_)) {
labels.insert(sym_idx, label.clone());
}
}
}
}
if labels.is_empty() {
return Vec::new();
}
let mut snapshot: BTreeMap<NodeIndex, String> = BTreeMap::new();
for _ in 0..max_iterations {
let mut changed = false;
snapshot.clone_from(&labels);
for (&node, current_label) in &mut labels {
let mut neighbor_labels: BTreeMap<String, usize> = BTreeMap::new();
for edge_ref in graph.graph.edges_directed(node, Direction::Outgoing) {
let neighbor = edge_ref.target();
if matches!(edge_ref.weight(), EdgeKind::Calls | EdgeKind::ChildOf)
&& let Some(nlabel) = snapshot.get(&neighbor)
{
*neighbor_labels.entry(nlabel.clone()).or_insert(0) += 1;
}
}
for edge_ref in graph.graph.edges_directed(node, Direction::Incoming) {
let neighbor = edge_ref.source();
if matches!(edge_ref.weight(), EdgeKind::Calls | EdgeKind::ChildOf)
&& let Some(nlabel) = snapshot.get(&neighbor)
{
*neighbor_labels.entry(nlabel.clone()).or_insert(0) += 1;
}
}
if neighbor_labels.is_empty() {
continue;
}
let max_count = *neighbor_labels.values().max().unwrap_or(&0);
let majority = neighbor_labels
.iter()
.filter(|(_, count)| **count == max_count)
.map(|(lbl, _)| lbl.clone())
.next() .unwrap_or_else(|| current_label.clone());
if majority != *current_label {
*current_label = majority;
changed = true;
}
}
if !changed {
break;
}
}
let mut groups: BTreeMap<String, Vec<NodeIndex>> = BTreeMap::new();
for (node_idx, label) in &labels {
groups.entry(label.clone()).or_default().push(*node_idx);
}
groups
.into_iter()
.map(|(label, members)| {
let member_count = members.len();
let mut scored: Vec<(NodeIndex, usize)> = members
.iter()
.map(|&idx| {
let incoming = graph.graph.edges_directed(idx, Direction::Incoming).count();
(idx, incoming)
})
.collect();
scored.sort_by(|a, b| {
b.1.cmp(&a.1).then_with(|| {
let name_a = symbol_name(graph, a.0);
let name_b = symbol_name(graph, b.0);
name_a.cmp(&name_b)
})
});
let top_symbols: Vec<String> = scored
.iter()
.take(5)
.map(|&(idx, _)| symbol_name(graph, idx))
.collect();
ClusterResult {
label,
member_count,
top_symbols,
}
})
.collect()
}
fn directory_label(file_path: &Path, root: &Path) -> String {
let rel = file_path.strip_prefix(root).unwrap_or(file_path);
let components: Vec<_> = rel.components().collect();
if components.len() <= 1 {
return "core".to_string();
}
let mut start = 0;
for (i, comp) in components.iter().enumerate() {
let s = comp.as_os_str().to_str().unwrap_or("");
if s == "src" || s == "lib" {
start = i + 1;
} else {
break;
}
}
if start < components.len() - 1 {
components[start]
.as_os_str()
.to_str()
.unwrap_or("core")
.to_string()
} else {
"core".to_string()
}
}
fn symbol_name(graph: &CodeGraph, idx: NodeIndex) -> String {
match &graph.graph[idx] {
GraphNode::Symbol(info) => info.name.clone(),
_ => String::new(),
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
use crate::graph::node::{SymbolInfo, SymbolKind};
fn root() -> PathBuf {
PathBuf::from("/proj")
}
fn graph_with_auth_and_api() -> crate::graph::CodeGraph {
let root = root();
let mut g = crate::graph::CodeGraph::new();
let auth_file = g.add_file(root.join("src/auth/auth.rs"), "rust");
g.add_symbol(
auth_file,
SymbolInfo {
name: "authenticate".into(),
kind: SymbolKind::Function,
line: 1,
..Default::default()
},
);
g.add_symbol(
auth_file,
SymbolInfo {
name: "authorize".into(),
kind: SymbolKind::Function,
line: 10,
..Default::default()
},
);
let api_file = g.add_file(root.join("src/api/routes.rs"), "rust");
g.add_symbol(
api_file,
SymbolInfo {
name: "get_users".into(),
kind: SymbolKind::Function,
line: 1,
..Default::default()
},
);
g
}
#[test]
fn test_find_clusters_basic() {
let graph = graph_with_auth_and_api();
let r = root();
let clusters = find_clusters(&graph, &r, None, 10);
assert!(!clusters.is_empty(), "expected at least one cluster");
let labels: Vec<&str> = clusters.iter().map(|c| c.label.as_str()).collect();
assert!(
labels.contains(&"auth"),
"expected 'auth' cluster, got: {labels:?}"
);
assert!(
labels.contains(&"api"),
"expected 'api' cluster, got: {labels:?}"
);
let auth_cluster = clusters.iter().find(|c| c.label == "auth").unwrap();
assert_eq!(auth_cluster.member_count, 2, "auth should have 2 members");
let api_cluster = clusters.iter().find(|c| c.label == "api").unwrap();
assert_eq!(api_cluster.member_count, 1, "api should have 1 member");
}
#[test]
fn test_find_clusters_determinism() {
let graph = graph_with_auth_and_api();
let r = root();
let c1 = find_clusters(&graph, &r, None, 10);
let c2 = find_clusters(&graph, &r, None, 10);
assert_eq!(
c1, c2,
"find_clusters must be deterministic — two calls differ"
);
}
#[test]
fn test_find_clusters_with_scope() {
let graph = graph_with_auth_and_api();
let r = root();
let scope = PathBuf::from("src/auth");
let clusters = find_clusters(&graph, &r, Some(&scope), 10);
let labels: Vec<&str> = clusters.iter().map(|c| c.label.as_str()).collect();
assert!(
labels.contains(&"auth"),
"expected 'auth' cluster in scoped result"
);
assert!(
!labels.contains(&"api"),
"api cluster should be excluded by scope filter"
);
}
#[test]
fn test_find_clusters_empty_graph() {
let g = crate::graph::CodeGraph::new();
let r = root();
let clusters = find_clusters(&g, &r, None, 10);
assert!(clusters.is_empty(), "empty graph should produce empty Vec");
}
#[test]
fn test_find_clusters_isolated_nodes() {
let r = root();
let mut g = crate::graph::CodeGraph::new();
let f1 = g.add_file(r.join("src/utils/helper.rs"), "rust");
g.add_symbol(
f1,
SymbolInfo {
name: "helper_fn".into(),
kind: SymbolKind::Function,
line: 1,
..Default::default()
},
);
let clusters = find_clusters(&g, &r, None, 10);
assert_eq!(clusters.len(), 1, "one directory => one cluster");
assert_eq!(clusters[0].label, "utils");
assert!(clusters[0].top_symbols.contains(&"helper_fn".to_string()));
}
}