/// Builds a [`SplitPlan`] that groups the functions of `file_path` into
/// clusters that could be moved into separate files.
///
/// # Parameters
/// - `index`: the context index; `index.file_index` maps a file path to the
///   positions of its functions inside `index.functions`.
/// - `file_path`: key looked up in `index.file_index`.
/// - `resolution`: passed to `LouvainDetector::with_resolution`. It is only
///   used when the file has at least ten usable functions; smaller files are
///   grouped with `connected_components` instead.
/// - `min_cluster_lines`: a community whose estimated line count is below this
///   value (or that contains a single item) is not kept as a cluster; its
///   items are handed to `assign_orphans_to_clusters`.
///
/// # Returns
/// `None` when `file_path` is not in the index or none of its indices refer to
/// an existing entry of `index.functions`. Otherwise a plan whose clusters are
/// ordered by `estimated_lines` (largest first) and whose items are ordered by
/// starting line within each cluster.
pub fn suggest_split(
    index: &AgentContextIndex,
    file_path: &str,
    resolution: f64,
    min_cluster_lines: usize,
) -> Option<SplitPlan> {
    // Below this many functions the file is grouped by connected components
    // rather than by Louvain community detection.
    const LOUVAIN_MIN_FUNCTIONS: usize = 10;

    let func_indices = index.file_index.get(file_path)?;

    // Drop indices that do not resolve to an entry and number the survivors
    // densely. A local index is then a valid position in BOTH `valid_indices`
    // and `local_entries`, so the two can never drift apart even if the index
    // contains a stale entry.
    let valid_indices: Vec<usize> = func_indices
        .iter()
        .copied()
        .filter(|&global_idx| global_idx < index.functions.len())
        .collect();
    if valid_indices.is_empty() {
        return None;
    }
    let local_entries: Vec<&FunctionEntry> = valid_indices
        .iter()
        .map(|&global_idx| &index.functions[global_idx])
        .collect();
    let global_to_local: HashMap<usize, usize> = valid_indices
        .iter()
        .enumerate()
        .map(|(local_idx, &global_idx)| (global_idx, local_idx))
        .collect();

    let total_lines = estimate_total_lines(&local_entries);
    let (graph, node_to_local) = build_intra_file_graph(index, &valid_indices, &global_to_local);

    let (communities, modularity) = if local_entries.len() < LOUVAIN_MIN_FUNCTIONS {
        // Small file: group by connectivity; modularity is scored with a
        // detector left at its default settings (`resolution` is not applied).
        let comms = connected_components(&graph);
        let detector = LouvainDetector::new();
        let mod_score = detector.calculate_modularity(&graph, &comms);
        (comms, mod_score)
    } else {
        let mut detector = LouvainDetector::new().with_resolution(resolution);
        let comms = detector.detect_communities(&graph);
        let mod_score = detector.calculate_modularity(&graph, &comms);
        (comms, mod_score)
    };

    // community id -> local indices of its members (in node order).
    let mut community_items: HashMap<usize, Vec<usize>> = HashMap::new();
    for (node_idx, &comm_id) in communities.iter().enumerate() {
        if let Some(&local_idx) = node_to_local.get(&node_idx) {
            community_items.entry(comm_id).or_default().push(local_idx);
        }
    }

    // `HashMap` iteration order is arbitrary, and the order in which
    // communities are visited affects orphan assignment and tie order among
    // equally sized clusters. Visit them by ascending id so the plan is
    // reproducible.
    let mut ordered_communities: Vec<(usize, Vec<usize>)> = community_items.into_iter().collect();
    ordered_communities.sort_unstable_by_key(|(comm_id, _)| *comm_id);

    let mut clusters = Vec::new();
    let mut orphan_items: Vec<(ClusterItem, usize)> = Vec::new();
    for (_, local_indices) in &ordered_communities {
        let items: Vec<ClusterItem> = local_indices
            .iter()
            .map(|&li| make_cluster_item(li, &local_entries, index, &valid_indices, &global_to_local))
            .collect();
        // Inclusive span per item; saturating so an inverted range counts as
        // one line instead of underflowing.
        let estimated_lines: usize = items
            .iter()
            .map(|item| item.line_range.1.saturating_sub(item.line_range.0) + 1)
            .sum();

        if items.len() == 1 || estimated_lines < min_cluster_lines {
            // Too small to stand alone: remember each item with its local index.
            orphan_items.extend(items.into_iter().zip(local_indices.iter().copied()));
        } else {
            let cluster_entries: Vec<&FunctionEntry> =
                local_indices.iter().map(|&li| local_entries[li]).collect();
            let (name, signal, confidence) = name_cluster(&cluster_entries, file_path);
            let cohesion = compute_cohesion(local_indices, index, &valid_indices, &global_to_local);
            clusters.push(SplitCluster {
                suggested_name: name,
                naming_signal: signal,
                confidence,
                items,
                estimated_lines,
                cohesion,
            });
        }
    }

    let unclustered = assign_orphans_to_clusters(&mut clusters, orphan_items);

    for cluster in &mut clusters {
        cluster.items.sort_by_key(|item| item.line_range.0);
    }
    // Stable sort, largest cluster first; ties keep community-id order.
    clusters.sort_by(|a, b| b.estimated_lines.cmp(&a.estimated_lines));

    let impact = compute_impact(index, file_path);
    Some(SplitPlan {
        source_file: file_path.to_string(),
        total_lines,
        clusters,
        unclustered,
        impact,
        modularity,
    })
}
/// Attaches each orphan item to the cluster that lies closest to it in the
/// file and returns the items that could not be attached.
///
/// Distance is measured between line-range midpoints: an orphan's distance to
/// a cluster is the smallest midpoint distance to any item currently in that
/// cluster (a cluster with no items scores `usize::MAX`). On a tie the
/// earliest cluster in `clusters` wins.
///
/// An adopted orphan is pushed onto the cluster's `items` immediately and its
/// inclusive line span is added to `estimated_lines`, so it takes part in the
/// distance computation for orphans processed after it.
///
/// The second tuple element of each orphan (its local index) is not used.
///
/// # Returns
/// Every orphan, in input order, when `clusters` is empty; otherwise an empty
/// vector, because some cluster is always selected.
fn assign_orphans_to_clusters(
    clusters: &mut Vec<SplitCluster>,
    orphan_items: Vec<(ClusterItem, usize)>,
) -> Vec<ClusterItem> {
    let mut unclustered = Vec::new();

    for (item, _local_idx) in orphan_items {
        let item_mid = (item.line_range.0 + item.line_range.1) / 2;

        // `min_by_key` yields `None` only when there are no clusters at all,
        // which is exactly the case where the item must stay unclustered.
        let nearest = clusters.iter_mut().min_by_key(|cluster| {
            cluster
                .items
                .iter()
                .map(|member| {
                    let member_mid = (member.line_range.0 + member.line_range.1) / 2;
                    member_mid.abs_diff(item_mid)
                })
                .min()
                .unwrap_or(usize::MAX)
        });

        match nearest {
            Some(cluster) => {
                // Inclusive span; saturating so an inverted range counts as
                // one line instead of underflowing.
                let line_span = item.line_range.1.saturating_sub(item.line_range.0) + 1;
                cluster.estimated_lines += line_span;
                cluster.items.push(item);
            }
            None => unclustered.push(item),
        }
    }

    unclustered
}
/// Approximates a file's length as the greatest `end_line` found among
/// `entries`.
///
/// Returns `0` for an empty slice. Only function end lines are consulted, so
/// anything after the last listed function is not counted.
fn estimate_total_lines(entries: &[&FunctionEntry]) -> usize {
    entries
        .iter()
        .fold(0, |furthest, entry| furthest.max(entry.end_line))
}