use std::collections::{HashMap, HashSet};
use super::super::categorization::cluster_methods_by_behavior;
use super::super::types::{BehaviorCategory, MethodCluster};
use super::cohesion::calculate_standalone_cohesion;
use super::community::apply_community_detection;
/// Clusters `methods` by name-based behavior first, then refines each group
/// using the call `adjacency` map.
///
/// The methods are grouped with `cluster_methods_by_behavior`. If that
/// grouping is empty, the result of `apply_community_detection` over the full
/// method list is returned instead. Otherwise every `(category, methods)`
/// group is passed through `refine_category_cluster`, and the clusters it
/// produces are concatenated in the order the groups are iterated.
pub fn apply_hybrid_clustering(
    methods: &[String],
    adjacency: &HashMap<(String, String), usize>,
) -> Vec<MethodCluster> {
    let by_behavior = cluster_methods_by_behavior(methods);
    if by_behavior.is_empty() {
        // Nothing could be grouped by name: rely on the call graph alone.
        return apply_community_detection(methods, adjacency);
    }

    let mut clusters = Vec::new();
    for (category, members) in by_behavior {
        clusters.extend(refine_category_cluster(category, members, adjacency));
    }
    clusters
}
/// Turns one behavior-category group into one or more clusters.
///
/// A group of at most five methods becomes a single cluster built by
/// `create_small_cluster`; a larger group is handed to
/// `refine_large_cluster` for further splitting.
fn refine_category_cluster(
    category: BehaviorCategory,
    methods: Vec<String>,
    adjacency: &HashMap<(String, String), usize>,
) -> Vec<MethodCluster> {
    // Largest group size that is kept as one cluster without splitting.
    const MAX_UNSPLIT_METHODS: usize = 5;

    if methods.len() > MAX_UNSPLIT_METHODS {
        refine_large_cluster(category, methods, adjacency)
    } else {
        vec![create_small_cluster(category, methods, adjacency)]
    }
}
/// Builds a single `MethodCluster` that owns `methods` under `category`.
///
/// The internal/external call counts come from
/// `calculate_standalone_cohesion`, `fields_accessed` is left empty, and
/// `cohesion_score` starts at `0.0` before `calculate_cohesion` is invoked on
/// the cluster (presumably that call fills in the score; it is defined
/// elsewhere).
fn create_small_cluster(
    category: BehaviorCategory,
    methods: Vec<String>,
    adjacency: &HashMap<(String, String), usize>,
) -> MethodCluster {
    // Must be computed before `methods` is moved into the struct below.
    let call_counts = calculate_standalone_cohesion(&methods, adjacency);

    let mut built = MethodCluster {
        category,
        methods,
        fields_accessed: Vec::new(),
        internal_calls: call_counts.0,
        external_calls: call_counts.1,
        cohesion_score: 0.0,
    };
    built.calculate_cohesion();
    built
}
/// Splits a large behavior-category group into sub-clusters.
///
/// `apply_community_detection` is run on the group's methods. If it yields
/// no clusters, the whole group is kept as one cluster via
/// `create_small_cluster`. Otherwise the detected sub-clusters are passed
/// through `refine_subclusters`, and any method of the group that appears in
/// none of them is collected into one extra cluster under `category`.
fn refine_large_cluster(
    category: BehaviorCategory,
    methods: Vec<String>,
    adjacency: &HashMap<(String, String), usize>,
) -> Vec<MethodCluster> {
    let communities = apply_community_detection(&methods, adjacency);
    if communities.is_empty() {
        return vec![create_small_cluster(category, methods, adjacency)];
    }

    // Both steps only read `communities`, so their order does not matter.
    let unassigned = find_lost_methods(&communities, &methods);
    let relabeled = refine_subclusters(&communities, &category);

    recover_lost_methods(relabeled, unassigned, category, adjacency)
}
/// Applies `refine_single_subcluster` to every entry of `sub_clusters`.
///
/// Returns the refined copies in the same order as the input; the input
/// slice itself is left untouched.
fn refine_subclusters(
    sub_clusters: &[MethodCluster],
    original_category: &BehaviorCategory,
) -> Vec<MethodCluster> {
    let mut refined = Vec::with_capacity(sub_clusters.len());
    for subcluster in sub_clusters {
        refined.push(refine_single_subcluster(subcluster, original_category));
    }
    refined
}
/// Returns a copy of `subcluster`, re-labelled with `original_category` when
/// its own category is the `BehaviorCategory::Domain` variant.
///
/// Sub-clusters carrying any other category keep it unchanged.
fn refine_single_subcluster(
    subcluster: &MethodCluster,
    original_category: &BehaviorCategory,
) -> MethodCluster {
    let mut relabeled = subcluster.clone();
    if let BehaviorCategory::Domain(_) = relabeled.category {
        relabeled.category = original_category.clone();
    }
    relabeled
}
/// Lists the entries of `all_methods` that belong to none of `sub_clusters`.
///
/// The result keeps the order of `all_methods` (including repeated names)
/// and holds owned copies of the missing method names.
fn find_lost_methods(sub_clusters: &[MethodCluster], all_methods: &[String]) -> Vec<String> {
    // Every method name that some sub-cluster already owns.
    let mut seen: HashSet<&String> = HashSet::new();
    for cluster in sub_clusters {
        seen.extend(&cluster.methods);
    }

    let mut lost = Vec::new();
    for name in all_methods {
        if !seen.contains(name) {
            lost.push(name.clone());
        }
    }
    lost
}
/// Appends a catch-all cluster for `lost_methods` to `refined`, if needed.
///
/// When `lost_methods` is empty, `refined` is returned unchanged. Otherwise
/// one extra cluster holding all lost methods is built with
/// `create_small_cluster` under `category` and pushed at the end.
fn recover_lost_methods(
    mut refined: Vec<MethodCluster>,
    lost_methods: Vec<String>,
    category: BehaviorCategory,
    adjacency: &HashMap<(String, String), usize>,
) -> Vec<MethodCluster> {
    if !lost_methods.is_empty() {
        refined.push(create_small_cluster(category, lost_methods, adjacency));
    }
    refined
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into owned method names.
    fn names(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|&name| String::from(name)).collect()
    }

    /// Builds a `Parsing` cluster owning `members`, with every counter zeroed.
    fn parsing_cluster(members: &[&str]) -> MethodCluster {
        MethodCluster {
            category: BehaviorCategory::Parsing,
            methods: names(members),
            fields_accessed: vec![],
            internal_calls: 0,
            external_calls: 0,
            cohesion_score: 0.0,
        }
    }

    // Two methods and no call edges must still produce at least one cluster.
    #[test]
    fn test_small_cluster_no_refinement() {
        let methods = names(&["parse_a", "parse_b"]);
        let no_edges = HashMap::new();

        let clusters = apply_hybrid_clustering(&methods, &no_edges);

        assert!(!clusters.is_empty());
    }

    // Every method is already in a sub-cluster, so nothing is reported lost.
    #[test]
    fn test_find_lost_methods_none_lost() {
        let sub_clusters = vec![parsing_cluster(&["a", "b"])];
        let all_methods = names(&["a", "b"]);

        let lost = find_lost_methods(&sub_clusters, &all_methods);

        assert!(lost.is_empty());
    }

    // Only "a" is clustered; "b" and "c" must both be reported.
    #[test]
    fn test_find_lost_methods_some_lost() {
        let sub_clusters = vec![parsing_cluster(&["a"])];
        let all_methods = names(&["a", "b", "c"]);

        let lost = find_lost_methods(&sub_clusters, &all_methods);

        assert_eq!(lost.len(), 2);
        for expected in ["b", "c"] {
            assert!(lost.contains(&String::from(expected)));
        }
    }
}