use super::{ArchitecturalLayer, Method, Visibility};
#[cfg(test)]
use std::collections::HashMap;
use std::collections::HashSet;
#[derive(Debug, Clone)]
pub struct SimilarityWeights {
pub call_graph: f64,
pub data_deps: f64,
pub naming: f64,
pub behavioral: f64,
pub layer: f64,
}
impl Default for SimilarityWeights {
fn default() -> Self {
Self {
call_graph: 0.40,
data_deps: 0.25,
naming: 0.20,
behavioral: 0.10,
layer: 0.05,
}
}
}
pub trait CallGraphProvider {
fn call_count(&self, from: &str, to: &str) -> usize;
fn callees(&self, method: &str) -> HashSet<String>;
fn callers(&self, method: &str) -> HashSet<String>;
}
pub trait FieldAccessProvider {
fn fields_accessed_by(&self, method: &str) -> HashSet<String>;
fn writes_to_field(&self, method: &str, field: &str) -> bool;
}
pub struct ClusteringSimilarityCalculator<C, F> {
call_graph: C,
field_tracker: F,
weights: SimilarityWeights,
}
impl<C: CallGraphProvider, F: FieldAccessProvider> ClusteringSimilarityCalculator<C, F> {
pub fn new(call_graph: C, field_tracker: F) -> Self {
Self {
call_graph,
field_tracker,
weights: SimilarityWeights::default(),
}
}
pub fn with_weights(call_graph: C, field_tracker: F, weights: SimilarityWeights) -> Self {
Self {
call_graph,
field_tracker,
weights,
}
}
pub fn calculate_similarity(&self, method1: &Method, method2: &Method) -> f64 {
let call_sim = self.call_graph_similarity(method1, method2);
let data_sim = self.data_dependency_similarity(method1, method2);
let naming_sim = self.naming_similarity(method1, method2);
let behavior_sim = self.behavioral_similarity(method1, method2);
let layer_sim = self.layer_similarity(method1, method2);
self.weights.call_graph * call_sim
+ self.weights.data_deps * data_sim
+ self.weights.naming * naming_sim
+ self.weights.behavioral * behavior_sim
+ self.weights.layer * layer_sim
}
fn call_graph_similarity(&self, method1: &Method, method2: &Method) -> f64 {
let calls_1_to_2 = self.call_graph.call_count(&method1.name, &method2.name);
let calls_2_to_1 = self.call_graph.call_count(&method2.name, &method1.name);
let callees1 = self.call_graph.callees(&method1.name);
let callees2 = self.call_graph.callees(&method2.name);
let shared_callees = callees1.intersection(&callees2).count();
let callers1 = self.call_graph.callers(&method1.name);
let callers2 = self.call_graph.callers(&method2.name);
let shared_callers = callers1.intersection(&callers2).count();
let direct_calls = (calls_1_to_2 + calls_2_to_1) as f64;
let shared = (shared_callees + shared_callers) as f64;
if direct_calls > 0.0 {
1.0 } else if shared > 0.0 {
0.5 + (shared / 20.0).min(0.4) } else {
0.0
}
}
fn data_dependency_similarity(&self, method1: &Method, method2: &Method) -> f64 {
let fields1 = self.field_tracker.fields_accessed_by(&method1.name);
let fields2 = self.field_tracker.fields_accessed_by(&method2.name);
let shared_fields = fields1.intersection(&fields2).count();
let total_fields = fields1.union(&fields2).count();
if total_fields == 0 {
return 0.0;
}
let jaccard = shared_fields as f64 / total_fields as f64;
let shared_writes = fields1
.iter()
.filter(|f| {
self.field_tracker.writes_to_field(&method1.name, f)
&& self.field_tracker.writes_to_field(&method2.name, f)
})
.count();
if shared_writes > 0 {
(jaccard + 0.3).min(1.0)
} else {
jaccard
}
}
fn naming_similarity(&self, method1: &Method, method2: &Method) -> f64 {
let name1 = &method1.name;
let name2 = &method2.name;
let common_prefix_len = name1
.chars()
.zip(name2.chars())
.take_while(|(a, b)| a == b)
.count();
if common_prefix_len >= 4 {
return 0.8;
}
let tokens1 = tokenize_method_name(name1);
let tokens2 = tokenize_method_name(name2);
let shared_tokens = tokens1.intersection(&tokens2).count();
let total_tokens = tokens1.union(&tokens2).count();
if total_tokens == 0 {
return 0.0;
}
shared_tokens as f64 / total_tokens as f64
}
fn behavioral_similarity(&self, method1: &Method, method2: &Method) -> f64 {
let mut score = 0.0;
if method1.is_pure == method2.is_pure {
score += 0.3;
}
if method1.visibility == method2.visibility {
score += 0.2;
}
let complexity_ratio = if method2.complexity > 0 {
(method1.complexity as f64 / method2.complexity as f64).min(1.0)
} else {
1.0
};
score += 0.3 * complexity_ratio;
if (method1.has_io && method2.has_io) || (!method1.has_io && !method2.has_io) {
score += 0.2;
}
score.min(1.0)
}
fn layer_similarity(&self, method1: &Method, method2: &Method) -> f64 {
let layer1 = self.detect_layer(method1);
let layer2 = self.detect_layer(method2);
if layer1 == layer2 {
1.0
} else if layer1.is_adjacent_to(&layer2) {
0.5
} else {
0.0
}
}
fn detect_layer(&self, method: &Method) -> ArchitecturalLayer {
if method.visibility == Visibility::Public {
ArchitecturalLayer::Api
} else if method.complexity > 20 {
ArchitecturalLayer::Core
} else if method.complexity < 5 {
ArchitecturalLayer::Utility
} else {
ArchitecturalLayer::Internal
}
}
}
fn tokenize_method_name(name: &str) -> HashSet<String> {
name.split('_')
.filter(|token| !token.is_empty())
.map(|token| token.to_lowercase())
.collect()
}
#[cfg(test)]
mod tests {
use super::*;
struct MockCallGraph {
calls: HashMap<(String, String), usize>,
callees: HashMap<String, HashSet<String>>,
callers: HashMap<String, HashSet<String>>,
}
impl CallGraphProvider for MockCallGraph {
fn call_count(&self, from: &str, to: &str) -> usize {
*self
.calls
.get(&(from.to_string(), to.to_string()))
.unwrap_or(&0)
}
fn callees(&self, method: &str) -> HashSet<String> {
self.callees.get(method).cloned().unwrap_or_default()
}
fn callers(&self, method: &str) -> HashSet<String> {
self.callers.get(method).cloned().unwrap_or_default()
}
}
struct MockFieldAccess {
fields: HashMap<String, HashSet<String>>,
writes: HashMap<(String, String), bool>,
}
impl FieldAccessProvider for MockFieldAccess {
fn fields_accessed_by(&self, method: &str) -> HashSet<String> {
self.fields.get(method).cloned().unwrap_or_default()
}
fn writes_to_field(&self, method: &str, field: &str) -> bool {
*self
.writes
.get(&(method.to_string(), field.to_string()))
.unwrap_or(&false)
}
}
fn create_method(name: &str) -> Method {
Method {
name: name.to_string(),
is_pure: false,
visibility: Visibility::Private,
complexity: 10,
has_io: false,
}
}
#[test]
fn test_naming_similarity_common_prefix() {
let call_graph = MockCallGraph {
calls: HashMap::new(),
callees: HashMap::new(),
callers: HashMap::new(),
};
let field_tracker = MockFieldAccess {
fields: HashMap::new(),
writes: HashMap::new(),
};
let calc = ClusteringSimilarityCalculator::new(call_graph, field_tracker);
let method1 = create_method("format_item");
let method2 = create_method("format_details");
let similarity = calc.naming_similarity(&method1, &method2);
assert!(
similarity > 0.7,
"Common prefix should give high similarity"
);
}
#[test]
fn test_tokenize_method_name() {
let tokens = tokenize_method_name("format_item_details");
assert_eq!(tokens.len(), 3);
assert!(tokens.contains("format"));
assert!(tokens.contains("item"));
assert!(tokens.contains("details"));
}
}