use std::collections::HashMap;
use super::types::{capitalize_first, BehaviorCategory};
/// Namespace type for the name-based method categorization heuristics.
///
/// It carries no data; the categorization routines are associated functions
/// that take the method name as an argument.
pub struct BehavioralCategorizer;
impl BehavioralCategorizer {
pub fn categorize_method(method_name: &str) -> BehaviorCategory {
let lower_name = method_name.to_lowercase();
if Self::is_utilities(&lower_name) {
return BehaviorCategory::Utilities;
}
if Self::is_construction(&lower_name) {
return BehaviorCategory::Construction;
}
if Self::is_lifecycle(&lower_name) {
return BehaviorCategory::Lifecycle;
}
if Self::is_validation(&lower_name) {
return BehaviorCategory::Validation;
}
if Self::is_parsing(&lower_name) {
return BehaviorCategory::Parsing;
}
if Self::is_rendering(&lower_name) {
return BehaviorCategory::Rendering;
}
if Self::is_event_handling(&lower_name) {
return BehaviorCategory::EventHandling;
}
if Self::is_persistence(&lower_name) {
return BehaviorCategory::Persistence;
}
if Self::is_computation(&lower_name) {
return BehaviorCategory::Computation;
}
if Self::is_filtering(&lower_name) {
return BehaviorCategory::Filtering;
}
if Self::is_transformation(&lower_name) {
return BehaviorCategory::Transformation;
}
if Self::is_data_access(&lower_name) {
return BehaviorCategory::DataAccess;
}
if Self::is_state_management(&lower_name) {
return BehaviorCategory::StateManagement;
}
if Self::is_processing(&lower_name) {
return BehaviorCategory::Processing;
}
if Self::is_communication(&lower_name) {
return BehaviorCategory::Communication;
}
let domain = method_name
.split('_')
.next()
.filter(|s| !s.is_empty())
.map(capitalize_first)
.unwrap_or_else(|| "Operations".to_string());
BehaviorCategory::Domain(domain)
}
/// Returns `true` when `name` looks like a lifecycle method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, not by whole
/// word, so `closest_point` matches `close`. The comparison is
/// case-sensitive; `categorize_method` passes an already lowercased name.
pub(crate) fn is_lifecycle(name: &str) -> bool {
    const LIFECYCLE_KEYWORDS: &[&str] = &[
        "new",
        "create",
        "init",
        "initialize",
        "setup",
        "destroy",
        "cleanup",
        "dispose",
        "shutdown",
        "close",
    ];
    LIFECYCLE_KEYWORDS.iter().any(|&kw| {
        // Checking the text after each underscore is equivalent to
        // `name.contains("_<kw>")` without building a String per keyword.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a rendering or formatting method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, so `showcase`
/// matches `show`. The comparison is case-sensitive.
pub(crate) fn is_rendering(name: &str) -> bool {
    const RENDERING_KEYWORDS: &[&str] = &[
        "render",
        "draw",
        "paint",
        "display",
        "show",
        "present",
        "format",
        "to_string",
        "print",
    ];
    RENDERING_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like an event-handling method.
///
/// The name qualifies if it begins with `handle_` or `on_`, or if it
/// contains `_event`, `dispatch` or `trigger` anywhere. The comparison is
/// case-sensitive.
pub(crate) fn is_event_handling(name: &str) -> bool {
    const LEADING: &[&str] = &["handle_", "on_"];
    const ANYWHERE: &[&str] = &["_event", "dispatch", "trigger"];

    let has_prefix = LEADING.iter().any(|&prefix| name.starts_with(prefix));
    has_prefix || ANYWHERE.iter().any(|&fragment| name.contains(fragment))
}
/// Returns `true` when `name` looks like a persistence (save/load) method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, so `ready`
/// matches `read`. The comparison is case-sensitive.
pub(crate) fn is_persistence(name: &str) -> bool {
    const PERSISTENCE_KEYWORDS: &[&str] = &[
        "save",
        "load",
        "persist",
        "restore",
        "serialize",
        "deserialize",
        "write",
        "read",
        "parse",
        "store",
    ];
    PERSISTENCE_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a validation or predicate method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. The `is_` keyword includes its
/// trailing underscore, so `island` does not match while `is_valid` and
/// `value_is_set` do. The comparison is case-sensitive.
pub(crate) fn is_validation(name: &str) -> bool {
    const VALIDATION_KEYWORDS: &[&str] = &["validate", "check", "verify", "ensure", "is_"];
    VALIDATION_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a state-management method.
///
/// The name qualifies if it begins with `get_`, `set_`, `update_` or
/// `mutate_`, or if it contains `_state` anywhere. The comparison is
/// case-sensitive.
pub(crate) fn is_state_management(name: &str) -> bool {
    const ACCESSOR_PREFIXES: &[&str] = &["get_", "set_", "update_", "mutate_"];

    if ACCESSOR_PREFIXES
        .iter()
        .any(|&prefix| name.starts_with(prefix))
    {
        return true;
    }
    name.contains("_state")
}
/// Returns `true` when `name` looks like a computation method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, so
/// `measurement` matches `measure`. The comparison is case-sensitive.
pub(crate) fn is_computation(name: &str) -> bool {
    const COMPUTATION_KEYWORDS: &[&str] = &["calculate", "compute", "evaluate", "measure"];
    COMPUTATION_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a parsing or decoding method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, so `scanner`
/// matches `scan`. The comparison is case-sensitive.
pub(crate) fn is_parsing(name: &str) -> bool {
    const PARSING_KEYWORDS: &[&str] = &[
        "parse",
        "read",
        "extract",
        "decode",
        "deserialize",
        "unmarshal",
        "scan",
    ];
    PARSING_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a filtering or lookup method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, so `lastname`
/// matches `last`, while `mistake` does not match `take` because the keyword
/// is neither at the start nor directly after an underscore. The comparison
/// is case-sensitive.
pub(crate) fn is_filtering(name: &str) -> bool {
    const FILTERING_KEYWORDS: &[&str] = &[
        "filter", "select", "find", "search", "query", "lookup", "match", "where", "first",
        "last", "single", "take", "skip",
    ];
    FILTERING_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a data-transformation method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, so `mapping`
/// matches `map`, while `bitmap` does not. The comparison is case-sensitive.
pub(crate) fn is_transformation(name: &str) -> bool {
    const TRANSFORMATION_KEYWORDS: &[&str] = &[
        "transform",
        "convert",
        "map",
        "apply",
        "adapt",
        "reduce",
        "fold",
        "flatten",
        "merge",
        "join",
        "split",
        "group",
        "partition",
    ];
    TRANSFORMATION_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a data-access method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, so `settings`
/// matches `set`, while `offset` and `reset` do not. The comparison is
/// case-sensitive.
pub(crate) fn is_data_access(name: &str) -> bool {
    const DATA_ACCESS_KEYWORDS: &[&str] = &["get", "set", "fetch", "retrieve", "access"];
    DATA_ACCESS_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a constructor or builder method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, so `newline`
/// matches `new`, while `renew` and `rebuild` do not. The comparison is
/// case-sensitive.
pub(crate) fn is_construction(name: &str) -> bool {
    const CONSTRUCTION_KEYWORDS: &[&str] = &["create", "build", "new", "make", "construct"];
    CONSTRUCTION_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a processing or execution method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. Matching is by prefix, so `runtime`
/// matches `run`, while `rerun` and `preprocess` do not. The comparison is
/// case-sensitive.
pub(crate) fn is_processing(name: &str) -> bool {
    const PROCESSING_KEYWORDS: &[&str] = &["process", "handle", "execute", "run"];
    PROCESSING_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a messaging or communication method.
///
/// A keyword matches when the name starts with it, or when it appears
/// directly after any `_` in the name. `resend` therefore does not match
/// `send`, while `on_receive` matches `receive`. The comparison is
/// case-sensitive.
pub(crate) fn is_communication(name: &str) -> bool {
    const COMMUNICATION_KEYWORDS: &[&str] =
        &["send", "receive", "transmit", "broadcast", "notify"];
    COMMUNICATION_KEYWORDS.iter().any(|&kw| {
        // Equivalent to `name.contains("_<kw>")`, but allocation-free.
        name.starts_with(kw)
            || name
                .match_indices('_')
                .any(|(idx, _)| name[idx + 1..].starts_with(kw))
    })
}
/// Returns `true` when `name` looks like a generic utility method.
///
/// The name qualifies if it begins with one of the listed prefixes
/// (`with_`, `from_`, `into_`, `as_`, `for_`, `helper_`, `util_`) or if it
/// is exactly equal to one of the listed whole names such as `clone` or
/// `is_empty`. The comparison is case-sensitive.
pub(crate) fn is_utilities(name: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "with_", "from_", "into_", "as_", "for_", "helper_", "util_",
    ];
    const WHOLE_NAMES: &[&str] = &[
        "default", "any", "clone", "eq", "cmp", "hash", "len", "is_empty", "clear", "reset",
    ];

    PREFIXES.iter().any(|&prefix| name.starts_with(prefix))
        || WHOLE_NAMES.iter().any(|&exact| exact == name)
}
}
/// Groups method names by the category `BehavioralCategorizer::categorize_method`
/// assigns to each of them.
///
/// Within a group, names keep the order in which they appear in `methods`.
/// After grouping, a cluster is kept if its category is one of the variants
/// listed below; any other cluster (such as a `Domain` one) is kept only when
/// it holds at least three methods.
pub fn cluster_methods_by_behavior(methods: &[String]) -> HashMap<BehaviorCategory, Vec<String>> {
    let mut grouped = methods.iter().fold(
        HashMap::<BehaviorCategory, Vec<String>>::new(),
        |mut acc, name| {
            let key = BehavioralCategorizer::categorize_method(name);
            acc.entry(key).or_default().push(name.clone());
            acc
        },
    );

    grouped.retain(|category, members| {
        let always_kept = matches!(
            category,
            BehaviorCategory::Lifecycle
                | BehaviorCategory::StateManagement
                | BehaviorCategory::Rendering
                | BehaviorCategory::EventHandling
                | BehaviorCategory::Persistence
                | BehaviorCategory::Validation
                | BehaviorCategory::Computation
                | BehaviorCategory::Parsing
                | BehaviorCategory::Filtering
                | BehaviorCategory::Transformation
                | BehaviorCategory::DataAccess
                | BehaviorCategory::Construction
                | BehaviorCategory::Processing
                | BehaviorCategory::Communication
                | BehaviorCategory::Utilities
        );
        // Clusters outside the list above must reach a minimum size to survive.
        always_kept || members.len() >= 3
    });

    grouped
}
/// Returns `true` when `method_name` follows one of the recognised test,
/// benchmark or test-support naming patterns.
///
/// Recognised patterns: the prefixes `test_`, `bench_`, `mock_`, `stub_` and
/// `fixture_`; the infixes `_test_` and `_bench_`; the suffix `_test`; and
/// the exact names `setup` and `teardown`. The comparison is case-sensitive.
pub fn is_test_method(method_name: &str) -> bool {
    const PREFIXES: &[&str] = &["test_", "bench_", "mock_", "stub_", "fixture_"];
    const INFIXES: &[&str] = &["_test_", "_bench_"];

    matches!(method_name, "setup" | "teardown")
        || method_name.ends_with("_test")
        || PREFIXES.iter().any(|&prefix| method_name.starts_with(prefix))
        || INFIXES.iter().any(|&infix| method_name.contains(infix))
}
/// Picks the category that best represents a group of methods.
///
/// Every method is categorized with `BehavioralCategorizer::categorize_method`
/// and the most frequent non-`Domain` category wins. Ties are broken in
/// favour of the category whose first method appears earliest in `methods`,
/// so the result does not depend on `HashMap` iteration order.
///
/// When no method maps to a non-`Domain` category, the category of the first
/// method is returned. An empty slice does not panic: it yields the result of
/// categorizing an empty name.
pub(crate) fn infer_cluster_category(methods: &[String]) -> BehaviorCategory {
    // category -> (number of methods, index of the first method that produced it)
    let mut tallies: HashMap<BehaviorCategory, (usize, usize)> = HashMap::new();
    for (index, method) in methods.iter().enumerate() {
        let category = BehavioralCategorizer::categorize_method(method);
        tallies.entry(category).or_insert((0, index)).0 += 1;
    }
    tallies
        .into_iter()
        .filter(|(category, _)| !matches!(category, BehaviorCategory::Domain(_)))
        // First-seen indices are unique per category, so the key is unique
        // and the maximum is well defined: highest count, then earliest seen.
        .max_by_key(|(_, (count, first_seen))| (*count, std::cmp::Reverse(*first_seen)))
        .map(|(category, _)| category)
        .unwrap_or_else(|| {
            // `first()` instead of `methods[0]` keeps an empty input from panicking.
            let fallback_name = methods.first().map_or("", String::as_str);
            BehavioralCategorizer::categorize_method(fallback_name)
        })
}