use std::collections::HashSet;
/// Heuristic scorer that rates how specific (informative) a name is.
///
/// Scores are in the range `0.0..=1.0`; higher means more specific. The
/// scorer holds three vocabularies that drive the heuristics.
#[derive(Debug, Clone)]
pub struct SpecificityScorer {
    /// Lower-case names considered uninformative. An exact match scores
    /// `0.0`; appearing as a fragment of a longer name can cost a penalty.
    generic_terms: HashSet<String>,
    /// Lower-case verb stems whose presence inside a name earns a bonus.
    specific_verbs: Vec<String>,
    /// Lower-case domain words; an exact match earns a larger bonus than a
    /// substring match.
    domain_terms: Vec<String>,
}
impl SpecificityScorer {
pub fn new() -> Self {
let generic_terms: HashSet<String> = [
"unknown",
"self",
"misc",
"utils",
"common",
"helpers",
"data",
"types",
"structs",
"impl",
"methods",
"functions",
"module",
"base",
"core",
"main",
"other",
"transformations",
"computation",
"item",
"formatter",
"shared",
"operations",
]
.iter()
.map(|s| s.to_string())
.collect();
let specific_verbs = vec![
"format",
"parse",
"validate",
"calculate",
"analyze",
"serialize",
"deserialize",
"transform",
"convert",
"compute",
"evaluate",
]
.into_iter()
.map(String::from)
.collect();
let domain_terms = vec![
"coverage",
"complexity",
"validation",
"formatting",
"parsing",
"computation",
]
.into_iter()
.map(String::from)
.collect();
Self {
generic_terms,
specific_verbs,
domain_terms,
}
}
pub fn calculate_specificity(&self, name: &str) -> f64 {
let name_lower = name.to_lowercase();
if self.generic_terms.contains(&name_lower) {
return 0.0;
}
for generic in &self.generic_terms {
if name_lower == *generic {
return 0.0;
}
}
let mut score: f64 = 0.5;
if self.domain_terms.contains(&name_lower) {
score += 0.12; } else if self
.domain_terms
.iter()
.any(|term| name_lower.contains(term))
{
score += 0.05; }
let name_len = name.len();
if name_len > 8 {
score += 0.04;
}
if name_len > 12 {
score += 0.02;
}
if name.contains('_') {
score += 0.10;
}
let has_verb = self
.specific_verbs
.iter()
.any(|verb| name_lower.contains(verb) || name_lower.contains(&format!("{}ing", verb)));
if has_verb {
score += 0.10;
}
for generic in &self.generic_terms {
if name_lower.contains(generic) && name_lower != *generic {
if name_lower.ends_with(generic) && name_lower.len() > generic.len() + 1 {
continue;
}
score -= 0.1;
break;
}
}
if name_len < 4 {
score -= 0.15;
}
if name_lower.starts_with("needs_review") {
score = 0.4; }
score.clamp(0.0_f64, 1.0)
}
pub fn is_acceptable(&self, name: &str, min_threshold: f64) -> bool {
self.calculate_specificity(name) >= min_threshold
}
pub fn assess_quality(&self, name: &str) -> &'static str {
let score = self.calculate_specificity(name);
if score >= 0.85 {
"Excellent"
} else if score >= 0.6 {
"Good"
} else if score >= 0.4 {
"Acceptable"
} else {
"Poor"
}
}
pub fn calculate_specificity_type_aware(&self, name: &str, is_type_based: bool) -> f64 {
let mut score = self.calculate_specificity(name);
if is_type_based {
if is_generic_type_name(name) {
score *= 0.2; } else if score < 0.6 {
score *= 0.85;
}
}
score.clamp(0.0, 1.0)
}
pub fn is_acceptable_for_type_based(&self, name: &str) -> bool {
self.calculate_specificity_type_aware(name, true) >= 0.65
}
}
/// Reports whether `name` is too generic to be used as a type-based name.
///
/// A name is considered generic when any of the following holds:
///
/// * its lower-cased form equals one of the built-in placeholder or bare
///   category names (whole-name comparison, not a suffix match);
/// * it is shorter than 5 bytes;
/// * every character is uppercase or numeric.
pub fn is_generic_type_name(name: &str) -> bool {
    const PLACEHOLDER_NAMES: &[&str] = &[
        "unknown",
        "self",
        "transformations",
        "computation",
        "item",
        "data",
        "utils",
        "helpers",
        "misc",
        "other",
        "common",
        "shared",
        "base",
    ];
    // Only the bare words are rejected; longer names that merely end in one
    // of them (such as `validation_operations`) are not matched by this list.
    const BARE_CATEGORY_NAMES: &[&str] = &["operations", "formatting"];

    let lowered = name.to_lowercase();

    PLACEHOLDER_NAMES
        .iter()
        .chain(BARE_CATEGORY_NAMES)
        .any(|candidate| lowered == *candidate)
        || name.len() < 5
        || name
            .chars()
            .all(|ch| ch.is_uppercase() || ch.is_numeric())
}
impl Default for SpecificityScorer {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Exact generic names must score zero.
    #[test]
    fn test_rejects_generic_names() {
        let scorer = SpecificityScorer::new();
        for generic in &["unknown", "self", "misc", "utils", "common", "helpers"] {
            assert_eq!(scorer.calculate_specificity(generic), 0.0);
        }
    }

    // Domain words score above the 0.5 baseline.
    #[test]
    fn test_scores_specific_names_high() {
        let scorer = SpecificityScorer::new();
        for domain_name in &["coverage", "complexity", "validation"] {
            assert!(scorer.calculate_specificity(domain_name) > 0.5);
        }
    }

    // A verb combined with a domain word beats the verb alone.
    #[test]
    fn test_compound_names_score_higher() {
        let scorer = SpecificityScorer::new();
        let compound = scorer.calculate_specificity("format_coverage");
        let single = scorer.calculate_specificity("format");
        assert!(compound > single);
        assert!(compound > 0.6);
    }

    #[test]
    fn test_specific_verbs_boost_score() {
        let scorer = SpecificityScorer::new();
        for name in &["formatting", "parsing", "validation", "metric_calculation"] {
            assert!(scorer.calculate_specificity(name) > 0.6);
        }
    }

    #[test]
    fn test_short_names_penalized() {
        let scorer = SpecificityScorer::new();
        let long = scorer.calculate_specificity("input_output");
        let short = scorer.calculate_specificity("io");
        assert!(long > short);
    }

    // Placeholder names land in the half-open band [0.4, 0.5).
    #[test]
    fn test_needs_review_fallback() {
        let scorer = SpecificityScorer::new();
        let score = scorer.calculate_specificity("needs_review_group_1");
        assert!((0.4..0.5).contains(&score));
    }

    #[test]
    fn test_is_acceptable_threshold() {
        let scorer = SpecificityScorer::new();
        assert!(scorer.is_acceptable("format_coverage", 0.4));
        for rejected in &["unknown", "misc"] {
            assert!(!scorer.is_acceptable(rejected, 0.4));
        }
    }

    #[test]
    fn test_quality_assessment() {
        let scorer = SpecificityScorer::new();
        let expectations = [
            ("format_coverage", "Good"),
            ("unknown", "Poor"),
            ("validation", "Good"),
        ];
        for &(name, label) in &expectations {
            assert_eq!(scorer.assess_quality(name), label);
        }
    }

    // Casing of the input must not influence the score.
    #[test]
    fn test_case_insensitive() {
        let scorer = SpecificityScorer::new();
        let reference = scorer.calculate_specificity("unknown");
        for variant in &["Unknown", "UnKnOwN"] {
            assert_eq!(reference, scorer.calculate_specificity(variant));
        }
    }

    // A generic word is tolerated as a suffix but penalized as a prefix.
    #[test]
    fn test_contains_generic_penalty() {
        let scorer = SpecificityScorer::new();
        let without_generic = scorer.calculate_specificity("coverage_analysis");
        let with_generic_suffix = scorer.calculate_specificity("coverage_utils");
        let with_generic_prefix = scorer.calculate_specificity("utils_coverage");
        assert!(without_generic > 0.6);
        assert!(with_generic_suffix > 0.6);
        assert!(without_generic > with_generic_prefix);
    }

    #[test]
    fn test_is_generic_type_name_detects_unknown() {
        for spelling in &["unknown", "Unknown", "UNKNOWN"] {
            assert!(is_generic_type_name(spelling));
        }
    }

    #[test]
    fn test_is_generic_type_name_detects_transformations() {
        for spelling in &["transformations", "Transformations"] {
            assert!(is_generic_type_name(spelling));
        }
    }

    #[test]
    fn test_is_generic_type_name_detects_computation() {
        for spelling in &["computation", "Computation"] {
            assert!(is_generic_type_name(spelling));
        }
    }

    #[test]
    fn test_is_generic_type_name_detects_self() {
        for spelling in &["self", "Self"] {
            assert!(is_generic_type_name(spelling));
        }
    }

    #[test]
    fn test_is_generic_type_name_accepts_specific() {
        let specific_names = [
            "validation_rules",
            "responsibility_classifier",
            "scoring_calculations",
        ];
        for name in &specific_names {
            assert!(!is_generic_type_name(name));
        }
    }

    #[test]
    fn test_is_generic_type_name_rejects_short() {
        for short_name in &["io", "abc", "T"] {
            assert!(is_generic_type_name(short_name));
        }
    }

    #[test]
    fn test_type_aware_scoring_penalizes_generic() {
        let scorer = SpecificityScorer::new();

        // "unknown" scores zero whether or not the name is type-based.
        for &is_type_based in &[false, true] {
            assert_eq!(
                scorer.calculate_specificity_type_aware("unknown", is_type_based),
                0.0
            );
        }

        let behavioral_score = scorer.calculate_specificity_type_aware("operations", false);
        let type_based_score = scorer.calculate_specificity_type_aware("operations", true);
        assert!(type_based_score < 0.2);
        assert!(behavioral_score > type_based_score || behavioral_score < 0.2);
    }

    #[test]
    fn test_type_aware_scoring_stricter_threshold() {
        let scorer = SpecificityScorer::new();
        let type_based_score = scorer.calculate_specificity_type_aware("formatting", true);
        let behavioral_score = scorer.calculate_specificity_type_aware("formatting", false);
        assert!(behavioral_score > type_based_score);
    }

    #[test]
    fn test_is_acceptable_for_type_based_rejects_generic() {
        let scorer = SpecificityScorer::new();
        for generic in &["unknown", "transformations", "computation", "self"] {
            assert!(!scorer.is_acceptable_for_type_based(generic));
        }
    }

    #[test]
    fn test_is_acceptable_for_type_based_accepts_specific() {
        let scorer = SpecificityScorer::new();
        for specific in &["validation_operations", "responsibility_classification"] {
            assert!(scorer.is_acceptable_for_type_based(specific));
        }
    }
}