use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
/// Size and "mechanicalness" statistics for a single unified diff.
///
/// Mechanical changes are changed lines matched by `DiffAnalyzer`'s
/// patterns (imports, blank lines, comments, attributes); the remainder
/// are counted as semantic changes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Minimality {
/// Count of `+` lines (file headers excluded).
pub lines_added: usize,
/// Count of `-` lines (file headers excluded).
pub lines_removed: usize,
/// `lines_added + lines_removed`.
pub total_lines: usize,
/// Distinct file paths seen in diff headers.
pub files_modified: usize,
/// Files the diff creates.
pub files_added: usize,
/// Files the diff deletes.
pub files_deleted: usize,
/// Changed lines matching a mechanical pattern.
pub mechanical_changes: usize,
/// Changed lines not matching any mechanical pattern.
pub semantic_changes: usize,
/// `mechanical_changes / total_lines`; 0.0 for an empty diff.
pub mechanical_ratio: f64,
}
impl Minimality {
pub fn score(&self) -> f64 {
let line_penalty = (self.total_lines as f64 / 100.0).min(1.0);
let file_penalty = (self.files_modified as f64 / 10.0).min(1.0);
let mechanical_penalty = self.mechanical_ratio;
let raw_score = 0.5 * line_penalty + 0.3 * file_penalty + 0.2 * mechanical_penalty;
1.0 - raw_score.min(1.0)
}
pub fn is_minimal(&self) -> bool {
self.total_lines < 50 && self.files_modified <= 3 && self.mechanical_ratio < 0.3
}
}
/// How concentrated a diff's edits are across modules and directories.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EditLocality {
/// File paths taken from `+++` diff headers.
pub edited_files: Vec<String>,
/// First path component of each edited file.
pub modules_touched: HashSet<String>,
/// 0.0 for a single module, increasing with each extra module, capped at 1.0.
pub scatter_score: f64,
/// Share of edited files that live in the most-edited module.
pub primary_module_fraction: f64,
/// Number of distinct directories containing edited files.
pub directories_touched: usize,
/// True when a package-boundary file (e.g. `Cargo.toml`) is modified.
pub crosses_boundaries: bool,
}
impl EditLocality {
    /// Score in `[0, 1]`: the primary-module concentration, reduced by
    /// the scatter penalty and a flat 0.2 penalty for touching package
    /// boundaries, then clamped to the unit interval.
    pub fn score(&self) -> f64 {
        let mut value = self.primary_module_fraction;
        value -= self.scatter_score.min(1.0);
        if self.crosses_boundaries {
            value -= 0.2;
        }
        value.max(0.0).min(1.0)
    }

    /// True when the edit stays put: no boundary files touched, more than
    /// 70% of files in the dominant module, and scatter below 0.3.
    pub fn is_localized(&self) -> bool {
        if self.crosses_boundaries {
            return false;
        }
        self.primary_module_fraction > 0.7 && self.scatter_score < 0.3
    }
}
/// How much human effort reviewing the diff is likely to cost.
///
/// NOTE(review): `DiffAnalyzer::analyze_review_burden` is currently a
/// stub that zero-fills every field; values are meaningful only when
/// populated by external tooling.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReviewBurden {
/// Hard lint errors reported against the diff.
pub lint_errors: usize,
/// Non-fatal lint warnings.
pub lint_warnings: usize,
/// Type-checker errors.
pub type_errors: usize,
/// Formatting violations.
pub format_issues: usize,
/// Additional patches needed after the initial one.
pub followup_patches: usize,
/// Estimated human review time, in minutes.
pub estimated_review_minutes: f64,
/// Change in code complexity; positive means more complex.
pub complexity_delta: i32,
/// New third-party dependencies introduced.
pub new_dependencies: usize,
}
impl ReviewBurden {
    /// Score in `[0, 1]`; 1.0 means no review friction at all.
    ///
    /// Each lint/type error costs 0.1, each warning 0.02, each follow-up
    /// patch 0.15, and each point of added complexity 0.05.
    /// NOTE(review): `format_issues` and `new_dependencies` feed
    /// `is_acceptable` but not this score — confirm that is intentional.
    pub fn score(&self) -> f64 {
        let hard_errors = (self.lint_errors + self.type_errors) as f64;
        let burden = hard_errors * 0.1
            + self.lint_warnings as f64 * 0.02
            + self.followup_patches as f64 * 0.15
            + self.complexity_delta.max(0) as f64 * 0.05;
        1.0 - burden.min(1.0)
    }

    /// A patch passes only with zero lint/type errors, zero formatting
    /// issues, and at most one follow-up patch.
    pub fn is_acceptable(&self) -> bool {
        let clean_build = self.lint_errors == 0 && self.type_errors == 0;
        clean_build && self.followup_patches <= 1 && self.format_issues == 0
    }
}
/// Aggregated quality assessment for one diff: the component scores,
/// their weighted combination, and the raw component data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiffQualityMetrics {
/// `Minimality::score()` of `minimality`.
pub minimality_score: f64,
/// `EditLocality::score()` of `locality`.
pub locality_score: f64,
/// Jaccard similarity against a reference diff; `None` when no reference was supplied.
pub edit_similarity: Option<f64>,
/// `ReviewBurden::score()` of `review_burden`.
pub review_burden_score: f64,
/// Weighted blend of the above; filled in by `calculate_combined`.
pub combined_score: f64,
/// Raw minimality statistics.
pub minimality: Minimality,
/// Raw locality statistics.
pub locality: EditLocality,
/// Raw review-burden statistics.
pub review_burden: ReviewBurden,
}
impl DiffQualityMetrics {
    /// Fold the component scores into `combined_score`.
    ///
    /// Weights: minimality 0.3, locality 0.25, edit similarity 0.25
    /// (a neutral 0.5 stands in when there is no reference diff),
    /// review burden 0.2.
    pub fn calculate_combined(&mut self) {
        let similarity = match self.edit_similarity {
            Some(s) => s,
            None => 0.5,
        };
        self.combined_score = 0.3 * self.minimality_score
            + 0.25 * self.locality_score
            + 0.25 * similarity
            + 0.2 * self.review_burden_score;
    }

    /// Whether the combined score clears the caller-supplied threshold.
    pub fn meets_quality_bar(&self, threshold: f64) -> bool {
        self.combined_score >= threshold
    }
}
/// Computes quality metrics from unified-diff text.
pub struct DiffAnalyzer {
/// Regex patterns that classify a changed line as mechanical.
mechanical_patterns: Vec<String>,
/// File names (package manifests) whose modification means the change
/// crosses a package boundary.
boundary_markers: Vec<String>,
}
impl Default for DiffAnalyzer {
    fn default() -> Self {
        // Patterns marking a changed line as mechanical: imports, blank
        // lines, line comments, and attributes.
        let mechanical_patterns = [r"^[-+]\s*use\s+", r"^[-+]\s*$", r"^[-+]\s*//", r"^[-+]\s*#\["]
            .iter()
            .map(|p| p.to_string())
            .collect();
        // Manifest files whose modification crosses a package boundary.
        let boundary_markers = ["Cargo.toml", "package.json", "go.mod"]
            .iter()
            .map(|m| m.to_string())
            .collect();
        Self {
            mechanical_patterns,
            boundary_markers,
        }
    }
}
impl DiffAnalyzer {
    /// Run the full analysis pipeline over a unified diff.
    ///
    /// `reference_diff`, when present, yields an edit-similarity score
    /// (Jaccard overlap of changed lines) that is folded into the
    /// combined score; otherwise a neutral 0.5 is used downstream.
    pub fn analyze(&self, diff: &str, reference_diff: Option<&str>) -> DiffQualityMetrics {
        let minimality = self.analyze_minimality(diff);
        let locality = self.analyze_locality(diff);
        let review_burden = self.analyze_review_burden(diff);
        let edit_similarity = reference_diff.map(|r| self.compute_edit_similarity(diff, r));
        let mut metrics = DiffQualityMetrics {
            minimality_score: minimality.score(),
            locality_score: locality.score(),
            edit_similarity,
            review_burden_score: review_burden.score(),
            combined_score: 0.0,
            minimality,
            locality,
            review_burden,
        };
        metrics.calculate_combined();
        metrics
    }

    /// Strip a single leading `a/` or `b/` git prefix from a diff header path.
    ///
    /// Fixes two header-parsing bugs: the old `trim_start_matches("b/")`
    /// never handled the `a/` prefix on `---` lines (so one modified file
    /// was recorded as two distinct paths, `a/foo` and `foo`), and it
    /// strips repeatedly, mangling legitimate paths like `b/b/x`.
    fn normalize_header_path(path: &str) -> &str {
        path.strip_prefix("a/")
            .or_else(|| path.strip_prefix("b/"))
            .unwrap_or(path)
    }

    /// Count added/removed lines and classify each changed line as
    /// mechanical (imports, blanks, comments, attributes) or semantic.
    fn analyze_minimality(&self, diff: &str) -> Minimality {
        // Compile every pattern once per diff instead of once per changed
        // line; invalid patterns are silently skipped, as before.
        let regexes: Vec<regex::Regex> = self
            .mechanical_patterns
            .iter()
            .filter_map(|p| regex::Regex::new(p).ok())
            .collect();
        let is_mechanical = |line: &str| regexes.iter().any(|re| re.is_match(line));

        let mut lines_added = 0;
        let mut lines_removed = 0;
        let mut mechanical = 0;
        let mut files_added = 0;
        let mut files_deleted = 0;
        let mut files: HashSet<String> = HashSet::new();
        for line in diff.lines() {
            if line.starts_with("+++ ") || line.starts_with("--- ") {
                let raw = line[4..].trim();
                if raw == "/dev/null" {
                    // `/dev/null` on the old (`---`) side marks a created
                    // file; on the new (`+++`) side, a deleted one. It is
                    // not a real path and must not enter the file set.
                    if line.starts_with("--- ") {
                        files_added += 1;
                    } else {
                        files_deleted += 1;
                    }
                } else if !raw.is_empty() {
                    files.insert(Self::normalize_header_path(raw).to_string());
                }
                continue;
            }
            if line.starts_with('+') && !line.starts_with("+++") {
                lines_added += 1;
                if is_mechanical(line) {
                    mechanical += 1;
                }
            } else if line.starts_with('-') && !line.starts_with("---") {
                lines_removed += 1;
                if is_mechanical(line) {
                    mechanical += 1;
                }
            }
        }
        let total = lines_added + lines_removed;
        Minimality {
            lines_added,
            lines_removed,
            total_lines: total,
            files_modified: files.len(),
            files_added,
            files_deleted,
            mechanical_changes: mechanical,
            semantic_changes: total.saturating_sub(mechanical),
            // Guard against 0/0 on an empty diff.
            mechanical_ratio: if total > 0 {
                mechanical as f64 / total as f64
            } else {
                0.0
            },
        }
    }

    /// Measure how concentrated the edit is: which modules/directories it
    /// touches and whether it modifies package-boundary files.
    fn analyze_locality(&self, diff: &str) -> EditLocality {
        let mut files: Vec<String> = Vec::new();
        let mut modules: HashSet<String> = HashSet::new();
        let mut directories: HashSet<String> = HashSet::new();
        let mut crosses_boundaries = false;
        for line in diff.lines() {
            if let Some(rest) = line.strip_prefix("+++ ") {
                let path = Self::normalize_header_path(rest.trim());
                // `+++ /dev/null` marks a deletion; there is no new path
                // to attribute locality to (the old code recorded the
                // literal string "/dev/null" as an edited file).
                if path.is_empty() || path == "/dev/null" {
                    continue;
                }
                let file = path.to_string();
                // Keep the full parent path: the old `rsplit('/').nth(1)`
                // kept only the last directory component, conflating
                // `src/a` with `lib/a`.
                if let Some((dir, _name)) = file.rsplit_once('/') {
                    directories.insert(dir.to_string());
                }
                if let Some(module) = file.split('/').next() {
                    modules.insert(module.to_string());
                }
                if self.boundary_markers.iter().any(|m| file.ends_with(m)) {
                    crosses_boundaries = true;
                }
                files.push(file);
            }
        }
        // 0.0 for a single module, then +0.2 per extra module, capped at 1.0.
        let scatter_score = if modules.len() <= 1 {
            0.0
        } else {
            ((modules.len() as f64 - 1.0) / 5.0).min(1.0)
        };
        // Fraction of edited files living in the most-edited module;
        // trivially 1.0 for an empty diff.
        let primary_fraction = if files.is_empty() {
            1.0
        } else {
            let mut module_counts: HashMap<&str, usize> = HashMap::new();
            for file in &files {
                let module = file.split('/').next().unwrap_or(file);
                *module_counts.entry(module).or_insert(0) += 1;
            }
            let max_count = module_counts.values().max().copied().unwrap_or(0);
            max_count as f64 / files.len() as f64
        };
        EditLocality {
            edited_files: files,
            modules_touched: modules,
            scatter_score,
            primary_module_fraction: primary_fraction,
            directories_touched: directories.len(),
            crosses_boundaries,
        }
    }

    /// Placeholder: lint/type/format counts come from external tooling
    /// that is not wired in yet, so every field is zero-filled.
    fn analyze_review_burden(&self, _diff: &str) -> ReviewBurden {
        ReviewBurden {
            lint_errors: 0,
            lint_warnings: 0,
            type_errors: 0,
            format_issues: 0,
            followup_patches: 0,
            estimated_review_minutes: 0.0,
            complexity_delta: 0,
            new_dependencies: 0,
        }
    }

    /// Collect the changed (`+`/`-`) lines of a diff, excluding file headers.
    fn changed_lines(diff: &str) -> HashSet<&str> {
        diff.lines()
            .filter(|l| l.starts_with('+') || l.starts_with('-'))
            .filter(|l| !l.starts_with("+++") && !l.starts_with("---"))
            .collect()
    }

    /// Jaccard similarity over the sets of changed lines of two diffs.
    ///
    /// Duplicate changed lines collapse (set semantics). Two diffs with
    /// no changed lines are defined as identical (1.0).
    pub fn compute_edit_similarity(&self, diff1: &str, diff2: &str) -> f64 {
        let lines1 = Self::changed_lines(diff1);
        let lines2 = Self::changed_lines(diff2);
        if lines1.is_empty() && lines2.is_empty() {
            return 1.0;
        }
        let intersection = lines1.intersection(&lines2).count();
        let union = lines1.union(&lines2).count();
        // At least one set is non-empty here, so `union > 0`.
        intersection as f64 / union as f64
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An 8-line, single-file diff with little mechanical churn is minimal.
    #[test]
    fn test_minimality_score() {
        let m = Minimality {
            lines_added: 5,
            lines_removed: 3,
            total_lines: 8,
            files_modified: 1,
            files_added: 0,
            files_deleted: 0,
            mechanical_changes: 1,
            semantic_changes: 7,
            mechanical_ratio: 0.125,
        };
        assert!(m.is_minimal());
        assert!(m.score() > 0.8);
    }

    /// All edits inside one module with no boundary files: fully localized.
    #[test]
    fn test_locality_score() {
        let loc = EditLocality {
            edited_files: vec!["src/lib.rs".into(), "src/utils.rs".into()],
            modules_touched: HashSet::from(["src".into()]),
            scatter_score: 0.0,
            primary_module_fraction: 1.0,
            directories_touched: 1,
            crosses_boundaries: false,
        };
        assert!(loc.is_localized());
        assert!(loc.score() > 0.8);
    }

    /// Identical diffs score exactly 1.0; divergent diffs score lower.
    #[test]
    fn test_edit_similarity() {
        let analyzer = DiffAnalyzer::default();
        let base = "+line1\n+line2\n-line3";
        assert_eq!(
            analyzer.compute_edit_similarity(base, "+line1\n+line2\n-line3"),
            1.0
        );
        assert!(analyzer.compute_edit_similarity(base, "+line1\n+different\n-other") < 1.0);
    }

    /// Warnings and small complexity growth alone do not block acceptance.
    #[test]
    fn test_review_burden_acceptable() {
        let burden = ReviewBurden {
            lint_errors: 0,
            lint_warnings: 2,
            type_errors: 0,
            format_issues: 0,
            followup_patches: 0,
            estimated_review_minutes: 5.0,
            complexity_delta: 2,
            new_dependencies: 0,
        };
        assert!(burden.is_acceptable());
        assert!(burden.score() > 0.8);
    }
}