use std::collections::{HashMap, HashSet};
use crate::config::CommitConfig;
use crate::{
error::Result,
git::get_git_numstat,
types::{Mode, ScopeCandidate},
};
const PLACEHOLDER_DIRS: &[&str] = &[
"src", "lib", "bin", "crates", "benches", "examples", "internal", "pkg", "include", "tests", "test", "docs", "packages", "modules",
];
const SKIP_DIRS: &[&str] =
&["test", "tests", "benches", "examples", "target", "build", "node_modules", ".github"];
pub struct ScopeAnalyzer {
component_lines: HashMap<String, usize>,
total_lines: usize,
}
impl Default for ScopeAnalyzer {
fn default() -> Self {
Self::new()
}
}
impl ScopeAnalyzer {
pub fn new() -> Self {
Self { component_lines: HashMap::new(), total_lines: 0 }
}
fn from_numstat(numstat: &str, config: &CommitConfig) -> Self {
let mut analyzer = Self::new();
for line in numstat.lines() {
analyzer.process_numstat_line(line, config);
}
analyzer
}
pub fn process_numstat_line(&mut self, line: &str, config: &CommitConfig) {
let parts: Vec<&str> = line.split('\t').collect();
if parts.len() < 3 {
return;
}
let (added_str, deleted_str, path_part) = (parts[0], parts[1], parts[2]);
let added = added_str.parse::<usize>().unwrap_or(0);
let deleted = deleted_str.parse::<usize>().unwrap_or(0);
let lines_changed = added + deleted;
if lines_changed == 0 {
return;
}
let path = Self::extract_path_from_rename(path_part);
if config.excluded_files.iter().any(|ex| path.ends_with(ex)) {
return;
}
self.total_lines += lines_changed;
let component_candidates = Self::extract_components_from_path(&path);
for comp in component_candidates {
if comp.split('/').any(|s| s.contains('.')) {
continue;
}
*self.component_lines.entry(comp).or_insert(0) += lines_changed;
}
}
fn extract_path_from_rename(path_part: &str) -> String {
if let Some(brace_start) = path_part.find('{') {
if let Some(arrow_pos) = path_part[brace_start..].find(" => ") {
let arrow_abs = brace_start + arrow_pos;
if let Some(brace_end) = path_part[arrow_abs..].find('}') {
let brace_end_abs = arrow_abs + brace_end;
let prefix = &path_part[..brace_start];
let new_name = path_part[arrow_abs + 4..brace_end_abs].trim();
return format!("{prefix}{new_name}");
}
}
} else if path_part.contains(" => ") {
return path_part
.split(" => ")
.nth(1)
.unwrap_or(path_part)
.trim()
.to_string();
}
path_part.trim().to_string()
}
fn extract_components_from_path(path: &str) -> Vec<String> {
let segments: Vec<&str> = path.split('/').collect();
let mut component_candidates = Vec::new();
let mut meaningful_segments = Vec::new();
let strip_ext = |s: &str| -> String {
if let Some(pos) = s.rfind('.') {
s[..pos].to_string()
} else {
s.to_string()
}
};
let is_file = |s: &str| -> bool {
s.contains('.') && !s.starts_with('.') && s.rfind('.').is_some_and(|p| p > 0)
};
for (seg_idx, seg) in segments.iter().enumerate() {
if PLACEHOLDER_DIRS.contains(seg) {
if segments.len() > seg_idx + 1 {
continue;
}
}
if is_file(seg) {
continue;
}
if SKIP_DIRS.contains(seg) {
continue;
}
let stripped = strip_ext(seg);
if !stripped.is_empty() && !stripped.starts_with('.') {
meaningful_segments.push(stripped);
}
}
if !meaningful_segments.is_empty() {
component_candidates.push(meaningful_segments[0].clone());
if meaningful_segments.len() >= 2 {
component_candidates
.push(format!("{}/{}", meaningful_segments[0], meaningful_segments[1]));
}
}
component_candidates
}
pub fn build_scope_candidates(&self) -> Vec<ScopeCandidate> {
let mut candidates: Vec<ScopeCandidate> = self
.component_lines
.iter()
.filter(|(path, _)| {
if !path.contains('/') && PLACEHOLDER_DIRS.contains(&path.as_str()) {
return false;
}
if let Some(root) = path.split('/').next()
&& PLACEHOLDER_DIRS.contains(&root)
{
return false;
}
true
})
.map(|(path, &lines)| {
let percentage = (lines as f32 / self.total_lines as f32) * 100.0;
let is_two_segment = path.contains('/');
let confidence = if is_two_segment {
if percentage > 60.0 {
percentage * 1.2
} else {
percentage * 0.8
}
} else {
percentage
};
ScopeCandidate { percentage, path: path.clone(), confidence }
})
.collect();
candidates.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
candidates
}
pub fn is_wide_change(candidates: &[ScopeCandidate], config: &CommitConfig) -> bool {
let is_wide = if let Some(top) = candidates.first() {
top.percentage / 100.0 < config.wide_change_threshold
} else {
false
};
let distinct_roots: HashSet<&str> = candidates
.iter()
.map(|c| c.path.split('/').next().unwrap_or(&c.path))
.collect();
is_wide || distinct_roots.len() >= 3
}
pub fn extract_scope(numstat: &str, config: &CommitConfig) -> (Vec<ScopeCandidate>, usize) {
let analyzer = Self::from_numstat(numstat, config);
let candidates = analyzer.build_scope_candidates();
(candidates, analyzer.total_lines)
}
pub fn count_changed_lines(numstat: &str, config: &CommitConfig) -> usize {
Self::from_numstat(numstat, config).total_lines
}
pub fn analyze_wide_change(numstat: &str) -> Option<String> {
let lines: Vec<&str> = numstat.lines().collect();
if lines.is_empty() {
return None;
}
let paths: Vec<&str> = lines
.iter()
.filter_map(|line| {
let parts: Vec<&str> = line.split('\t').collect();
if parts.len() >= 3 {
Some(parts[2])
} else {
None
}
})
.collect();
if paths.is_empty() {
return None;
}
let total = paths.len();
let mut md_count = 0;
let mut test_count = 0;
let mut config_count = 0;
let mut has_cargo_toml = false;
let mut has_package_json = false;
let mut error_keywords = 0;
let mut type_keywords = 0;
for path in &paths {
if std::path::Path::new(path)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
{
md_count += 1;
}
if path.contains("/test") || path.contains("_test.") || path.ends_with("_test.rs") {
test_count += 1;
}
if std::path::Path::new(path).extension().is_some_and(|ext| {
ext.eq_ignore_ascii_case("toml")
|| ext.eq_ignore_ascii_case("yaml")
|| ext.eq_ignore_ascii_case("yml")
|| ext.eq_ignore_ascii_case("json")
}) {
config_count += 1;
}
if path.contains("Cargo.toml") {
has_cargo_toml = true;
}
if path.contains("package.json") {
has_package_json = true;
}
let lower_path = path.to_lowercase();
if lower_path.contains("error")
|| lower_path.contains("result")
|| lower_path.contains("err")
{
error_keywords += 1;
}
if lower_path.contains("type")
|| lower_path.contains("struct")
|| lower_path.contains("enum")
{
type_keywords += 1;
}
}
if has_cargo_toml || has_package_json {
return Some("deps".to_string());
}
if md_count * 100 / total > 70 {
return Some("docs".to_string());
}
if test_count * 100 / total > 60 {
return Some("tests".to_string());
}
if error_keywords * 100 / total > 40 {
return Some("error-handling".to_string());
}
if type_keywords * 100 / total > 40 {
return Some("type-refactor".to_string());
}
if config_count * 100 / total > 50 {
return Some("config".to_string());
}
None
}
}
#[tracing::instrument(target = "lgit", name = "analysis.extract_scope_candidates", skip_all, fields(mode = ?mode, target = ?target, dir))]
pub fn extract_scope_candidates(
mode: &Mode,
target: Option<&str>,
dir: &str,
config: &CommitConfig,
) -> Result<(String, bool)> {
let numstat = get_git_numstat(mode, target, dir, config)?;
let (candidates, total_lines) = ScopeAnalyzer::extract_scope(&numstat, config);
if total_lines == 0 {
return Ok(("(none - no measurable changes)".to_string(), false));
}
let is_wide = ScopeAnalyzer::is_wide_change(&candidates, config);
if is_wide {
let scope_str = if config.wide_change_abstract {
if let Some(pattern) = ScopeAnalyzer::analyze_wide_change(&numstat) {
format!("(cross-cutting: {pattern})")
} else {
"(none - multi-component change)".to_string()
}
} else {
"(none - multi-component change)".to_string()
};
return Ok((scope_str, true));
}
let mut suggestion_parts = Vec::new();
for cand in candidates.iter().take(5) {
if cand.percentage >= 10.0 {
let confidence_label = if cand.path.contains('/') {
if cand.percentage > 60.0 {
"high confidence"
} else {
"moderate confidence"
}
} else {
"high confidence"
};
suggestion_parts
.push(format!("{} ({:.0}%, {})", cand.path, cand.percentage, confidence_label));
}
}
let scope_str = if suggestion_parts.is_empty() {
"(none - unclear component)".to_string()
} else {
format!("{}\nPrefer 2-segment scopes marked 'high confidence'", suggestion_parts.join(", "))
};
Ok((scope_str, is_wide))
}
#[cfg(test)]
mod tests {
use super::*;
fn default_config() -> CommitConfig {
CommitConfig {
excluded_files: vec![
"Cargo.lock".to_string(),
"package-lock.json".to_string(),
"yarn.lock".to_string(),
],
wide_change_threshold: 0.5,
..Default::default()
}
}
#[test]
fn test_extract_path_from_rename_brace() {
assert_eq!(ScopeAnalyzer::extract_path_from_rename("lib/{old => new}/file.rs"), "lib/new");
}
#[test]
fn test_extract_path_from_rename_brace_complex() {
assert_eq!(
ScopeAnalyzer::extract_path_from_rename("src/api/{client.rs => http_client.rs}"),
"src/api/http_client.rs"
);
}
#[test]
fn test_extract_path_from_rename_arrow() {
assert_eq!(
ScopeAnalyzer::extract_path_from_rename("old/file.rs => new/file.rs"),
"new/file.rs"
);
}
#[test]
fn test_extract_path_from_rename_arrow_with_spaces() {
assert_eq!(
ScopeAnalyzer::extract_path_from_rename(" old/path.rs => new/path.rs "),
"new/path.rs"
);
}
#[test]
fn test_extract_path_from_rename_no_rename() {
assert_eq!(ScopeAnalyzer::extract_path_from_rename("lib/file.rs"), "lib/file.rs");
}
#[test]
fn test_extract_path_from_rename_malformed_brace() {
assert_eq!(
ScopeAnalyzer::extract_path_from_rename("lib/{old => new/file.rs"),
"lib/{old => new/file.rs"
);
}
#[test]
fn test_extract_components_simple() {
let comps = ScopeAnalyzer::extract_components_from_path("src/api/client.rs");
assert_eq!(comps, vec!["api"]);
}
#[test]
fn test_extract_components_with_placeholder() {
let comps = ScopeAnalyzer::extract_components_from_path("lib/foo/bar/baz.tsx");
assert_eq!(comps, vec!["foo", "foo/bar"]);
}
#[test]
fn test_extract_components_skip_tests() {
let comps = ScopeAnalyzer::extract_components_from_path("tests/api/client_test.rs");
assert_eq!(comps, vec!["api"]);
}
#[test]
fn test_extract_components_skip_node_modules() {
let comps = ScopeAnalyzer::extract_components_from_path("node_modules/foo/bar.js");
assert_eq!(comps, vec!["foo"]);
}
#[test]
fn test_extract_components_single_segment() {
let comps = ScopeAnalyzer::extract_components_from_path("src/main.rs");
assert_eq!(comps, Vec::<String>::new());
}
#[test]
fn test_extract_components_dotfile_skipped() {
let comps = ScopeAnalyzer::extract_components_from_path("lib/.git/config");
assert_eq!(comps, vec!["config"]);
}
#[test]
fn test_extract_components_strips_extension() {
let comps = ScopeAnalyzer::extract_components_from_path("src/api/client.rs");
assert!(comps.contains(&"api".to_string()));
}
#[test]
fn test_extract_components_go_internal() {
let comps = ScopeAnalyzer::extract_components_from_path("internal/agent/worker.go");
assert_eq!(comps, vec!["agent"]);
}
#[test]
fn test_extract_components_go_internal_nested() {
let comps = ScopeAnalyzer::extract_components_from_path("internal/config/parser/json.go");
assert_eq!(comps, vec!["config", "config/parser"]);
}
#[test]
fn test_extract_components_go_pkg() {
let comps = ScopeAnalyzer::extract_components_from_path("pkg/util/strings.go");
assert_eq!(comps, vec!["util"]);
}
#[test]
fn test_extract_components_monorepo_packages() {
let comps = ScopeAnalyzer::extract_components_from_path("packages/core/index.ts");
assert_eq!(comps, vec!["core"]);
}
#[test]
fn test_process_numstat_line_normal() {
let mut analyzer = ScopeAnalyzer::new();
let config = default_config();
analyzer.process_numstat_line("10\t5\tlib/foo/bar.rs", &config);
assert_eq!(analyzer.total_lines, 15);
assert_eq!(analyzer.component_lines.get("foo"), Some(&15));
}
#[test]
fn test_process_numstat_line_excluded_file() {
let mut analyzer = ScopeAnalyzer::new();
let config = default_config();
analyzer.process_numstat_line("10\t5\tCargo.lock", &config);
assert_eq!(analyzer.total_lines, 0);
assert!(analyzer.component_lines.is_empty());
}
#[test]
fn test_process_numstat_line_binary_file() {
let mut analyzer = ScopeAnalyzer::new();
let config = default_config();
analyzer.process_numstat_line("-\t-\timage.png", &config);
assert_eq!(analyzer.total_lines, 0);
}
#[test]
fn test_process_numstat_line_invalid() {
let mut analyzer = ScopeAnalyzer::new();
let config = default_config();
analyzer.process_numstat_line("invalid line", &config);
assert_eq!(analyzer.total_lines, 0);
}
#[test]
fn test_process_numstat_line_rename_brace() {
let mut analyzer = ScopeAnalyzer::new();
let config = default_config();
analyzer.process_numstat_line("20\t10\tlib/{old => new}/file.rs", &config);
assert_eq!(analyzer.total_lines, 30);
assert_eq!(analyzer.component_lines.get("new"), Some(&30));
}
#[test]
fn test_process_numstat_line_multiple_files() {
let mut analyzer = ScopeAnalyzer::new();
let config = default_config();
analyzer.process_numstat_line("10\t5\tsrc/api/client.rs", &config);
analyzer.process_numstat_line("20\t10\tsrc/api/server.rs", &config);
assert_eq!(analyzer.total_lines, 45);
assert_eq!(analyzer.component_lines.get("api"), Some(&45));
}
#[test]
fn test_is_wide_change_focused() {
let config = default_config();
let candidates = vec![
ScopeCandidate { path: "api".to_string(), percentage: 80.0, confidence: 80.0 },
ScopeCandidate { path: "db".to_string(), percentage: 20.0, confidence: 20.0 },
];
assert!(!ScopeAnalyzer::is_wide_change(&candidates, &config));
}
#[test]
fn test_is_wide_change_dispersed() {
let config = default_config();
let candidates = vec![
ScopeCandidate { path: "api".to_string(), percentage: 30.0, confidence: 30.0 },
ScopeCandidate { path: "db".to_string(), percentage: 30.0, confidence: 30.0 },
ScopeCandidate { path: "ui".to_string(), percentage: 40.0, confidence: 40.0 },
];
assert!(ScopeAnalyzer::is_wide_change(&candidates, &config));
}
#[test]
fn test_is_wide_change_three_roots() {
let config = default_config();
let candidates = vec![
ScopeCandidate { path: "api".to_string(), percentage: 60.0, confidence: 60.0 },
ScopeCandidate { path: "db".to_string(), percentage: 20.0, confidence: 20.0 },
ScopeCandidate { path: "ui".to_string(), percentage: 20.0, confidence: 20.0 },
];
assert!(ScopeAnalyzer::is_wide_change(&candidates, &config));
}
#[test]
fn test_is_wide_change_nested_same_root() {
let config = default_config();
let candidates = vec![
ScopeCandidate {
path: "api/client".to_string(),
percentage: 60.0,
confidence: 72.0,
},
ScopeCandidate {
path: "api/server".to_string(),
percentage: 40.0,
confidence: 32.0,
},
];
assert!(!ScopeAnalyzer::is_wide_change(&candidates, &config));
}
#[test]
fn test_is_wide_change_empty() {
let config = default_config();
let candidates = vec![];
assert!(!ScopeAnalyzer::is_wide_change(&candidates, &config));
}
#[test]
fn test_extract_scope_single_file() {
let config = default_config();
let numstat = "10\t5\tsrc/api/client.rs";
let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);
assert_eq!(total_lines, 15);
assert_eq!(candidates.len(), 1);
assert_eq!(candidates[0].path, "api");
assert_eq!(candidates[0].percentage, 100.0);
}
#[test]
fn test_extract_scope_placeholder_only() {
let config = default_config();
let numstat = "10\t5\tsrc/main.rs";
let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);
assert_eq!(total_lines, 15);
assert_eq!(candidates.len(), 0);
}
#[test]
fn test_extract_scope_multiple_files() {
let config = default_config();
let numstat = "10\t5\tsrc/api/client.rs\n20\t10\tsrc/db/models.rs";
let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);
assert_eq!(total_lines, 45);
assert!(candidates.len() >= 2);
let api_cand = candidates.iter().find(|c| c.path == "api");
let db_cand = candidates.iter().find(|c| c.path == "db");
assert!(api_cand.is_some());
assert!(db_cand.is_some());
assert!(db_cand.unwrap().percentage > api_cand.unwrap().percentage);
}
#[test]
fn test_extract_scope_excluded_files() {
let config = default_config();
let numstat = "100\t50\tCargo.lock\n10\t5\tsrc/api/client.rs";
let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);
assert_eq!(total_lines, 15);
assert_eq!(candidates[0].path, "api");
}
#[test]
fn test_extract_scope_no_changes() {
let config = default_config();
let numstat = "";
let (candidates, total_lines) = ScopeAnalyzer::extract_scope(numstat, &config);
assert_eq!(total_lines, 0);
assert!(candidates.is_empty());
}
#[test]
fn test_count_changed_lines_ignores_excluded_and_binary_files() {
let config = default_config();
let numstat = "100\t50\tCargo.lock\n-\t-\timage.png\n10\t5\tsrc/api/client.rs";
assert_eq!(ScopeAnalyzer::count_changed_lines(numstat, &config), 15);
}
#[test]
fn test_extract_scope_sorted_by_percentage() {
let config = default_config();
let numstat = "5\t0\tsrc/api/client.rs\n50\t0\tsrc/db/models.rs\n10\t0\tsrc/ui/component.tsx";
let (candidates, _) = ScopeAnalyzer::extract_scope(numstat, &config);
assert!(candidates[0].percentage >= candidates[1].percentage);
assert!(candidates[1].percentage >= candidates[2].percentage);
}
#[test]
fn test_build_scope_candidates_percentages() {
let mut analyzer = ScopeAnalyzer::new();
analyzer.component_lines.insert("api".to_string(), 30);
analyzer.component_lines.insert("db".to_string(), 70);
analyzer.total_lines = 100;
let candidates = analyzer.build_scope_candidates();
assert_eq!(candidates.len(), 2);
assert_eq!(candidates[0].path, "db");
assert!((candidates[0].percentage - 70.0).abs() < 0.001);
assert_eq!(candidates[1].path, "api");
assert!((candidates[1].percentage - 30.0).abs() < 0.001);
}
#[test]
fn test_confidence_70_percent_in_two_segment_prefers_specific() {
let mut analyzer = ScopeAnalyzer::new();
analyzer.component_lines.insert("api".to_string(), 70);
analyzer
.component_lines
.insert("api/client".to_string(), 70);
analyzer.component_lines.insert("other".to_string(), 30);
analyzer.total_lines = 100;
let candidates = analyzer.build_scope_candidates();
assert_eq!(candidates[0].path, "api/client");
assert!((candidates[0].percentage - 70.0).abs() < 0.001);
assert!((candidates[0].confidence - 84.0).abs() < 0.001);
}
#[test]
fn test_confidence_45_percent_in_two_segment_prefers_single() {
let mut analyzer = ScopeAnalyzer::new();
analyzer.component_lines.insert("api".to_string(), 45);
analyzer
.component_lines
.insert("api/client".to_string(), 45);
analyzer.component_lines.insert("other".to_string(), 55);
analyzer.total_lines = 100;
let candidates = analyzer.build_scope_candidates();
assert_eq!(candidates[0].path, "other");
assert_eq!(candidates[1].path, "api");
assert_eq!(candidates[2].path, "api/client");
assert!((candidates[2].confidence - 36.0).abs() < 0.001);
}
#[test]
fn test_analyze_wide_change_dependency_updates() {
let numstat = "10\t5\tCargo.toml\n20\t10\tsrc/lib.rs\n5\t3\tsrc/api.rs";
let result = ScopeAnalyzer::analyze_wide_change(numstat);
assert_eq!(result, Some("deps".to_string()));
}
#[test]
fn test_analyze_wide_change_documentation() {
let numstat =
"50\t20\tREADME.md\n30\t10\tdocs/guide.md\n20\t5\tdocs/api.md\n5\t2\tsrc/lib.rs";
let result = ScopeAnalyzer::analyze_wide_change(numstat);
assert_eq!(result, Some("docs".to_string()));
}
#[test]
fn test_analyze_wide_change_tests() {
let numstat = "10\t5\tsrc/api_test.rs\n15\t8\tsrc/client_test.rs\n20\t10\ttests/\
integration_test.rs\n5\t2\tsrc/lib.rs";
let result = ScopeAnalyzer::analyze_wide_change(numstat);
assert_eq!(result, Some("tests".to_string()));
}
#[test]
fn test_analyze_wide_change_error_handling() {
let numstat =
"10\t5\tsrc/error.rs\n15\t8\tsrc/result.rs\n20\t10\tsrc/error_types.rs\n5\t2\tsrc/lib.rs";
let result = ScopeAnalyzer::analyze_wide_change(numstat);
assert_eq!(result, Some("error-handling".to_string()));
}
#[test]
fn test_analyze_wide_change_type_refactor() {
let numstat =
"10\t5\tsrc/types.rs\n15\t8\tsrc/structs.rs\n20\t10\tsrc/enums.rs\n5\t2\tsrc/lib.rs";
let result = ScopeAnalyzer::analyze_wide_change(numstat);
assert_eq!(result, Some("type-refactor".to_string()));
}
#[test]
fn test_analyze_wide_change_config() {
let numstat =
"10\t5\tconfig.toml\n15\t8\tsettings.yaml\n20\t10\tconfig.json\n5\t2\tsrc/lib.rs";
let result = ScopeAnalyzer::analyze_wide_change(numstat);
assert_eq!(result, Some("config".to_string()));
}
#[test]
fn test_analyze_wide_change_no_pattern() {
let numstat = "10\t5\tsrc/foo.rs\n15\t8\tsrc/bar.rs\n20\t10\tsrc/baz.rs";
let result = ScopeAnalyzer::analyze_wide_change(numstat);
assert_eq!(result, None);
}
#[test]
fn test_analyze_wide_change_empty() {
let numstat = "";
let result = ScopeAnalyzer::analyze_wide_change(numstat);
assert_eq!(result, None);
}
#[test]
fn test_analyze_wide_change_package_json() {
let numstat = "10\t5\tpackage.json\n20\t10\tsrc/index.js\n5\t3\tsrc/utils.js";
let result = ScopeAnalyzer::analyze_wide_change(numstat);
assert_eq!(result, Some("deps".to_string()));
}
}