use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Duration;
use glob::Pattern;
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// Upper bound, in seconds, that `run_git` waits for a git subprocess before
/// giving up and reporting a timeout error.
const GIT_TIMEOUT_SECS: u64 = 300;
/// Aggregated change statistics for a single file over the analysed window.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FileChurn {
/// File path exactly as printed by `git log`.
pub file: String,
/// Number of commits in the window that listed this file.
pub commit_count: u32,
/// Total lines added across those commits (from `--numstat`).
pub lines_added: u32,
/// Total lines deleted across those commits (from `--numstat`).
pub lines_deleted: u32,
/// `lines_added + lines_deleted`.
pub lines_changed: u32,
/// Date (first 10 characters of the ISO author date) of the last matching
/// commit encountered in the `git log` output, i.e. normally the oldest one.
pub first_commit: Option<String>,
/// Date (first 10 characters of the ISO author date) of the first matching
/// commit encountered in the `git log` output, i.e. normally the newest one.
pub last_commit: Option<String>,
/// Distinct author e-mail addresses that touched the file (unordered).
pub authors: Vec<String>,
/// Length of `authors`.
pub author_count: u32,
}
/// Per-author contribution totals over the analysed window.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AuthorStats {
/// Author name as reported by `git shortlog`.
pub name: String,
/// Author e-mail address as reported by `git shortlog`.
pub email: String,
/// Commit count reported by `git shortlog` for this author.
pub commits: u32,
/// Lines added by this author (from `git log --numstat`).
pub lines_added: u32,
/// Lines deleted by this author (from `git log --numstat`).
pub lines_deleted: u32,
/// Number of `FileChurn` entries whose `authors` list contains this e-mail.
pub files_touched: u32,
}
/// A file flagged by combining churn with complexity.
///
/// NOTE(review): nothing in this part of the file constructs a `Hotspot`; the
/// field descriptions below are inferred from names and from how
/// `format_text_output` prints them — confirm against the producer.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Hotspot {
/// Path of the file.
pub file: String,
/// Rank of the file by churn (printed as `rank #N`).
pub churn_rank: u32,
/// Rank of the file by complexity (printed as `rank #N`).
pub complexity_rank: u32,
/// Combined score (printed with three decimals); presumably the value fed
/// to `get_recommendation` — TODO confirm.
pub combined_score: f64,
/// Number of commits touching the file.
pub commit_count: u32,
/// Cyclomatic complexity value (printed as `CC=N`).
pub cyclomatic_complexity: u32,
/// Human-readable advice line shown under the hotspot.
pub recommendation: String,
}
/// Lowercase substrings that identify automation accounts.
///
/// `is_bot_author` lowercases the author name and e-mail and reports a bot if
/// either contains any of these fragments.
const BOT_PATTERNS: &[&str] = &[
"dependabot",
"renovate",
"github-actions",
"[bot]",
"snyk-bot",
"greenkeeper",
"depfu",
"codecov",
"semantic-release-bot",
];
/// Returns `true` when the author looks like an automation account.
///
/// Both the name and the e-mail are lowercased and then searched for every
/// fragment in `BOT_PATTERNS`; a single hit in either field is enough.
pub fn is_bot_author(author_name: &str, author_email: &str) -> bool {
    let name = author_name.to_lowercase();
    let email = author_email.to_lowercase();
    for marker in BOT_PATTERNS {
        if name.contains(*marker) || email.contains(*marker) {
            return true;
        }
    }
    false
}
/// Line-change record for one file within one commit.
#[derive(Debug, Clone)]
pub(crate) struct CommitChurnEntry {
/// Author date string as printed by git for the commit (`%aI`), untruncated;
/// empty when no date was available.
pub date: String,
/// Lines added to the file by this commit (0 for binary `-` entries).
pub lines_added: u32,
/// Lines deleted from the file by this commit (0 for binary `-` entries).
pub lines_deleted: u32,
/// Author e-mail of the commit; empty when unavailable.
pub author_email: String,
}
/// A `FileChurn` summary together with the per-commit entries it was built from.
#[derive(Debug, Clone)]
pub(crate) struct FileChurnDetailed {
/// Aggregated totals for the file.
pub base: FileChurn,
/// One entry per commit that touched the file, in `git log` output order.
pub commits: Vec<CommitChurnEntry>,
}
/// Repository-wide totals derived from the per-file statistics.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ChurnSummary {
/// Number of files with at least one recorded change.
pub total_files: u32,
/// Sum of the per-file commit counts; a commit that touches several files
/// is counted once for each of them.
pub total_commits: u32,
/// Size of the analysed window in days.
pub time_window_days: u32,
/// Sum of `lines_changed` over all files.
pub total_lines_changed: u64,
/// `total_commits / total_files` (0.0 when there are no files).
pub avg_commits_per_file: f64,
/// Path of the file with the highest commit count; empty when there are no files.
pub most_churned_file: String,
}
/// Complete churn analysis result, as rendered by `format_text_output`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ChurnReport {
/// Per-file statistics; the text formatter prints the first ten in order.
pub files: Vec<FileChurn>,
/// Hotspots; the text formatter prints the first five in order.
pub hotspots: Vec<Hotspot>,
/// Per-author statistics; the text formatter prints the first five in order.
pub authors: Vec<AuthorStats>,
/// Repository-wide totals.
pub summary: ChurnSummary,
/// Whether the repository is a shallow clone — presumably filled from
/// `check_shallow_clone`; TODO confirm against the caller.
pub is_shallow: bool,
/// Approximate history depth of a shallow clone; omitted from serialized
/// output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub shallow_depth: Option<u32>,
/// Non-fatal warnings; omitted from serialized output when empty and
/// defaulted to empty when missing on input.
#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub warnings: Vec<String>,
}
/// Errors produced by the churn analysis functions in this module.
#[derive(Debug, Error)]
pub enum ChurnError {
/// The supplied path does not exist on disk.
#[error("Path not found: {0}")]
PathNotFound(PathBuf),
/// The supplied path exists but is not inside a git repository.
#[error("Not a git repository: {path}")]
NotGitRepository {
path: PathBuf,
},
/// A git invocation could not be spawned, timed out, or exited unsuccessfully.
/// `exit_code` is `None` when no exit status is available (for example on
/// spawn failure or timeout).
#[error("Git command failed: {command}\n{stderr}")]
GitError {
command: String,
stderr: String,
exit_code: Option<i32>,
},
/// Git output did not have the expected shape.
#[error("Failed to parse git output: {context}\nLine: {line}")]
ParseError {
context: String,
line: String,
},
/// Underlying I/O failure; converted automatically from `std::io::Error`.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// Complexity analysis of a file failed.
#[error("Complexity analysis failed for {file}: {reason}")]
ComplexityError {
file: PathBuf,
reason: String,
},
}
fn run_git(args: &[&str], cwd: &Path) -> Result<String, ChurnError> {
let canonical_cwd = cwd.canonicalize().map_err(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
ChurnError::PathNotFound(cwd.to_path_buf())
} else {
ChurnError::Io(e)
}
})?;
let mut cmd = Command::new("git");
cmd.arg("-c").arg("core.quotepath=false");
for arg in args {
cmd.arg(arg);
}
cmd.current_dir(&canonical_cwd);
let output = {
let child = cmd
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.map_err(|e| ChurnError::GitError {
command: format!("git {}", args.join(" ")),
stderr: format!("Failed to spawn git: {}", e),
exit_code: None,
})?;
let timeout = Duration::from_secs(GIT_TIMEOUT_SECS);
let (tx, rx) = std::sync::mpsc::channel();
let handle = std::thread::spawn(move || {
let result = child.wait_with_output();
let _ = tx.send(result);
});
match rx.recv_timeout(timeout) {
Ok(result) => {
let _ = handle.join();
result.map_err(|e| ChurnError::GitError {
command: format!("git {}", args.join(" ")),
stderr: format!("Failed to wait for git: {}", e),
exit_code: None,
})?
}
Err(_) => {
return Err(ChurnError::GitError {
command: format!("git {}", args.join(" ")),
stderr: format!("Git command timed out after {} seconds", GIT_TIMEOUT_SECS),
exit_code: None,
});
}
}
};
if output.status.success() {
let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
Ok(stdout)
} else {
let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
Err(ChurnError::GitError {
command: format!("git {}", args.join(" ")),
stderr,
exit_code: output.status.code(),
})
}
}
/// Reports whether `path` lies inside a git repository.
///
/// Runs `git rev-parse --git-dir` in `path`. Success means `true`. A failure
/// that carries an exit code means `false`. A failure without an exit code
/// means `false` only when stderr mentions "not a git repository"; otherwise
/// the failure is returned as a `GitError`.
///
/// # Errors
///
/// `ChurnError::PathNotFound` if `path` does not exist, plus any non-git
/// error surfaced by `run_git`.
pub fn is_git_repository(path: &Path) -> Result<bool, ChurnError> {
    if !path.exists() {
        return Err(ChurnError::PathNotFound(path.to_path_buf()));
    }

    let failure = match run_git(&["rev-parse", "--git-dir"], path) {
        Ok(_) => return Ok(true),
        Err(err) => err,
    };

    match failure {
        // git ran and exited unsuccessfully: not a repository.
        ChurnError::GitError {
            exit_code: Some(_), ..
        } => Ok(false),
        ChurnError::GitError {
            exit_code: None,
            stderr,
            ..
        } if stderr.contains("not a git repository") => Ok(false),
        ChurnError::GitError {
            exit_code: None,
            stderr,
            ..
        } => Err(ChurnError::GitError {
            command: "git rev-parse --git-dir".to_string(),
            stderr,
            exit_code: None,
        }),
        other => Err(other),
    }
}
/// Detects whether the repository at `path` is a shallow clone.
///
/// Returns `(is_shallow, depth)`. Shallowness comes from
/// `git rev-parse --is-shallow-repository`; if that command fails, the
/// presence of `.git/shallow` under the canonical path is used instead.
/// For shallow repositories `depth` is the output of
/// `git rev-list --count HEAD` parsed as `u32`, or `None` when that command
/// fails or its output does not parse.
///
/// # Errors
///
/// `ChurnError::PathNotFound` if `path` does not exist, or `ChurnError::Io`
/// if it cannot be canonicalized.
pub fn check_shallow_clone(path: &Path) -> Result<(bool, Option<u32>), ChurnError> {
    if !path.exists() {
        return Err(ChurnError::PathNotFound(path.to_path_buf()));
    }
    let repo_dir = path.canonicalize()?;

    let shallow = run_git(&["rev-parse", "--is-shallow-repository"], &repo_dir)
        .map(|answer| answer.trim() == "true")
        .unwrap_or_else(|_| repo_dir.join(".git").join("shallow").exists());
    if !shallow {
        return Ok((false, None));
    }

    let depth = run_git(&["rev-list", "--count", "HEAD"], &repo_dir)
        .ok()
        .and_then(|count| count.trim().parse::<u32>().ok());
    Ok((true, depth))
}
pub fn get_file_churn(
path: &Path,
days: u32,
exclude_patterns: &[String],
) -> Result<HashMap<String, FileChurn>, ChurnError> {
if !path.exists() {
return Err(ChurnError::PathNotFound(path.to_path_buf()));
}
if !is_git_repository(path)? {
return Err(ChurnError::NotGitRepository {
path: path.to_path_buf(),
});
}
struct FileData {
commit_count: u32,
lines_added: u32,
lines_deleted: u32,
first_commit: Option<String>, last_commit: Option<String>, authors: std::collections::HashSet<String>, }
let mut file_data: HashMap<String, FileData> = HashMap::new();
let since_arg = format!("{} days ago", days);
let format_arg = "COMMIT:%H\x1e%aI\x1e%ae\x1e%an";
let commit_output = match run_git(
&[
"log",
&format!("--since={}", since_arg),
&format!("--pretty=format:{}", format_arg),
"--name-only",
],
path,
) {
Ok(output) => output,
Err(ChurnError::GitError { stderr, .. }) => {
if stderr.contains("does not have any commits") || stderr.contains("bad revision") {
return Ok(HashMap::new());
}
return Err(ChurnError::GitError {
command: format!("git log --since=\"{}\" ...", since_arg),
stderr,
exit_code: None,
});
}
Err(e) => return Err(e),
};
let mut current_date: Option<String> = None;
let mut current_email: Option<String> = None;
for line in commit_output.lines() {
let line = line.trim();
if line.is_empty() {
current_date = None;
current_email = None;
continue;
}
if let Some(rest) = line.strip_prefix("COMMIT:") {
let parts: Vec<&str> = rest.split('\x1e').collect();
if parts.len() >= 3 {
let date_str = parts[1];
current_date = if date_str.len() >= 10 {
Some(date_str[..10].to_string())
} else {
None
};
current_email = Some(parts[2].to_string());
}
} else {
let file_path = line;
if matches_exclude_pattern(file_path, exclude_patterns) {
continue;
}
let data = file_data
.entry(file_path.to_string())
.or_insert_with(|| FileData {
commit_count: 0,
lines_added: 0,
lines_deleted: 0,
first_commit: None,
last_commit: None,
authors: std::collections::HashSet::new(),
});
data.commit_count += 1;
if let Some(ref date) = current_date {
if data.last_commit.is_none() {
data.last_commit = Some(date.clone());
}
data.first_commit = Some(date.clone());
}
if let Some(ref email) = current_email {
data.authors.insert(email.clone());
}
}
}
let numstat_output = run_git(
&[
"log",
&format!("--since={}", since_arg),
"--numstat",
"--format=",
],
path,
)?;
for line in numstat_output.lines() {
let line = line.trim();
if line.is_empty() {
continue;
}
let parts: Vec<&str> = line.split('\t').collect();
if parts.len() < 3 {
continue;
}
let added_str = parts[0];
let deleted_str = parts[1];
let file_path = parts[2];
if matches_exclude_pattern(file_path, exclude_patterns) {
continue;
}
if added_str == "-" || deleted_str == "-" {
continue;
}
let added: u32 = added_str.parse().unwrap_or(0);
let deleted: u32 = deleted_str.parse().unwrap_or(0);
if let Some(data) = file_data.get_mut(file_path) {
data.lines_added += added;
data.lines_deleted += deleted;
}
}
let result: HashMap<String, FileChurn> = file_data
.into_iter()
.map(|(file, data)| {
let authors: Vec<String> = data.authors.into_iter().collect();
let author_count = authors.len() as u32;
let churn = FileChurn {
file: file.clone(),
commit_count: data.commit_count,
lines_added: data.lines_added,
lines_deleted: data.lines_deleted,
lines_changed: data.lines_added + data.lines_deleted,
first_commit: data.first_commit,
last_commit: data.last_commit,
authors,
author_count,
};
(file, churn)
})
.collect();
Ok(result)
}
pub(crate) fn get_file_churn_detailed(
path: &Path,
days: u32,
exclude_patterns: &[String],
include_bots: bool,
) -> Result<(HashMap<String, FileChurnDetailed>, u32), ChurnError> {
use std::io::BufRead;
use std::process::Stdio;
if !path.exists() {
return Err(ChurnError::PathNotFound(path.to_path_buf()));
}
if !is_git_repository(path)? {
return Err(ChurnError::NotGitRepository {
path: path.to_path_buf(),
});
}
let canonical_path = path.canonicalize().map_err(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
ChurnError::PathNotFound(path.to_path_buf())
} else {
ChurnError::Io(e)
}
})?;
let since_arg = format!("{} days ago", days);
let format_arg = "COMMIT:%H\x1e%aI\x1e%aE\x1e%aN";
let mut cmd = Command::new("git");
cmd.arg("-c")
.arg("core.quotepath=false")
.arg("log")
.arg(format!("--since={}", since_arg))
.arg(format!("--pretty=format:{}", format_arg))
.arg("--numstat")
.arg("--no-renames") .arg("--no-merges") .current_dir(&canonical_path)
.stdout(Stdio::piped())
.stderr(Stdio::piped());
let mut child = cmd.spawn().map_err(|e| ChurnError::GitError {
command: "git log --numstat --no-renames --no-merges".to_string(),
stderr: format!("Failed to spawn git: {}", e),
exit_code: None,
})?;
let stdout = child.stdout.take().ok_or_else(|| ChurnError::GitError {
command: "git log".to_string(),
stderr: "Failed to capture stdout".to_string(),
exit_code: None,
})?;
let reader = std::io::BufReader::new(stdout);
struct FileAccum {
commits: Vec<CommitChurnEntry>,
authors: std::collections::HashSet<String>,
first_commit_date: Option<String>,
last_commit_date: Option<String>,
}
let mut file_data: HashMap<String, FileAccum> = HashMap::new();
let mut total_bot_commits_filtered: u32 = 0;
let mut current_date: Option<String> = None;
let mut current_email: Option<String> = None;
let mut skip_this_commit = false;
for line_result in reader.lines() {
let line = match line_result {
Ok(l) => l,
Err(_) => continue, };
let trimmed = line.trim();
if trimmed.is_empty() {
continue;
}
if let Some(rest) = trimmed.strip_prefix("COMMIT:") {
let parts: Vec<&str> = rest.split('\x1e').collect();
if parts.len() >= 4 {
current_date = Some(parts[1].to_string());
current_email = Some(parts[2].to_string());
let is_bot = is_bot_author(parts[3], parts[2]);
if is_bot && !include_bots {
skip_this_commit = true;
total_bot_commits_filtered += 1;
} else {
skip_this_commit = false;
}
} else {
current_date = None;
current_email = None;
skip_this_commit = true;
}
continue;
}
if skip_this_commit {
continue;
}
let parts: Vec<&str> = trimmed.splitn(3, '\t').collect();
if parts.len() < 3 {
continue; }
let added_str = parts[0];
let deleted_str = parts[1];
let file_path = parts[2];
if matches_exclude_pattern(file_path, exclude_patterns) {
continue;
}
let lines_added: u32 = if added_str == "-" {
0
} else {
added_str.parse().unwrap_or(0)
};
let lines_deleted: u32 = if deleted_str == "-" {
0
} else {
deleted_str.parse().unwrap_or(0)
};
let commit_entry = CommitChurnEntry {
date: current_date.clone().unwrap_or_default(),
lines_added,
lines_deleted,
author_email: current_email.clone().unwrap_or_default(),
};
let accum = file_data
.entry(file_path.to_string())
.or_insert_with(|| FileAccum {
commits: Vec::new(),
authors: std::collections::HashSet::new(),
first_commit_date: None,
last_commit_date: None,
});
if let Some(ref email) = current_email {
accum.authors.insert(email.clone());
}
if let Some(ref date) = current_date {
let date_short = if date.len() >= 10 { &date[..10] } else { date };
if accum.last_commit_date.is_none() {
accum.last_commit_date = Some(date_short.to_string());
}
accum.first_commit_date = Some(date_short.to_string());
}
accum.commits.push(commit_entry);
}
let status = child.wait().map_err(|e| ChurnError::GitError {
command: "git log".to_string(),
stderr: format!("Failed to wait for git process: {}", e),
exit_code: None,
})?;
if !status.success() {
if file_data.is_empty() {
return Ok((HashMap::new(), total_bot_commits_filtered));
}
}
let mut result: HashMap<String, FileChurnDetailed> = HashMap::new();
for (file_path, accum) in file_data {
let total_added: u32 = accum.commits.iter().map(|c| c.lines_added).sum();
let total_deleted: u32 = accum.commits.iter().map(|c| c.lines_deleted).sum();
let commit_count = accum.commits.len() as u32;
let authors: Vec<String> = accum.authors.into_iter().collect();
let author_count = authors.len() as u32;
let base = FileChurn {
file: file_path.clone(),
commit_count,
lines_added: total_added,
lines_deleted: total_deleted,
lines_changed: total_added + total_deleted,
first_commit: accum.first_commit_date,
last_commit: accum.last_commit_date,
authors,
author_count,
};
result.insert(
file_path,
FileChurnDetailed {
base,
commits: accum.commits,
},
);
}
Ok((result, total_bot_commits_filtered))
}
/// Builds per-author statistics for commits made within the last `days` days.
///
/// Commit counts come from `git shortlog -sne`. When several shortlog rows
/// share an e-mail address (same address, different display names) their
/// commit counts are summed, and the name from the first row is kept; with
/// `-n` that is the row with the most commits.
///
/// Line totals come from one `git log --numstat` pass whose header line per
/// commit carries the author e-mail (`%aE`). Lines are credited by exact
/// e-mail match. This pass is best-effort: if it fails, line totals stay 0.
///
/// `files_touched` is the number of entries in `file_stats` whose `authors`
/// list contains the author's e-mail.
///
/// The result is sorted by commit count, descending, with ties broken by
/// e-mail so the order is deterministic.
///
/// # Errors
///
/// * `ChurnError::PathNotFound` / `ChurnError::NotGitRepository` for an
///   unusable `path`.
/// * `ChurnError::GitError` if `git shortlog` fails with a message other than
///   the recognised "no commits" ones.
pub fn get_author_stats(
    path: &Path,
    days: u32,
    file_stats: &HashMap<String, FileChurn>,
) -> Result<Vec<AuthorStats>, ChurnError> {
    if !path.exists() {
        return Err(ChurnError::PathNotFound(path.to_path_buf()));
    }
    if !is_git_repository(path)? {
        return Err(ChurnError::NotGitRepository {
            path: path.to_path_buf(),
        });
    }

    let since_arg = format!("{} days ago", days);
    let since_flag = format!("--since={}", since_arg);
    let shortlog_output = match run_git(&["shortlog", "-sne", &since_flag, "HEAD"], path) {
        Ok(output) => output,
        Err(ChurnError::GitError { stderr, .. }) => {
            if stderr.contains("does not have any commits")
                || stderr.contains("bad revision")
                || stderr.is_empty()
            {
                return Ok(Vec::new());
            }
            return Err(ChurnError::GitError {
                command: format!("git shortlog -sne --since=\"{}\"", since_arg),
                stderr,
                exit_code: None,
            });
        }
        Err(e) => return Err(e),
    };
    if shortlog_output.trim().is_empty() {
        return Ok(Vec::new());
    }

    struct AuthorData {
        name: String,
        email: String,
        commits: u32,
        lines_added: u32,
        lines_deleted: u32,
    }

    let mut author_map: HashMap<String, AuthorData> = HashMap::new();
    for line in shortlog_output.lines() {
        let line = line.trim();
        if line.is_empty() {
            continue;
        }
        // Each row is "<count>\t<name> <email>".
        let mut columns = line.splitn(2, '\t');
        let (count_str, author_part) = match (columns.next(), columns.next()) {
            (Some(count), Some(author)) => (count, author.trim()),
            _ => continue,
        };
        let commits: u32 = match count_str.trim().parse() {
            Ok(n) => n,
            Err(_) => continue,
        };
        let (email_start, email_end) = match (author_part.rfind('<'), author_part.rfind('>')) {
            (Some(start), Some(end)) if start < end => (start, end),
            _ => continue,
        };
        let name = author_part[..email_start].trim();
        let email = &author_part[email_start + 1..email_end];

        let data = author_map
            .entry(email.to_string())
            .or_insert_with(|| AuthorData {
                name: name.to_string(),
                email: email.to_string(),
                commits: 0,
                lines_added: 0,
                lines_deleted: 0,
            });
        data.commits = data.commits.saturating_add(commits);
    }

    // One pass over the history instead of one git process per author.
    if let Ok(numstat_output) = run_git(
        &["log", &since_flag, "--numstat", "--format=AUTHOR:%aE"],
        path,
    ) {
        let mut current_email: Option<&str> = None;
        for line in numstat_output.lines() {
            let line = line.trim();
            if line.is_empty() {
                continue;
            }
            if let Some(email) = line.strip_prefix("AUTHOR:") {
                current_email = Some(email);
                continue;
            }
            let mut fields = line.splitn(3, '\t');
            let (added_str, deleted_str) = match (fields.next(), fields.next(), fields.next()) {
                (Some(added), Some(deleted), Some(_)) => (added, deleted),
                _ => continue,
            };
            // Binary files report "-" for both counts.
            if added_str == "-" || deleted_str == "-" {
                continue;
            }
            let added: u32 = added_str.parse().unwrap_or(0);
            let deleted: u32 = deleted_str.parse().unwrap_or(0);
            if let Some(data) = current_email.and_then(|email| author_map.get_mut(email)) {
                data.lines_added = data.lines_added.saturating_add(added);
                data.lines_deleted = data.lines_deleted.saturating_add(deleted);
            }
        }
    }

    let mut result: Vec<AuthorStats> = author_map
        .into_iter()
        .map(|(email, data)| {
            let touched = file_stats
                .values()
                .filter(|f| f.authors.contains(&email))
                .count();
            AuthorStats {
                name: data.name,
                email: data.email,
                commits: data.commits,
                lines_added: data.lines_added,
                lines_deleted: data.lines_deleted,
                files_touched: u32::try_from(touched).unwrap_or(u32::MAX),
            }
        })
        .collect();
    result.sort_by(|a, b| {
        b.commits
            .cmp(&a.commits)
            .then_with(|| a.email.cmp(&b.email))
    });
    Ok(result)
}
/// Derives repository-wide totals from the per-file statistics.
///
/// * `total_commits` is the sum of the per-file commit counts (saturating at
///   `u32::MAX`); a commit touching several files is counted once per file.
/// * `most_churned_file` is the file with the highest commit count. Ties go
///   to the lexicographically smallest path, so the result does not depend on
///   `HashMap` iteration order.
/// * For an empty map every numeric field is zero and `most_churned_file` is
///   empty.
pub fn build_summary(file_stats: &HashMap<String, FileChurn>, days: u32) -> ChurnSummary {
    let total_files = u32::try_from(file_stats.len()).unwrap_or(u32::MAX);

    let mut total_commits: u32 = 0;
    let mut total_lines_changed: u64 = 0;
    let mut busiest: Option<&FileChurn> = None;
    for churn in file_stats.values() {
        total_commits = total_commits.saturating_add(churn.commit_count);
        total_lines_changed = total_lines_changed.saturating_add(u64::from(churn.lines_changed));

        let takes_lead = match busiest {
            None => true,
            Some(best) => {
                churn.commit_count > best.commit_count
                    || (churn.commit_count == best.commit_count && churn.file < best.file)
            }
        };
        if takes_lead {
            busiest = Some(churn);
        }
    }

    let avg_commits_per_file = if total_files == 0 {
        0.0
    } else {
        f64::from(total_commits) / f64::from(total_files)
    };

    ChurnSummary {
        total_files,
        total_commits,
        time_window_days: days,
        total_lines_changed,
        avg_commits_per_file,
        most_churned_file: busiest.map(|f| f.file.clone()).unwrap_or_default(),
    }
}
/// Maps a score to a fixed advice string.
///
/// Scores strictly above 0.7 are critical, scores strictly above 0.4 are
/// warnings, and everything else (including NaN) is low risk.
pub fn get_recommendation(score: f64) -> &'static str {
    match score {
        s if s > 0.7 => "Critical: High churn + high complexity. Prioritize refactoring.",
        s if s > 0.4 => "Warning: Moderate risk. Consider simplification.",
        _ => "Low risk.",
    }
}
/// Returns `true` if `path` matches at least one glob in `patterns`.
///
/// Entries that fail to compile as a glob are skipped silently.
pub fn matches_exclude_pattern(path: &str, patterns: &[String]) -> bool {
    patterns
        .iter()
        .filter_map(|raw| Pattern::new(raw).ok())
        .any(|compiled| compiled.matches(path))
}
/// Shortens `path` to at most `max_len` characters for tabular display.
///
/// Paths that already fit are returned unchanged. Longer paths keep their
/// last `max_len - 3` characters, prefixed with `...`. Length is measured in
/// characters rather than bytes, and the cut always falls on a character
/// boundary, so paths containing multi-byte UTF-8 cannot cause a slicing panic.
///
/// When `max_len` is below 3 the result is just `...`.
fn truncate_path(path: &str, max_len: usize) -> String {
    let char_count = path.chars().count();
    if char_count <= max_len {
        return path.to_string();
    }

    let keep = max_len.saturating_sub(3);
    // `keep <= max_len < char_count`, so this cannot underflow.
    let skip = char_count - keep;
    let start = path
        .char_indices()
        .nth(skip)
        .map_or(path.len(), |(byte_index, _)| byte_index);
    format!("...{}", &path[start..])
}
/// Renders a `ChurnReport` as a plain-text summary.
///
/// Sections appear in this order: title, warnings (if any), summary totals,
/// the first ten files, the first five hotspots, and the first five authors.
/// Empty sections are omitted entirely.
pub fn format_text_output(report: &ChurnReport) -> String {
    use std::fmt::Write;

    let mut text = String::new();
    // Appends one formatted line; `emit!()` appends a blank line.
    macro_rules! emit {
        ($($arg:tt)*) => {
            writeln!(text, $($arg)*).unwrap()
        };
    }

    let summary = &report.summary;

    emit!("Code Churn Analysis");
    emit!("==================================================");
    emit!();

    if !report.warnings.is_empty() {
        emit!("Warnings:");
        for note in report.warnings.iter() {
            emit!("  - {}", note);
        }
        emit!();
    }

    emit!("Time window: {} days", summary.time_window_days);
    emit!("Total files changed: {}", summary.total_files);
    emit!("Total commits: {}", summary.total_commits);
    emit!("Total lines changed: {}", summary.total_lines_changed);
    if !summary.most_churned_file.is_empty() {
        emit!("Most churned file: {}", summary.most_churned_file);
    }
    emit!();

    if !report.files.is_empty() {
        emit!("Top Files by Churn:");
        emit!(
            "{:<6}{:<40}{:<10}{:<10}{:<8}",
            "Rank",
            "File",
            "Commits",
            "Lines",
            "Authors"
        );
        emit!("{}", "-".repeat(74));
        for (index, entry) in report.files.iter().take(10).enumerate() {
            let shown_path = truncate_path(&entry.file, 38);
            emit!(
                "{:<6}{:<40}{:<10}{:<10}{:<8}",
                index + 1,
                shown_path,
                entry.commit_count,
                entry.lines_changed,
                entry.author_count
            );
        }
        emit!();
    }

    if !report.hotspots.is_empty() {
        emit!("Hotspot Matrix (High Churn + High Complexity):");
        for (index, spot) in report.hotspots.iter().take(5).enumerate() {
            emit!("  {}. {}", index + 1, spot.file);
            emit!(
                "     Churn: {} commits (rank #{})",
                spot.commit_count,
                spot.churn_rank
            );
            emit!(
                "     Complexity: CC={} (rank #{})",
                spot.cyclomatic_complexity,
                spot.complexity_rank
            );
            emit!("     Score: {:.3}", spot.combined_score);
            emit!("     {}", spot.recommendation);
        }
        emit!();
    }

    if !report.authors.is_empty() {
        emit!("Top Authors:");
        for person in report.authors.iter().take(5) {
            emit!("  {} <{}>", person.name, person.email);
            emit!(
                "    Commits: {}, Files: {}",
                person.commits,
                person.files_touched
            );
        }
        emit!();
    }

    text
}