use std::fs;
use std::sync::LazyLock;
use anyhow::{Context, Result};
use chrono::{DateTime, FixedOffset};
use git2::{Commit, Repository};
use globset::Glob;
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::data::context::ScopeDefinition;
use crate::git::diff_split::split_by_file;
// Matches the first line of a conventional-commit message and captures its
// scope: group 1 handles this project's breaking-change form `type!(scope):`,
// group 2 the plain form `type(scope):`. The unwrap is safe because the
// pattern is a compile-time constant known to be valid.
#[allow(clippy::unwrap_used)] static SCOPE_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[a-z]+!\(([^)]+)\):|^[a-z]+\(([^)]+)\):").unwrap());
/// A single git commit together with the analysis derived from it.
///
/// The type parameter `A` selects the analysis payload: the default
/// [`CommitAnalysis`] references diffs on disk, while
/// [`CommitAnalysisForAI`] also inlines the diff text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitInfo<A = CommitAnalysis> {
    /// Full commit hash as a hex string.
    pub hash: String,
    /// Author formatted as `Name <email>`.
    pub author: String,
    /// Author timestamp, carrying the commit's recorded UTC offset.
    pub date: DateTime<FixedOffset>,
    /// Commit message exactly as recorded in git.
    pub original_message: String,
    /// Main branches containing this commit (left empty by
    /// `from_git_commit`; populated by callers).
    pub in_main_branches: Vec<String>,
    /// Analysis payload (see the `A` type parameter).
    pub analysis: A,
}
/// Heuristic analysis of one commit: detected conventional type/scope, a
/// proposed subject line, file-change statistics, and paths to the diff
/// file(s) written to the AI scratch directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitAnalysis {
    /// Conventional-commit type inferred from the message or changed files.
    pub detected_type: String,
    /// Inferred scope; empty string when none could be determined.
    pub detected_scope: String,
    /// Suggested conventional-commit subject line.
    pub proposed_message: String,
    /// Aggregate add/delete counts plus per-file statuses.
    pub file_changes: FileChanges,
    /// Human-readable per-file summary lines.
    pub diff_summary: String,
    /// Filesystem path of the full diff written to disk.
    pub diff_file: String,
    /// Per-file diff references; omitted from serialization when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub file_diffs: Vec<FileDiffRef>,
}
/// Reference to a single file's diff persisted as its own file on disk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileDiffRef {
    /// Repository-relative path of the changed file.
    pub path: String,
    /// Filesystem path of the per-file diff on disk.
    pub diff_file: String,
    /// Size of the per-file diff content in bytes.
    pub byte_len: usize,
}
/// [`CommitAnalysis`] extended with the diff text loaded into memory.
///
/// `#[serde(flatten)]` keeps the serialized shape identical to
/// `CommitAnalysis` plus one extra `diff_content` field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitAnalysisForAI {
    /// Flattened base analysis; its fields serialize at this level.
    #[serde(flatten)]
    pub base: CommitAnalysis,
    /// Diff text — the full diff, or a subset for partial loads.
    pub diff_content: String,
}
/// AI-facing commit record: a [`CommitInfo`] whose analysis carries inline
/// diff content, plus facts already verified locally.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitInfoForAI {
    /// Flattened base record; serializes with the same shape as `CommitInfo`.
    #[serde(flatten)]
    pub base: CommitInfo<CommitAnalysisForAI>,
    /// Human-readable statements produced by `run_pre_validation_checks`;
    /// omitted from serialization when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub pre_validated_checks: Vec<String>,
}
/// Aggregate statistics for the files touched by one commit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileChanges {
    /// Total number of changed files (length of `file_list`).
    pub total_files: usize,
    /// Count of files with status `A` (added).
    pub files_added: usize,
    /// Count of files with status `D` (deleted).
    pub files_deleted: usize,
    /// One entry per changed file.
    pub file_list: Vec<FileChange>,
}
/// One changed file and its single-letter status code
/// (`A`/`D`/`M`/`R`/`C`/`T`, or `?` for anything else).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileChange {
    /// Status letter assigned in `analyze_file_changes`.
    pub status: String,
    /// Repository-relative file path.
    pub file: String,
}
impl CommitInfo {
    /// Build a [`CommitInfo`] for `commit`, running the full local analysis.
    ///
    /// `in_main_branches` is left empty here; callers populate it separately.
    ///
    /// # Errors
    /// Fails when the commit timestamp is invalid or any analysis step
    /// (diffing, scratch-file writes) fails.
    pub fn from_git_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
        let signature = commit.author();
        let author = format!(
            "{} <{}>",
            signature.name().unwrap_or("Unknown"),
            signature.email().unwrap_or("unknown@example.com")
        );
        let when = signature.when();
        // Convert the recorded offset (minutes) to seconds; fall back to UTC
        // when out of range. `east_opt(0)` is always `Some`, so the inner
        // unwrap cannot fail.
        #[allow(clippy::unwrap_used)]
        let offset = FixedOffset::east_opt(when.offset_minutes() * 60)
            .unwrap_or_else(|| FixedOffset::east_opt(0).unwrap());
        let date = DateTime::from_timestamp(when.seconds(), 0)
            .context("Invalid commit timestamp")?
            .with_timezone(&offset);
        Ok(Self {
            hash: commit.id().to_string(),
            author,
            date,
            original_message: commit.message().unwrap_or("").to_string(),
            in_main_branches: Vec::new(),
            analysis: CommitAnalysis::analyze_commit(repo, commit)?,
        })
    }
}
impl CommitAnalysis {
/// Run every local heuristic for `commit` and assemble the analysis record.
///
/// # Errors
/// Propagates failures from diffing the commit or writing diff files.
pub fn analyze_commit(repo: &Repository, commit: &Commit) -> Result<Self> {
    let file_changes = Self::analyze_file_changes(repo, commit)?;
    let diff_summary = Self::get_diff_summary(repo, commit)?;
    let (diff_file, file_diffs) = Self::write_diff_to_file(repo, commit)?;
    // Type/scope detection is pure; order relative to the steps above
    // does not matter.
    let detected_type = Self::detect_commit_type(commit, &file_changes);
    let detected_scope = Self::detect_scope(&file_changes);
    let proposed_message =
        Self::generate_proposed_message(commit, &detected_type, &detected_scope, &file_changes);
    Ok(Self {
        detected_type,
        detected_scope,
        proposed_message,
        file_changes,
        diff_summary,
        diff_file,
        file_diffs,
    })
}
/// Diff `commit` against its first parent (or against nothing for a root
/// commit) and collect per-file statuses plus add/delete counts.
///
/// # Errors
/// Fails when trees cannot be resolved or the diff cannot be built/walked.
fn analyze_file_changes(repo: &Repository, commit: &Commit) -> Result<FileChanges> {
    let mut file_list = Vec::new();
    let mut files_added = 0;
    let mut files_deleted = 0;
    let commit_tree = commit.tree().context("Failed to get commit tree")?;
    // Root commits have no parent; `None` below makes the diff cover the
    // whole initial tree.
    let parent_tree = if commit.parent_count() > 0 {
        Some(
            commit
                .parent(0)
                .context("Failed to get parent commit")?
                .tree()
                .context("Failed to get parent tree")?,
        )
    } else {
        None
    };
    let diff = if let Some(parent_tree) = parent_tree {
        repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
            .context("Failed to create diff")?
    } else {
        repo.diff_tree_to_tree(None, Some(&commit_tree), None)
            .context("Failed to create diff for initial commit")?
    };
    // Only the file-level callback is supplied; hunk/line callbacks are not
    // needed for counting.
    diff.foreach(
        &mut |delta, _progress| {
            // Single-letter codes chosen to mirror git's porcelain statuses.
            let status = match delta.status() {
                git2::Delta::Added => {
                    files_added += 1;
                    "A"
                }
                git2::Delta::Deleted => {
                    files_deleted += 1;
                    "D"
                }
                git2::Delta::Modified => "M",
                git2::Delta::Renamed => "R",
                git2::Delta::Copied => "C",
                git2::Delta::Typechange => "T",
                _ => "?",
            };
            // Entries with a missing or non-UTF-8 path are silently skipped
            // (their add/delete count above is still recorded).
            if let Some(path) = delta.new_file().path() {
                if let Some(path_str) = path.to_str() {
                    file_list.push(FileChange {
                        status: status.to_string(),
                        file: path_str.to_string(),
                    });
                }
            }
            true
        },
        None,
        None,
        None,
    )
    .context("Failed to process diff")?;
    let total_files = file_list.len();
    Ok(FileChanges {
        total_files,
        files_added,
        files_deleted,
        file_list,
    })
}
/// Infer a conventional-commit type, preferring an explicit type in the
/// message, then file-based heuristics, then message keywords.
fn detect_commit_type(commit: &Commit, file_changes: &FileChanges) -> String {
    let message = commit.message().unwrap_or("");
    // An explicit conventional type in the message always wins.
    if let Some(explicit) = Self::extract_conventional_type(message) {
        return explicit;
    }
    let paths: Vec<&str> = file_changes
        .file_list
        .iter()
        .map(|change| change.file.as_str())
        .collect();
    let any_path = |pred: fn(&str) -> bool| paths.iter().copied().any(pred);
    // Heuristics are checked in priority order; the first hit wins.
    if any_path(|p| p.contains("test") || p.contains("spec")) {
        return "test".to_string();
    }
    if any_path(|p| p.ends_with(".md") || p.contains("README") || p.contains("docs/")) {
        return "docs".to_string();
    }
    if any_path(|p| p.contains("Cargo.toml") || p.contains("package.json") || p.contains("config"))
    {
        // Config/manifest touched: new files suggest a feature, otherwise chore.
        return if file_changes.files_added > 0 {
            "feat"
        } else {
            "chore"
        }
        .to_string();
    }
    if file_changes.files_added > 0
        && any_path(|p| p.ends_with(".rs") || p.ends_with(".js") || p.ends_with(".py"))
    {
        return "feat".to_string();
    }
    let lowered = message.to_lowercase();
    if lowered.contains("fix") || lowered.contains("bug") {
        return "fix".to_string();
    }
    if file_changes.files_deleted > file_changes.files_added {
        return "refactor".to_string();
    }
    "chore".to_string()
}
fn extract_conventional_type(message: &str) -> Option<String> {
let first_line = message.lines().next().unwrap_or("");
if let Some(colon_pos) = first_line.find(':') {
let prefix = &first_line[..colon_pos];
if let Some(paren_pos) = prefix.find('(') {
let type_part = &prefix[..paren_pos];
if Self::is_valid_conventional_type(type_part) {
return Some(type_part.to_string());
}
} else if Self::is_valid_conventional_type(prefix) {
return Some(prefix.to_string());
}
}
None
}
/// Whether `s` is one of the conventional-commit types this project accepts.
fn is_valid_conventional_type(s: &str) -> bool {
    // Kept in one place so the accepted set is easy to audit.
    const VALID_TYPES: &[&str] = &[
        "feat", "fix", "docs", "style", "refactor", "test", "chore", "build", "ci", "perf",
    ];
    VALID_TYPES.contains(&s)
}
/// Infer a scope from the changed paths using fixed prefix rules;
/// returns an empty string when nothing matches.
fn detect_scope(file_changes: &FileChanges) -> String {
    let paths: Vec<&str> = file_changes
        .file_list
        .iter()
        .map(|change| change.file.as_str())
        .collect();
    // Checked in priority order; the first matching prefix wins.
    const PREFIX_RULES: [(&str, &str); 5] = [
        ("src/cli/", "cli"),
        ("src/git/", "git"),
        ("src/data/", "data"),
        ("tests/", "test"),
        ("docs/", "docs"),
    ];
    for (prefix, scope) in PREFIX_RULES {
        if paths.iter().any(|p| p.starts_with(prefix)) {
            return scope.to_string();
        }
    }
    // Manifest/config files anywhere in the tree map to `deps`.
    if paths
        .iter()
        .any(|p| p.contains("Cargo.toml") || p.contains("deny.toml"))
    {
        return "deps".to_string();
    }
    String::new()
}
/// Replace `detected_scope` with the scope resolved from the configured
/// scope definitions, when one matches the changed files; otherwise the
/// existing value is kept.
pub fn refine_scope(&mut self, scope_defs: &[ScopeDefinition]) {
    let paths: Vec<&str> = self
        .file_changes
        .file_list
        .iter()
        .map(|change| change.file.as_str())
        .collect();
    if let Some(better) = resolve_scope(&paths, scope_defs) {
        self.detected_scope = better;
    }
}
/// Build a conventional-commit subject line for the commit.
///
/// A subject that is already conventional is returned unchanged; otherwise
/// the existing subject (or a synthesized description for empty/placeholder
/// messages) is prefixed with `type(scope): `.
fn generate_proposed_message(
    commit: &Commit,
    commit_type: &str,
    scope: &str,
    file_changes: &FileChanges,
) -> String {
    let first_line = commit.message().unwrap_or("").lines().next().unwrap_or("");
    // Already conventional: keep the author's subject untouched.
    if Self::extract_conventional_type(first_line).is_some() {
        return first_line.to_string();
    }
    // "stuff" is treated as a placeholder, same as an empty subject.
    let description = if first_line.is_empty() || first_line.eq_ignore_ascii_case("stuff") {
        Self::generate_description(commit_type, file_changes)
    } else {
        first_line.to_string()
    };
    if scope.is_empty() {
        format!("{commit_type}: {description}")
    } else {
        format!("{commit_type}({scope}): {description}")
    }
}
/// Synthesize a generic subject-line description for `commit_type`.
fn generate_description(commit_type: &str, file_changes: &FileChanges) -> String {
    // `feat` is the only type whose description depends on the file list.
    if commit_type == "feat" {
        return if file_changes.total_files == 1 {
            // `total_files` is the list length, so index 0 exists here.
            format!("add {}", file_changes.file_list[0].file)
        } else {
            format!("add {} new features", file_changes.total_files)
        };
    }
    let text = match commit_type {
        "fix" => "resolve issues",
        "docs" => "update documentation",
        "test" => "add tests",
        "refactor" => "improve code structure",
        "chore" => "update project files",
        _ => "update project",
    };
    text.to_string()
}
fn get_diff_summary(repo: &Repository, commit: &Commit) -> Result<String> {
let commit_tree = commit.tree().context("Failed to get commit tree")?;
let parent_tree = if commit.parent_count() > 0 {
Some(
commit
.parent(0)
.context("Failed to get parent commit")?
.tree()
.context("Failed to get parent tree")?,
)
} else {
None
};
let diff = if let Some(parent_tree) = parent_tree {
repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
.context("Failed to create diff")?
} else {
repo.diff_tree_to_tree(None, Some(&commit_tree), None)
.context("Failed to create diff for initial commit")?
};
let stats = diff.stats().context("Failed to get diff stats")?;
let mut summary = String::new();
for i in 0..stats.files_changed() {
if let Some(path) = diff
.get_delta(i)
.and_then(|d| d.new_file().path())
.and_then(|p| p.to_str())
{
let insertions = stats.insertions();
let deletions = stats.deletions();
summary.push_str(&format!(
" {} | {} +{} -{}\n",
path,
insertions + deletions,
insertions,
deletions
));
}
}
Ok(summary)
}
/// Write the commit's full diff to `<scratch>/diffs/<hash>.diff` and each
/// file's slice of it to `<scratch>/diffs/<hash>/NNNN.diff`.
///
/// Returns the full-diff path and one [`FileDiffRef`] per changed file.
///
/// # Errors
/// Fails on scratch-directory resolution, diff creation/formatting, or any
/// file write.
fn write_diff_to_file(
    repo: &Repository,
    commit: &Commit,
) -> Result<(String, Vec<FileDiffRef>)> {
    let ai_scratch_path = crate::utils::ai_scratch::get_ai_scratch_dir()
        .context("Failed to determine AI scratch directory")?;
    let diffs_dir = ai_scratch_path.join("diffs");
    fs::create_dir_all(&diffs_dir).context("Failed to create diffs directory")?;
    let commit_hash = commit.id().to_string();
    let diff_filename = format!("{commit_hash}.diff");
    let diff_path = diffs_dir.join(&diff_filename);
    let commit_tree = commit.tree().context("Failed to get commit tree")?;
    // Root commits have no parent; diff against nothing in that case.
    let parent_tree = if commit.parent_count() > 0 {
        Some(
            commit
                .parent(0)
                .context("Failed to get parent commit")?
                .tree()
                .context("Failed to get parent tree")?,
        )
    } else {
        None
    };
    let diff = if let Some(parent_tree) = parent_tree {
        repo.diff_tree_to_tree(Some(&parent_tree), Some(&commit_tree), None)
            .context("Failed to create diff")?
    } else {
        repo.diff_tree_to_tree(None, Some(&commit_tree), None)
            .context("Failed to create diff for initial commit")?
    };
    // Render the diff in unified patch format into one string.
    let mut diff_content = String::new();
    diff.print(git2::DiffFormat::Patch, |_delta, _hunk, line| {
        // Non-UTF-8 line content is replaced with a placeholder.
        let content = std::str::from_utf8(line.content()).unwrap_or("<binary>");
        // Re-attach the origin marker for content lines. Other origins fall
        // through to the empty prefix — presumably file/hunk header lines
        // already carry their own text; confirm against git2's print docs.
        let prefix = match line.origin() {
            '+' => "+",
            '-' => "-",
            ' ' => " ",
            '@' => "@",
            _ => "",
        };
        diff_content.push_str(&format!("{prefix}{content}"));
        true
    })
    .context("Failed to format diff")?;
    // Guarantee a trailing newline so downstream splitting is uniform.
    if !diff_content.ends_with('\n') {
        diff_content.push('\n');
    }
    fs::write(&diff_path, &diff_content).context("Failed to write diff file")?;
    // Split the rendered patch per file and persist each piece separately.
    let per_file_diffs = split_by_file(&diff_content);
    let mut file_diffs = Vec::with_capacity(per_file_diffs.len());
    if !per_file_diffs.is_empty() {
        let per_file_dir = diffs_dir.join(&commit_hash);
        fs::create_dir_all(&per_file_dir)
            .context("Failed to create per-file diffs directory")?;
        for (index, file_diff) in per_file_diffs.iter().enumerate() {
            // Zero-padded index keeps the files sorted in diff order.
            let per_file_name = format!("{index:04}.diff");
            let per_file_path = per_file_dir.join(&per_file_name);
            fs::write(&per_file_path, &file_diff.content).with_context(|| {
                format!("Failed to write per-file diff: {}", per_file_path.display())
            })?;
            file_diffs.push(FileDiffRef {
                path: file_diff.path.clone(),
                diff_file: per_file_path.to_string_lossy().to_string(),
                byte_len: file_diff.byte_len,
            });
        }
    }
    Ok((diff_path.to_string_lossy().to_string(), file_diffs))
}
}
impl CommitInfoForAI {
/// Convert a [`CommitInfo`] into its AI-facing form, loading the full diff
/// text from disk into the analysis.
///
/// # Errors
/// Fails when the recorded diff file cannot be read.
pub fn from_commit_info(commit_info: CommitInfo) -> Result<Self> {
    let CommitInfo {
        hash,
        author,
        date,
        original_message,
        in_main_branches,
        analysis,
    } = commit_info;
    Ok(Self {
        base: CommitInfo {
            hash,
            author,
            date,
            original_message,
            in_main_branches,
            analysis: CommitAnalysisForAI::from_commit_analysis(analysis)?,
        },
        pre_validated_checks: Vec::new(),
    })
}
/// Test helper: include the given per-file diffs with no content overrides,
/// so every diff is loaded from disk.
#[cfg(test)]
pub(crate) fn from_commit_info_partial(
    commit_info: CommitInfo,
    file_paths: &[String],
) -> Result<Self> {
    let no_overrides: Vec<Option<String>> = vec![None; file_paths.len()];
    Self::from_commit_info_partial_with_overrides(commit_info, file_paths, &no_overrides)
}
/// Build an AI-facing record containing only a subset of per-file diffs.
///
/// `file_paths` and `diff_overrides` are zipped pairwise: when the override
/// is `Some`, that text is used verbatim (e.g. a truncated diff); when it is
/// `None`, the diff is read from the path recorded in the analysis. Paths
/// with no matching [`FileDiffRef`] contribute no disk content or ref.
///
/// # Errors
/// Fails when a referenced per-file diff cannot be read from disk.
pub(crate) fn from_commit_info_partial_with_overrides(
    commit_info: CommitInfo,
    file_paths: &[String],
    diff_overrides: &[Option<String>],
) -> Result<Self> {
    let mut diff_parts = Vec::new();
    let mut included_refs = Vec::new();
    // Paths already loaded from disk, so a repeated path is read only once.
    // NOTE: override content is pushed unconditionally on every occurrence;
    // only the ref list is deduplicated in that branch.
    let mut loaded_disk_paths: std::collections::HashSet<String> =
        std::collections::HashSet::new();
    for (path, override_content) in file_paths.iter().zip(diff_overrides.iter()) {
        if let Some(content) = override_content {
            diff_parts.push(content.clone());
            if let Some(file_ref) = commit_info
                .analysis
                .file_diffs
                .iter()
                .find(|r| r.path == *path)
            {
                if !included_refs.iter().any(|r: &FileDiffRef| r.path == *path) {
                    included_refs.push(file_ref.clone());
                }
            }
        } else {
            if loaded_disk_paths.insert(path.clone()) {
                if let Some(file_ref) = commit_info
                    .analysis
                    .file_diffs
                    .iter()
                    .find(|r| r.path == *path)
                {
                    let content =
                        fs::read_to_string(&file_ref.diff_file).with_context(|| {
                            format!("Failed to read per-file diff: {}", file_ref.diff_file)
                        })?;
                    diff_parts.push(content);
                    included_refs.push(file_ref.clone());
                }
            }
        }
    }
    let diff_content = diff_parts.join("\n");
    // Keep the original analysis but narrow `file_diffs` to what was loaded.
    let partial_analysis = CommitAnalysisForAI {
        base: CommitAnalysis {
            file_diffs: included_refs,
            ..commit_info.analysis
        },
        diff_content,
    };
    Ok(Self {
        base: CommitInfo {
            hash: commit_info.hash,
            author: commit_info.author,
            date: commit_info.date,
            original_message: commit_info.original_message,
            in_main_branches: commit_info.in_main_branches,
            analysis: partial_analysis,
        },
        pre_validated_checks: Vec::new(),
    })
}
/// Check the original message's scope locally and record what passed, so
/// downstream AI consumers can take these facts as given.
///
/// No-op when the message has no `type(scope):` / `type!(scope):` header.
pub fn run_pre_validation_checks(&mut self, valid_scopes: &[ScopeDefinition]) {
    let Some(caps) = SCOPE_RE.captures(&self.base.original_message) else {
        return;
    };
    let Some(scope) = caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()) else {
        return;
    };
    let mut checks = Vec::new();
    // Multi-scope formatting: commas must not be followed by spaces.
    if scope.contains(',') && !scope.contains(", ") {
        checks.push(format!(
            "Scope format verified: multi-scope '{scope}' correctly uses commas without spaces"
        ));
    }
    // With a configured scope list, every comma-separated part must be known.
    if !valid_scopes.is_empty() {
        let every_part_known = scope
            .split(',')
            .map(str::trim)
            .all(|part| valid_scopes.iter().any(|def| def.name == part));
        if every_part_known {
            checks.push(format!(
                "Scope validity verified: '{scope}' is in the valid scopes list"
            ));
        }
    }
    self.pre_validated_checks.extend(checks);
}
}
/// Resolve the best scope name(s) for a set of changed files.
///
/// Every scope definition whose patterns match is scored by specificity;
/// all names tied at the maximum are joined with `", "`. Returns `None`
/// when either input is empty or nothing matches.
pub fn resolve_scope(files: &[&str], scope_defs: &[ScopeDefinition]) -> Option<String> {
    if files.is_empty() || scope_defs.is_empty() {
        return None;
    }
    let scored: Vec<(&str, usize)> = scope_defs
        .iter()
        .filter_map(|def| {
            scope_matches_files(files, &def.file_patterns)
                .map(|specificity| (def.name.as_str(), specificity))
        })
        .collect();
    // `max()?` also handles the no-matches case: empty -> None.
    let best_specificity = scored.iter().map(|&(_, s)| s).max()?;
    let winners: Vec<&str> = scored
        .iter()
        .filter(|&&(_, s)| s == best_specificity)
        .map(|&(name, _)| name)
        .collect();
    Some(winners.join(", "))
}
/// Rewrite the scope in a commit message's subject line to the scope
/// resolved from `scope_defs`, leaving the body untouched.
///
/// The message is returned unchanged when no scope resolves, the subject
/// has no `type(scope):` header, or the scope is already correct.
pub fn refine_message_scope(
    message: &str,
    files: &[&str],
    scope_defs: &[ScopeDefinition],
) -> String {
    let Some(resolved) = resolve_scope(files, scope_defs) else {
        return message.to_string();
    };
    // Only the subject (first line) is rewritten; the body is preserved.
    let (subject, body) = match message.split_once('\n') {
        Some((s, b)) => (s, b),
        None => (message, ""),
    };
    let Some(caps) = SCOPE_RE.captures(subject) else {
        return message.to_string();
    };
    let current = caps
        .get(1)
        .or_else(|| caps.get(2))
        .map_or("", |m| m.as_str());
    if current == resolved {
        return message.to_string();
    }
    // Replace only the first `(scope)` occurrence — the header's.
    let new_subject = subject.replacen(&format!("({current})"), &format!("({resolved})"), 1);
    if body.is_empty() {
        new_subject
    } else {
        format!("{new_subject}\n{body}")
    }
}
/// Test whether any file matches the scope's glob patterns.
///
/// Patterns prefixed with `!` are exclusions: a file counts only when it
/// matches an inclusion and no exclusion. Returns the highest specificity
/// among inclusion patterns that matched, or `None` when nothing matched.
/// Invalid glob patterns are silently ignored.
fn scope_matches_files(files: &[&str], patterns: &[String]) -> Option<usize> {
    // Partition into `!`-prefixed exclusions and plain inclusions.
    let (excluded, included): (Vec<&str>, Vec<&str>) = patterns
        .iter()
        .map(String::as_str)
        .partition(|p| p.starts_with('!'));
    let exclusion_matchers: Vec<_> = excluded
        .iter()
        .filter_map(|p| p.strip_prefix('!'))
        .filter_map(|p| Glob::new(p).ok().map(|g| g.compile_matcher()))
        .collect();
    let mut best: Option<usize> = None;
    for pattern in included {
        let Ok(glob) = Glob::new(pattern) else {
            continue;
        };
        let matcher = glob.compile_matcher();
        let hit = files.iter().any(|file| {
            matcher.is_match(file) && !exclusion_matchers.iter().any(|ex| ex.is_match(file))
        });
        if hit {
            let specificity = count_specificity(pattern);
            best = Some(best.map_or(specificity, |cur| cur.max(specificity)));
        }
    }
    best
}
/// Specificity of a glob pattern: the number of `/`-separated segments
/// containing no wildcard (`*` or `?`). More literal segments means a more
/// specific pattern.
fn count_specificity(pattern: &str) -> usize {
    let mut literal_segments = 0;
    for segment in pattern.split('/') {
        let has_wildcard = segment.chars().any(|c| c == '*' || c == '?');
        if !has_wildcard {
            literal_segments += 1;
        }
    }
    literal_segments
}
impl CommitAnalysisForAI {
pub fn from_commit_analysis(analysis: CommitAnalysis) -> Result<Self> {
let diff_content = fs::read_to_string(&analysis.diff_file)
.with_context(|| format!("Failed to read diff file: {}", analysis.diff_file))?;
Ok(Self {
base: analysis,
diff_content,
})
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use crate::data::context::ScopeDefinition;
#[test]
fn conventional_type_feat_with_scope() {
assert_eq!(
CommitAnalysis::extract_conventional_type("feat(cli): add flag"),
Some("feat".to_string())
);
}
#[test]
fn conventional_type_without_scope() {
assert_eq!(
CommitAnalysis::extract_conventional_type("fix: resolve bug"),
Some("fix".to_string())
);
}
#[test]
fn conventional_type_invalid_message() {
assert_eq!(
CommitAnalysis::extract_conventional_type("random message without colon"),
None
);
}
#[test]
fn conventional_type_unknown_type() {
assert_eq!(
CommitAnalysis::extract_conventional_type("yolo(scope): stuff"),
None
);
}
#[test]
fn conventional_type_all_valid_types() {
let types = [
"feat", "fix", "docs", "style", "refactor", "test", "chore", "build", "ci", "perf",
];
for t in types {
let msg = format!("{t}: description");
assert_eq!(
CommitAnalysis::extract_conventional_type(&msg),
Some(t.to_string()),
"expected Some for type '{t}'"
);
}
}
#[test]
fn valid_conventional_types() {
for t in [
"feat", "fix", "docs", "style", "refactor", "test", "chore", "build", "ci", "perf",
] {
assert!(
CommitAnalysis::is_valid_conventional_type(t),
"'{t}' should be valid"
);
}
}
#[test]
fn invalid_conventional_types() {
for t in ["yolo", "Feat", "", "FEAT", "feature", "bugfix"] {
assert!(
!CommitAnalysis::is_valid_conventional_type(t),
"'{t}' should be invalid"
);
}
}
fn make_file_changes(files: &[(&str, &str)]) -> FileChanges {
FileChanges {
total_files: files.len(),
files_added: files.iter().filter(|(s, _)| *s == "A").count(),
files_deleted: files.iter().filter(|(s, _)| *s == "D").count(),
file_list: files
.iter()
.map(|(status, file)| FileChange {
status: (*status).to_string(),
file: (*file).to_string(),
})
.collect(),
}
}
#[test]
fn scope_from_cli_files() {
let changes = make_file_changes(&[("M", "src/cli/commands.rs")]);
assert_eq!(CommitAnalysis::detect_scope(&changes), "cli");
}
#[test]
fn scope_from_git_files() {
let changes = make_file_changes(&[("M", "src/git/remote.rs")]);
assert_eq!(CommitAnalysis::detect_scope(&changes), "git");
}
#[test]
fn scope_from_docs_files() {
let changes = make_file_changes(&[("M", "docs/README.md")]);
assert_eq!(CommitAnalysis::detect_scope(&changes), "docs");
}
#[test]
fn scope_from_data_files() {
let changes = make_file_changes(&[("M", "src/data/yaml.rs")]);
assert_eq!(CommitAnalysis::detect_scope(&changes), "data");
}
#[test]
fn scope_from_test_files() {
let changes = make_file_changes(&[("A", "tests/new_test.rs")]);
assert_eq!(CommitAnalysis::detect_scope(&changes), "test");
}
#[test]
fn scope_from_deps_files() {
let changes = make_file_changes(&[("M", "Cargo.toml")]);
assert_eq!(CommitAnalysis::detect_scope(&changes), "deps");
}
#[test]
fn scope_unknown_files() {
let changes = make_file_changes(&[("M", "random/path/file.txt")]);
assert_eq!(CommitAnalysis::detect_scope(&changes), "");
}
#[test]
fn count_specificity_deep_path() {
assert_eq!(super::count_specificity("src/main/scala/**"), 3);
}
#[test]
fn count_specificity_shallow() {
assert_eq!(super::count_specificity("docs/**"), 1);
}
#[test]
fn count_specificity_wildcard_only() {
assert_eq!(super::count_specificity("*.md"), 0);
}
#[test]
fn count_specificity_no_wildcards() {
assert_eq!(super::count_specificity("src/lib.rs"), 2);
}
#[test]
fn scope_matches_positive_patterns() {
let patterns = vec!["src/cli/**".to_string()];
let files = &["src/cli/commands.rs"];
assert!(super::scope_matches_files(files, &patterns).is_some());
}
#[test]
fn scope_matches_no_match() {
let patterns = vec!["src/cli/**".to_string()];
let files = &["src/git/remote.rs"];
assert!(super::scope_matches_files(files, &patterns).is_none());
}
#[test]
fn scope_matches_with_negation() {
let patterns = vec!["src/**".to_string(), "!src/test/**".to_string()];
let files = &["src/lib.rs"];
assert!(super::scope_matches_files(files, &patterns).is_some());
let test_files = &["src/test/helper.rs"];
assert!(super::scope_matches_files(test_files, &patterns).is_none());
}
fn make_scope_def(name: &str, patterns: &[&str]) -> ScopeDefinition {
ScopeDefinition {
name: name.to_string(),
description: String::new(),
examples: vec![],
file_patterns: patterns.iter().map(|p| (*p).to_string()).collect(),
}
}
#[test]
fn refine_scope_empty_defs() {
let mut analysis = CommitAnalysis {
detected_type: "feat".to_string(),
detected_scope: "original".to_string(),
proposed_message: String::new(),
file_changes: make_file_changes(&[("M", "src/cli/commands.rs")]),
diff_summary: String::new(),
diff_file: String::new(),
file_diffs: Vec::new(),
};
analysis.refine_scope(&[]);
assert_eq!(analysis.detected_scope, "original");
}
#[test]
fn refine_scope_most_specific_wins() {
let scope_defs = vec![
make_scope_def("lib", &["src/**"]),
make_scope_def("cli", &["src/cli/**"]),
];
let mut analysis = CommitAnalysis {
detected_type: "feat".to_string(),
detected_scope: String::new(),
proposed_message: String::new(),
file_changes: make_file_changes(&[("M", "src/cli/commands.rs")]),
diff_summary: String::new(),
diff_file: String::new(),
file_diffs: Vec::new(),
};
analysis.refine_scope(&scope_defs);
assert_eq!(analysis.detected_scope, "cli");
}
#[test]
fn refine_scope_no_matching_files() {
let scope_defs = vec![make_scope_def("cli", &["src/cli/**"])];
let mut analysis = CommitAnalysis {
detected_type: "feat".to_string(),
detected_scope: "original".to_string(),
proposed_message: String::new(),
file_changes: make_file_changes(&[("M", "README.md")]),
diff_summary: String::new(),
diff_file: String::new(),
file_diffs: Vec::new(),
};
analysis.refine_scope(&scope_defs);
assert_eq!(analysis.detected_scope, "original");
}
#[test]
fn refine_scope_equal_specificity_joins() {
let scope_defs = vec![
make_scope_def("cli", &["src/cli/**"]),
make_scope_def("git", &["src/git/**"]),
];
let mut analysis = CommitAnalysis {
detected_type: "feat".to_string(),
detected_scope: String::new(),
proposed_message: String::new(),
file_changes: make_file_changes(&[
("M", "src/cli/commands.rs"),
("M", "src/git/remote.rs"),
]),
diff_summary: String::new(),
diff_file: String::new(),
file_diffs: Vec::new(),
};
analysis.refine_scope(&scope_defs);
assert!(
analysis.detected_scope == "cli, git" || analysis.detected_scope == "git, cli",
"expected joined scopes, got: {}",
analysis.detected_scope
);
}
#[test]
fn refine_message_scope_replaces_less_specific() {
let scope_defs = vec![
make_scope_def("ci", &[".github/**"]),
make_scope_def("workflows", &[".github/workflows/**"]),
];
let files = &[".github/workflows/ci.yml"];
let result = super::refine_message_scope(
"chore(ci): bump EmbarkStudios/cargo-deny-action from 2.0.15 to 2.0.17",
files,
&scope_defs,
);
assert_eq!(
result,
"chore(workflows): bump EmbarkStudios/cargo-deny-action from 2.0.15 to 2.0.17"
);
}
#[test]
fn refine_message_scope_keeps_already_correct() {
let scope_defs = vec![
make_scope_def("ci", &[".github/**"]),
make_scope_def("workflows", &[".github/workflows/**"]),
];
let files = &[".github/workflows/ci.yml"];
let msg = "chore(workflows): bump something";
assert_eq!(super::refine_message_scope(msg, files, &scope_defs), msg);
}
#[test]
fn refine_message_scope_no_scope_in_message() {
let scope_defs = vec![make_scope_def("cli", &["src/cli/**"])];
let files = &["src/cli/commands.rs"];
let msg = "chore: do something";
assert_eq!(super::refine_message_scope(msg, files, &scope_defs), msg);
}
#[test]
fn refine_message_scope_preserves_body() {
let scope_defs = vec![
make_scope_def("ci", &[".github/**"]),
make_scope_def("workflows", &[".github/workflows/**"]),
];
let files = &[".github/workflows/ci.yml"];
let msg = "chore(ci): bump dep\n\nSome body text\nMore details";
let result = super::refine_message_scope(msg, files, &scope_defs);
assert_eq!(
result,
"chore(workflows): bump dep\n\nSome body text\nMore details"
);
}
#[test]
fn refine_message_scope_breaking_change() {
let scope_defs = vec![
make_scope_def("ci", &[".github/**"]),
make_scope_def("workflows", &[".github/workflows/**"]),
];
let files = &[".github/workflows/ci.yml"];
let result = super::refine_message_scope("feat!(ci): breaking change", files, &scope_defs);
assert_eq!(result, "feat!(workflows): breaking change");
}
#[test]
fn refine_message_scope_no_matching_scope_defs() {
let scope_defs = vec![make_scope_def("cli", &["src/cli/**"])];
let files = &["README.md"];
let msg = "docs(docs): update readme";
assert_eq!(super::refine_message_scope(msg, files, &scope_defs), msg);
}
fn make_commit_info_for_ai(message: &str) -> CommitInfoForAI {
CommitInfoForAI {
base: CommitInfo {
hash: "a".repeat(40),
author: "Test <test@example.com>".to_string(),
date: chrono::DateTime::parse_from_rfc3339("2024-01-01T00:00:00+00:00").unwrap(),
original_message: message.to_string(),
in_main_branches: vec![],
analysis: CommitAnalysisForAI {
base: CommitAnalysis {
detected_type: "feat".to_string(),
detected_scope: String::new(),
proposed_message: String::new(),
file_changes: make_file_changes(&[]),
diff_summary: String::new(),
diff_file: String::new(),
file_diffs: Vec::new(),
},
diff_content: String::new(),
},
},
pre_validated_checks: vec![],
}
}
#[test]
fn pre_validation_valid_single_scope() {
let scopes = vec![make_scope_def("cli", &["src/cli/**"])];
let mut info = make_commit_info_for_ai("feat(cli): add command");
info.run_pre_validation_checks(&scopes);
assert!(
info.pre_validated_checks
.iter()
.any(|c| c.contains("Scope validity verified")),
"expected scope validity check, got: {:?}",
info.pre_validated_checks
);
}
#[test]
fn pre_validation_multi_scope() {
let scopes = vec![
make_scope_def("cli", &["src/cli/**"]),
make_scope_def("git", &["src/git/**"]),
];
let mut info = make_commit_info_for_ai("feat(cli,git): cross-cutting change");
info.run_pre_validation_checks(&scopes);
assert!(info
.pre_validated_checks
.iter()
.any(|c| c.contains("Scope validity verified")),);
assert!(info
.pre_validated_checks
.iter()
.any(|c| c.contains("multi-scope")),);
}
#[test]
fn pre_validation_multi_scope_with_spaces() {
let scopes = vec![
make_scope_def("cli", &["src/cli/**"]),
make_scope_def("lib", &["src/lib/**"]),
];
let mut info = make_commit_info_for_ai("feat(cli, lib): add something");
info.run_pre_validation_checks(&scopes);
assert!(
info.pre_validated_checks
.iter()
.any(|c| c.contains("Scope validity verified")),
"expected scope validity check for spaced multi-scope, got: {:?}",
info.pre_validated_checks
);
}
#[test]
fn pre_validation_invalid_scope_not_added() {
let scopes = vec![make_scope_def("cli", &["src/cli/**"])];
let mut info = make_commit_info_for_ai("feat(unknown): something");
info.run_pre_validation_checks(&scopes);
assert!(
!info
.pre_validated_checks
.iter()
.any(|c| c.contains("Scope validity verified")),
"should not validate unknown scope"
);
}
#[test]
fn pre_validation_no_scope_message() {
let scopes = vec![make_scope_def("cli", &["src/cli/**"])];
let mut info = make_commit_info_for_ai("feat: no scope here");
info.run_pre_validation_checks(&scopes);
assert!(info.pre_validated_checks.is_empty());
}
mod prop {
use super::*;
use proptest::prelude::*;
fn arb_conventional_type() -> impl Strategy<Value = &'static str> {
prop_oneof![
Just("feat"),
Just("fix"),
Just("docs"),
Just("style"),
Just("refactor"),
Just("test"),
Just("chore"),
Just("build"),
Just("ci"),
Just("perf"),
]
}
proptest! {
#[test]
fn valid_conventional_format_extracts_type(
ctype in arb_conventional_type(),
scope in "[a-z]{1,10}",
desc in "[a-zA-Z ]{1,50}",
) {
let message = format!("{ctype}({scope}): {desc}");
let result = CommitAnalysis::extract_conventional_type(&message);
prop_assert_eq!(result, Some(ctype.to_string()));
}
#[test]
fn no_colon_returns_none(s in "[^:]{0,100}") {
let result = CommitAnalysis::extract_conventional_type(&s);
prop_assert!(result.is_none());
}
#[test]
fn count_specificity_nonnegative(pattern in ".*") {
let _ = super::count_specificity(&pattern);
}
#[test]
fn count_specificity_bounded_by_segments(
segments in proptest::collection::vec("[a-z*?]{1,10}", 1..6),
) {
let pattern = segments.join("/");
let result = super::count_specificity(&pattern);
prop_assert!(result <= segments.len());
}
}
}
#[test]
fn from_commit_analysis_loads_diff_content() {
let dir = tempfile::tempdir().unwrap();
let diff_path = dir.path().join("test.diff");
std::fs::write(&diff_path, "+added line\n-removed line\n").unwrap();
let analysis = CommitAnalysis {
detected_type: "feat".to_string(),
detected_scope: "cli".to_string(),
proposed_message: "feat(cli): test".to_string(),
file_changes: make_file_changes(&[]),
diff_summary: "file.rs | 2 +-".to_string(),
diff_file: diff_path.to_string_lossy().to_string(),
file_diffs: Vec::new(),
};
let ai = CommitAnalysisForAI::from_commit_analysis(analysis.clone()).unwrap();
assert_eq!(ai.diff_content, "+added line\n-removed line\n");
assert_eq!(ai.base.detected_type, analysis.detected_type);
assert_eq!(ai.base.diff_file, analysis.diff_file);
}
#[test]
fn from_commit_info_wraps_and_loads_diff() {
let dir = tempfile::tempdir().unwrap();
let diff_path = dir.path().join("test.diff");
std::fs::write(&diff_path, "diff content").unwrap();
let info = CommitInfo {
hash: "a".repeat(40),
author: "Test <test@example.com>".to_string(),
date: chrono::DateTime::parse_from_rfc3339("2024-01-01T00:00:00+00:00").unwrap(),
original_message: "feat(cli): add flag".to_string(),
in_main_branches: vec!["origin/main".to_string()],
analysis: CommitAnalysis {
detected_type: "feat".to_string(),
detected_scope: "cli".to_string(),
proposed_message: "feat(cli): add flag".to_string(),
file_changes: make_file_changes(&[("M", "src/cli.rs")]),
diff_summary: "cli.rs | 1 +".to_string(),
diff_file: diff_path.to_string_lossy().to_string(),
file_diffs: Vec::new(),
},
};
let ai = CommitInfoForAI::from_commit_info(info).unwrap();
assert_eq!(ai.base.analysis.diff_content, "diff content");
assert_eq!(ai.base.hash, "a".repeat(40));
assert_eq!(ai.base.original_message, "feat(cli): add flag");
assert!(ai.pre_validated_checks.is_empty());
}
#[test]
fn file_diffs_default_empty_on_deserialize() {
let yaml = r#"
detected_type: feat
detected_scope: cli
proposed_message: "feat(cli): test"
file_changes:
total_files: 0
files_added: 0
files_deleted: 0
file_list: []
diff_summary: ""
diff_file: "/tmp/test.diff"
"#;
let analysis: CommitAnalysis = serde_yaml::from_str(yaml).unwrap();
assert!(analysis.file_diffs.is_empty());
}
#[test]
fn file_diffs_omitted_when_empty_on_serialize() {
    // An empty `file_diffs` vec is skipped entirely on serialization
    // (`skip_serializing_if = "Vec::is_empty"`), so the key never appears.
    let subject = CommitAnalysis {
        detected_type: "feat".to_string(),
        detected_scope: "cli".to_string(),
        proposed_message: "feat(cli): test".to_string(),
        file_changes: make_file_changes(&[]),
        diff_summary: String::new(),
        diff_file: String::new(),
        file_diffs: Vec::new(),
    };
    let rendered = serde_yaml::to_string(&subject).unwrap();
    assert!(!rendered.contains("file_diffs"));
}
#[test]
fn file_diffs_included_when_populated() {
    // Once `file_diffs` holds an entry, the key and its fields must be
    // present in the serialized output.
    let single_ref = FileDiffRef {
        path: "src/main.rs".to_string(),
        diff_file: "/tmp/diffs/abc/0000.diff".to_string(),
        byte_len: 42,
    };
    let subject = CommitAnalysis {
        detected_type: "feat".to_string(),
        detected_scope: "cli".to_string(),
        proposed_message: "feat(cli): test".to_string(),
        file_changes: make_file_changes(&[]),
        diff_summary: String::new(),
        diff_file: String::new(),
        file_diffs: vec![single_ref],
    };
    let rendered = serde_yaml::to_string(&subject).unwrap();
    assert!(rendered.contains("file_diffs"));
    assert!(rendered.contains("src/main.rs"));
    assert!(rendered.contains("byte_len: 42"));
}
/// Builds a test `CommitInfo` whose analysis carries one on-disk per-file
/// diff for each `(path, content)` pair in `files`, written under `dir`.
fn make_commit_with_file_diffs(
    dir: &tempfile::TempDir,
    files: &[(&str, &str)],
) -> CommitInfo {
    let mut file_diffs = Vec::with_capacity(files.len());
    for (i, (path, content)) in files.iter().enumerate() {
        // One zero-padded, numbered diff file per entry.
        let diff_path = dir.path().join(format!("{i:04}.diff"));
        fs::write(&diff_path, content).unwrap();
        file_diffs.push(FileDiffRef {
            path: (*path).to_string(),
            diff_file: diff_path.to_string_lossy().to_string(),
            byte_len: content.len(),
        });
    }
    // Report every listed file as modified in the change summary.
    let modified: Vec<(&str, &str)> = files.iter().map(|(p, _)| ("M", *p)).collect();
    CommitInfo {
        hash: "abc123def456abc123def456abc123def456abc1".to_string(),
        author: "Test Author".to_string(),
        date: DateTime::parse_from_rfc3339("2025-01-01T00:00:00+00:00").unwrap(),
        original_message: "feat(cli): original message".to_string(),
        in_main_branches: vec!["main".to_string()],
        analysis: CommitAnalysis {
            detected_type: "feat".to_string(),
            detected_scope: "cli".to_string(),
            proposed_message: "feat(cli): proposed".to_string(),
            file_changes: make_file_changes(&modified),
            diff_summary: " src/main.rs | 10 ++++\n src/lib.rs | 5 ++\n".to_string(),
            // NOTE(review): this path is never written to disk here — the
            // partial-loading tests only read the per-file diffs above.
            diff_file: dir.path().join("full.diff").to_string_lossy().to_string(),
            file_diffs,
        },
    }
}
#[test]
fn from_commit_info_partial_loads_subset() -> Result<()> {
    // Only the requested paths' diffs get loaded; the rest are excluded.
    let dir = tempfile::tempdir()?;
    let commit = make_commit_with_file_diffs(
        &dir,
        &[
            ("src/main.rs", "diff --git a/src/main.rs\n+main\n"),
            ("src/lib.rs", "diff --git a/src/lib.rs\n+lib\n"),
            ("src/utils.rs", "diff --git a/src/utils.rs\n+utils\n"),
        ],
    );
    let wanted = vec!["src/main.rs".to_string(), "src/utils.rs".to_string()];
    let partial = CommitInfoForAI::from_commit_info_partial(commit, &wanted)?;
    let content = &partial.base.analysis.diff_content;
    assert!(content.contains("+main"));
    assert!(content.contains("+utils"));
    assert!(!content.contains("+lib"));
    // The surviving refs mirror exactly the requested subset, in order.
    let refs = &partial.base.analysis.base.file_diffs;
    let ref_paths: Vec<&str> = refs.iter().map(|r| r.path.as_str()).collect();
    assert_eq!(ref_paths, &["src/main.rs", "src/utils.rs"]);
    Ok(())
}
#[test]
fn from_commit_info_partial_deduplicates_paths() -> Result<()> {
    // Requesting the same path twice must not duplicate its diff content.
    let dir = tempfile::tempdir()?;
    let commit = make_commit_with_file_diffs(
        &dir,
        &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
    );
    let repeated = vec!["src/main.rs".to_string(), "src/main.rs".to_string()];
    let partial = CommitInfoForAI::from_commit_info_partial(commit, &repeated)?;
    let occurrences = partial.base.analysis.diff_content.matches("+main").count();
    assert_eq!(occurrences, 1);
    Ok(())
}
#[test]
fn from_commit_info_partial_preserves_metadata() -> Result<()> {
    // Commit-level metadata must pass through the partial wrapper unchanged.
    let dir = tempfile::tempdir()?;
    let commit = make_commit_with_file_diffs(
        &dir,
        &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
    );
    // Snapshot everything we assert on before `commit` is consumed below.
    let expected = (
        commit.hash.clone(),
        commit.author.clone(),
        commit.date,
        commit.original_message.clone(),
        commit.analysis.diff_summary.clone(),
    );
    let paths = vec!["src/main.rs".to_string()];
    let partial = CommitInfoForAI::from_commit_info_partial(commit, &paths)?;
    assert_eq!(partial.base.hash, expected.0);
    assert_eq!(partial.base.author, expected.1);
    assert_eq!(partial.base.date, expected.2);
    assert_eq!(partial.base.original_message, expected.3);
    assert_eq!(partial.base.analysis.base.diff_summary, expected.4);
    Ok(())
}
#[test]
fn with_overrides_uses_override_content() -> Result<()> {
    // A supplied override replaces the on-disk diff for that path entirely.
    let dir = tempfile::tempdir()?;
    let commit = make_commit_with_file_diffs(
        &dir,
        &[(
            "src/big.rs",
            "diff --git a/src/big.rs\n+full-file-content\n",
        )],
    );
    let paths = vec!["src/big.rs".to_string(), "src/big.rs".to_string()];
    let overrides = vec![
        Some("diff --git a/src/big.rs\n@@ -1,3 +1,4 @@\n+hunk1\n".to_string()),
        Some("diff --git a/src/big.rs\n@@ -10,3 +10,4 @@\n+hunk2\n".to_string()),
    ];
    let partial =
        CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
    let content = &partial.base.analysis.diff_content;
    assert!(content.contains("+hunk1"));
    assert!(content.contains("+hunk2"));
    assert!(
        !content.contains("+full-file-content"),
        "should not contain full file content"
    );
    Ok(())
}
#[test]
fn with_overrides_mixed_override_and_disk() -> Result<()> {
    // `Some` entries win over the disk copy; `None` entries fall back to it.
    let dir = tempfile::tempdir()?;
    let commit = make_commit_with_file_diffs(
        &dir,
        &[
            ("src/big.rs", "diff --git a/src/big.rs\n+big-full\n"),
            ("src/small.rs", "diff --git a/src/small.rs\n+small-disk\n"),
        ],
    );
    let paths = vec!["src/big.rs".to_string(), "src/small.rs".to_string()];
    let overrides = vec![
        Some("diff --git a/src/big.rs\n@@ -1,3 +1,4 @@\n+big-hunk\n".to_string()),
        None,
    ];
    let partial =
        CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
    let content = &partial.base.analysis.diff_content;
    assert!(content.contains("+big-hunk"));
    assert!(!content.contains("+big-full"));
    assert!(content.contains("+small-disk"));
    // Both paths must remain represented in the per-file refs.
    let refs = &partial.base.analysis.base.file_diffs;
    let ref_paths: Vec<&str> = refs.iter().map(|r| r.path.as_str()).collect();
    assert!(ref_paths.contains(&"src/big.rs"));
    assert!(ref_paths.contains(&"src/small.rs"));
    Ok(())
}
#[test]
fn with_overrides_deduplicates_disk_reads() -> Result<()> {
    // Duplicate paths with no overrides must still include the diff once.
    let dir = tempfile::tempdir()?;
    let commit = make_commit_with_file_diffs(
        &dir,
        &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
    );
    let repeated = vec!["src/main.rs".to_string(), "src/main.rs".to_string()];
    let no_overrides = vec![None, None];
    let partial = CommitInfoForAI::from_commit_info_partial_with_overrides(
        commit,
        &repeated,
        &no_overrides,
    )?;
    let hits = partial.base.analysis.diff_content.matches("+main").count();
    assert_eq!(hits, 1);
    Ok(())
}
#[test]
fn with_overrides_preserves_metadata() -> Result<()> {
    // Commit-level metadata survives the overrides path unchanged, and no
    // pre-validated checks are invented.
    let dir = tempfile::tempdir()?;
    let commit = make_commit_with_file_diffs(
        &dir,
        &[("src/main.rs", "diff --git a/src/main.rs\n+main\n")],
    );
    // Snapshot before `commit` is moved into the constructor.
    let (expected_hash, expected_author, expected_message) = (
        commit.hash.clone(),
        commit.author.clone(),
        commit.original_message.clone(),
    );
    let paths = vec!["src/main.rs".to_string()];
    let overrides = vec![Some("+override-content\n".to_string())];
    let partial =
        CommitInfoForAI::from_commit_info_partial_with_overrides(commit, &paths, &overrides)?;
    assert_eq!(partial.base.hash, expected_hash);
    assert_eq!(partial.base.author, expected_author);
    assert_eq!(partial.base.original_message, expected_message);
    assert!(partial.pre_validated_checks.is_empty());
    Ok(())
}
}