use std::collections::HashMap;
use std::path::Path;
/// Summary of a single commit, as populated by `extract_git_context`.
#[derive(Debug, Clone)]
pub struct CommitInfo {
/// Commit id abbreviated to its first 7 characters.
pub hash: String,
/// Commit summary; empty when the commit exposes no summary.
pub message: String,
/// Author name, or `"Unknown"` when no name is available.
pub author: String,
/// Commit time rendered as `YYYY-MM-DD` by `format_date`.
pub date: String,
}
/// How often a path was touched across the commits walked by
/// `extract_git_context`.
#[derive(Debug, Clone)]
pub struct FileChurn {
/// New-side path of the diff delta, converted lossily to a `String`.
pub path: String,
/// Number of times the path appeared in a walked commit's diff against
/// its first parent.
pub commit_count: usize,
}
/// Commit tally for one author name, as populated by `extract_git_context`.
#[derive(Debug, Clone)]
pub struct ContributorInfo {
/// Author name used as the grouping key (`"Unknown"` when the commit
/// exposes no author name).
pub name: String,
/// Number of walked commits attributed to `name`.
pub commit_count: usize,
}
/// Aggregated history data returned by `extract_git_context`.
#[derive(Debug)]
pub struct GitContext {
/// Walked commits, in the order the revision walk produced them
/// (`git2::Sort::TIME`), capped at the caller's `max_commits`.
pub commits: Vec<CommitInfo>,
/// Per-path touch counts, sorted by descending count then ascending path,
/// truncated to the first 20 entries.
pub file_churn: Vec<FileChurn>,
/// Per-author commit counts, sorted by descending count then ascending name.
pub contributors: Vec<ContributorInfo>,
}
/// Renders a Unix timestamp (seconds) as a `YYYY-MM-DD` calendar date.
///
/// No time-zone offset is applied: the timestamp is split into whole days
/// since 1970-01-01 and converted with the usual era / day-of-era arithmetic
/// on a March-based year (proleptic Gregorian calendar).
///
/// Any timestamp before the epoch yields `"1970-01-01"`: values within the
/// last day before the epoch truncate to day 0, and earlier ones hit the
/// explicit guard.
fn format_date(unix_secs: i64) -> String {
    const SECS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097; // days in one 400-year cycle
    const EPOCH_SHIFT: i64 = 719_468; // days from 0000-03-01 to 1970-01-01

    // Truncating division: -86_399..=-1 map to day 0 rather than day -1.
    let days_since_epoch = unix_secs / SECS_PER_DAY;
    if days_since_epoch < 0 {
        return String::from("1970-01-01");
    }

    let shifted = days_since_epoch + EPOCH_SHIFT;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted - era * DAYS_PER_ERA;
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Months are counted from March (index 0) so the leap day falls last.
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;

    // January and February (indices 10 and 11) belong to the next civil year.
    let (month, year_carry) = if month_index < 10 {
        (month_index + 3, 0)
    } else {
        (month_index - 9, 1)
    };
    let year = year_of_era + era * 400 + year_carry;

    format!("{:04}-{:02}-{:02}", year, month, day)
}
/// Gathers recent-history statistics from the Git repository at `repo_path`.
///
/// Starting at `HEAD`, at most `max_commits` commits are visited using
/// `git2::Sort::TIME` ordering. For every visited commit the function
/// records a [`CommitInfo`], bumps the author's commit tally, and diffs the
/// commit's tree against its first parent's tree (or against no old tree for
/// a parentless commit) to count which paths were touched.
///
/// The returned [`GitContext`] holds:
/// * `commits` — in visiting order;
/// * `file_churn` — the 20 most-touched paths, highest count first, ties
///   broken by ascending path;
/// * `contributors` — every author name seen, highest count first, ties
///   broken by ascending name.
///
/// # Errors
///
/// Returns the underlying `git2::Error` if the repository cannot be opened,
/// the revision walk cannot be set up or advanced, or a commit, tree, or
/// diff lookup fails.
pub fn extract_git_context(
    repo_path: &Path,
    max_commits: usize,
) -> Result<GitContext, git2::Error> {
    // Upper bound on the number of entries kept in `file_churn`.
    const MAX_CHURN_ENTRIES: usize = 20;

    let repo = git2::Repository::open(repo_path)?;

    let mut walker = repo.revwalk()?;
    walker.push_head()?;
    walker.set_sorting(git2::Sort::TIME)?;

    let mut commits: Vec<CommitInfo> = Vec::new();
    let mut touches_per_path: HashMap<String, usize> = HashMap::new();
    let mut commits_per_author: HashMap<String, usize> = HashMap::new();

    for step in walker.take(max_commits) {
        let commit = repo.find_commit(step?)?;

        let author = commit.author().name().unwrap_or("Unknown").to_string();
        *commits_per_author.entry(author.clone()).or_default() += 1;
        commits.push(CommitInfo {
            hash: format!("{:.7}", commit.id()),
            message: commit.summary().unwrap_or("").to_string(),
            author,
            date: format_date(commit.time().seconds()),
        });

        // Only the first parent is considered; a parentless commit is
        // diffed with `None` as the old tree.
        let old_tree = match commit.parent_count() {
            0 => None,
            _ => Some(commit.parent(0)?.tree()?),
        };
        let new_tree = commit.tree()?;
        let diff = repo.diff_tree_to_tree(old_tree.as_ref(), Some(&new_tree), None)?;

        // Only the per-file callback is supplied; returning `true` keeps
        // the iteration going over every delta.
        diff.foreach(
            &mut |delta, _| {
                if let Some(touched) = delta.new_file().path() {
                    *touches_per_path
                        .entry(touched.to_string_lossy().into_owned())
                        .or_default() += 1;
                }
                true
            },
            None,
            None,
            None,
        )?;
    }

    let mut file_churn: Vec<FileChurn> = touches_per_path
        .into_iter()
        .map(|(path, commit_count)| FileChurn { path, commit_count })
        .collect();
    file_churn.sort_by(|lhs, rhs| {
        rhs.commit_count
            .cmp(&lhs.commit_count)
            .then_with(|| lhs.path.cmp(&rhs.path))
    });
    file_churn.truncate(MAX_CHURN_ENTRIES);

    let mut contributors: Vec<ContributorInfo> = commits_per_author
        .into_iter()
        .map(|(name, commit_count)| ContributorInfo { name, commit_count })
        .collect();
    contributors.sort_by(|lhs, rhs| {
        rhs.commit_count
            .cmp(&lhs.commit_count)
            .then_with(|| lhs.name.cmp(&rhs.name))
    });

    Ok(GitContext {
        commits,
        file_churn,
        contributors,
    })
}
// Tests for `extract_git_context` and `format_date`. The repository tests
// build throwaway repositories in temporary directories via `tempfile` and
// `git2`.
#[cfg(test)]
mod tests {
use super::*;
use std::path::Path;
// Test helper: writes each `(relative path, content)` pair in `files` into
// the repository's working directory, stages it, and creates a commit on
// `HEAD` with `sig` as both author and committer. `parent_id` is the single
// parent of the new commit (`None` creates a parentless commit). Returns the
// id of the new commit. Panics on any failure.
fn make_commit(
repo: &git2::Repository,
sig: &git2::Signature,
message: &str,
files: &[(&str, &str)],
parent_id: Option<git2::Oid>,
) -> git2::Oid {
let workdir = repo.workdir().expect("bare repo not supported in test");
let mut index = repo.index().unwrap();
for (name, content) in files {
let file_path = workdir.join(name);
// Create intermediate directories so nested paths can be written.
if let Some(parent) = file_path.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(&file_path, content).unwrap();
index.add_path(Path::new(name)).unwrap();
}
index.write().unwrap();
let tree_id = index.write_tree().unwrap();
let tree = repo.find_tree(tree_id).unwrap();
let parents: Vec<git2::Commit> = match parent_id {
Some(id) => vec![repo.find_commit(id).unwrap()],
None => vec![],
};
// `Repository::commit` takes a slice of commit references.
let parent_refs: Vec<&git2::Commit> = parents.iter().collect();
repo.commit(Some("HEAD"), sig, sig, message, &tree, &parent_refs)
.unwrap()
}
// End-to-end check over two commits: commit order, contributor tally,
// per-file churn counts, and the shape of the formatted dates.
#[test]
fn test_extract_git_context() {
let dir = tempfile::TempDir::new().unwrap();
let repo = git2::Repository::init(dir.path()).unwrap();
let sig = git2::Signature::now("Test User", "test@test.com").unwrap();
let c1 = make_commit(
&repo,
&sig,
"initial commit",
&[("file.txt", "hello world")],
None,
);
let _c2 = make_commit(
&repo,
&sig,
"second commit",
&[("file.txt", "updated content"), ("another.txt", "new file")],
Some(c1),
);
let ctx = extract_git_context(dir.path(), 100).unwrap();
assert_eq!(ctx.commits.len(), 2, "expected 2 commits");
// The most recent commit is expected first.
assert_eq!(ctx.commits[0].message, "second commit");
assert_eq!(ctx.commits[1].message, "initial commit");
assert!(
ctx.contributors.iter().any(|c| c.name == "Test User"),
"expected 'Test User' contributor"
);
let contributor = ctx
.contributors
.iter()
.find(|c| c.name == "Test User")
.unwrap();
assert_eq!(contributor.commit_count, 2);
// file.txt is written in both commits, another.txt only in the second.
let file_txt = ctx.file_churn.iter().find(|f| f.path == "file.txt");
assert!(file_txt.is_some(), "file.txt should appear in churn list");
assert_eq!(file_txt.unwrap().commit_count, 2);
let another = ctx.file_churn.iter().find(|f| f.path == "another.txt");
assert!(another.is_some(), "another.txt should appear in churn list");
assert_eq!(another.unwrap().commit_count, 1);
// Dates only get a shape check (10 characters, three dash-separated parts)
// because the commits are stamped with the current time.
for commit in &ctx.commits {
assert_eq!(commit.date.len(), 10, "date '{}' wrong length", commit.date);
let parts: Vec<&str> = commit.date.split('-').collect();
assert_eq!(parts.len(), 3, "date '{}' missing dashes", commit.date);
}
}
// A freshly initialised repository has no commits, so extraction is expected
// to return an error (presumably from `push_head`, since HEAD does not point
// at a commit yet).
#[test]
fn test_empty_repo_no_commits() {
let dir = tempfile::TempDir::new().unwrap();
let _repo = git2::Repository::init(dir.path()).unwrap();
let result = extract_git_context(dir.path(), 100);
assert!(result.is_err(), "expected error for repo with no commits");
}
// A single parentless commit yields one commit entry and one contributor.
#[test]
fn test_single_commit() {
let dir = tempfile::TempDir::new().unwrap();
let repo = git2::Repository::init(dir.path()).unwrap();
let sig = git2::Signature::now("Alice", "alice@test.com").unwrap();
make_commit(&repo, &sig, "first", &[("hello.txt", "hi")], None);
let ctx = extract_git_context(dir.path(), 100).unwrap();
assert_eq!(ctx.commits.len(), 1);
assert_eq!(ctx.commits[0].message, "first");
assert_eq!(ctx.contributors.len(), 1);
assert_eq!(ctx.contributors[0].name, "Alice");
}
// `max_commits` caps the walk: with three commits and a limit of 2, only two
// entries are returned and the first one is the tip commit.
#[test]
fn test_max_commits_limit() {
let dir = tempfile::TempDir::new().unwrap();
let repo = git2::Repository::init(dir.path()).unwrap();
let sig = git2::Signature::now("Test", "t@t.com").unwrap();
let c1 = make_commit(&repo, &sig, "c1", &[("a.txt", "1")], None);
let c2 = make_commit(&repo, &sig, "c2", &[("a.txt", "2")], Some(c1));
let _c3 = make_commit(&repo, &sig, "c3", &[("a.txt", "3")], Some(c2));
let ctx = extract_git_context(dir.path(), 2).unwrap();
assert_eq!(ctx.commits.len(), 2);
assert_eq!(ctx.commits[0].message, "c3");
}
// Pins the epoch, the clamping of a negative timestamp to the epoch date,
// and one known modern timestamp.
#[test]
fn test_format_date() {
assert_eq!(format_date(0), "1970-01-01");
assert_eq!(format_date(-1), "1970-01-01");
assert_eq!(format_date(1_700_000_000), "2023-11-14");
}
// Commits by two different author names produce two contributor entries.
#[test]
fn test_multiple_contributors() {
let dir = tempfile::TempDir::new().unwrap();
let repo = git2::Repository::init(dir.path()).unwrap();
let alice = git2::Signature::now("Alice", "alice@test.com").unwrap();
let bob = git2::Signature::now("Bob", "bob@test.com").unwrap();
let c1 = make_commit(&repo, &alice, "by alice", &[("a.txt", "a")], None);
let _c2 = make_commit(&repo, &bob, "by bob", &[("b.txt", "b")], Some(c1));
let ctx = extract_git_context(dir.path(), 100).unwrap();
assert_eq!(ctx.contributors.len(), 2);
}
// The most frequently touched file must come first in the churn list:
// hot.txt is written in all three commits, cold.txt only in the first.
#[test]
fn test_file_churn_sorted() {
let dir = tempfile::TempDir::new().unwrap();
let repo = git2::Repository::init(dir.path()).unwrap();
let sig = git2::Signature::now("Test", "t@t.com").unwrap();
let c1 = make_commit(
&repo,
&sig,
"c1",
&[("hot.txt", "1"), ("cold.txt", "1")],
None,
);
let c2 = make_commit(&repo, &sig, "c2", &[("hot.txt", "2")], Some(c1));
let _c3 = make_commit(&repo, &sig, "c3", &[("hot.txt", "3")], Some(c2));
let ctx = extract_git_context(dir.path(), 100).unwrap();
assert_eq!(ctx.file_churn[0].path, "hot.txt");
assert_eq!(ctx.file_churn[0].commit_count, 3);
}
// A plain temporary directory is not a repository, so opening it must fail.
#[test]
fn test_not_a_git_repo() {
let dir = tempfile::TempDir::new().unwrap();
let result = extract_git_context(dir.path(), 100);
assert!(result.is_err());
}
}