use std::path::Path;
use antigen::learn::szz::{CommitMeta, Corpus, DefectFixPair, is_fix_commit};
/// Errors reported while mining a git repository for defect/fix pairs.
///
/// Both variants carry the underlying failure already rendered as text, so the
/// type does not expose the original error values.
#[derive(Debug)]
pub enum MineError {
    /// The repository could not be opened.
    Open(String),
    /// Something failed after the repository was opened, while traversing its
    /// references or commits.
    Walk(String),
}
impl std::fmt::Display for MineError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Open(p) => write!(f, "could not open git repository: {p}"),
Self::Walk(c) => write!(f, "git object-graph walk failed: {c}"),
}
}
}
// Marker impl so `MineError` can be used wherever `std::error::Error` is expected.
// The body is empty, so the trait's default methods apply; the variants only
// hold strings, so there is no underlying error value to return from `source`.
impl std::error::Error for MineError {}
/// Mines defect/fix commit pairs from the git repository at `repo_path`.
///
/// Every reference in the repository is peeled to an object id and used as a
/// seed for one revision walk. Each commit the walk yields is described by a
/// [`CommitMeta`] holding only its id and message summary (`files_changed` is
/// left empty and `cosmetic_only` is `false`) and handed to [`is_fix_commit`].
/// A commit that is accepted and has at least one parent contributes one
/// [`DefectFixPair`]: its first parent is the `defect_commit`, the commit
/// itself is the `fix_commit`, and `path` is empty. Commits without a parent
/// are skipped, and a pair equal to one already collected is not added again.
///
/// # Errors
///
/// * [`MineError::Open`] if `gix::open` fails for `repo_path`.
/// * [`MineError::Walk`] if listing or peeling a reference, starting or
///   advancing the revision walk, looking up a commit, or reading its message
///   fails. The first failure aborts the run; no partial corpus is returned.
pub fn mine_repo(repo_path: &Path) -> Result<Corpus, MineError> {
    /// Wraps any displayable failure as a [`MineError::Walk`].
    fn walk_err<E: std::fmt::Display>(cause: E) -> MineError {
        MineError::Walk(cause.to_string())
    }

    let repo = match gix::open(repo_path) {
        Ok(opened) => opened,
        Err(cause) => return Err(MineError::Open(cause.to_string())),
    };

    // Seed the walk from every reference. A reference that cannot be read or
    // peeled aborts the run instead of being skipped.
    let ref_store = repo.references().map_err(walk_err)?;
    let mut seeds: Vec<gix::ObjectId> = Vec::new();
    for reference in ref_store.all().map_err(walk_err)? {
        let mut reference = reference.map_err(walk_err)?;
        let target = reference.peel_to_id().map_err(walk_err)?;
        seeds.push(target.detach());
    }

    let history = repo.rev_walk(seeds).all().map_err(walk_err)?;

    let mut pairs: Vec<DefectFixPair> = Vec::new();
    for step in history {
        let step = step.map_err(walk_err)?;
        let commit = repo.find_commit(step.id).map_err(walk_err)?;
        let message = commit.message().map_err(walk_err)?;
        let meta = CommitMeta {
            id: step.id.to_string(),
            subject: message.summary().to_string(),
            files_changed: Vec::new(),
            cosmetic_only: false,
        };

        if !is_fix_commit(&meta) {
            continue;
        }

        // Only the first parent is paired with the fix; a commit with no
        // parent has nothing to pair with.
        if let Some(first_parent) = step.parent_ids.first() {
            let candidate = DefectFixPair {
                defect_commit: first_parent.to_string(),
                fix_commit: meta.id,
                path: String::new(),
            };
            if !pairs.contains(&candidate) {
                pairs.push(candidate);
            }
        }
    }

    Ok(Corpus { pairs })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the nearest directory at or above `start` that contains a
    /// `.git` entry, or `None` if no ancestor has one.
    fn find_repo_root(start: &Path) -> Option<std::path::PathBuf> {
        start
            .ancestors()
            .find(|p| p.join(".git").exists())
            .map(Path::to_path_buf)
    }

    /// Mines the repository that contains this crate and checks that the
    /// corpus is non-empty and every pair is well-formed.
    ///
    /// Returns without asserting anything when no enclosing `.git` is found.
    #[test]
    fn mine_repo_mines_a_real_corpus_from_this_repo_git() {
        let manifest = Path::new(env!("CARGO_MANIFEST_DIR"));
        let Some(root) = find_repo_root(manifest) else {
            return;
        };
        let corpus = mine_repo(&root).expect("mining this repo's .git must not error");
        assert!(
            corpus.size() > 0,
            "full-object-graph walk of a repo with fix-history must mine a non-empty \
             corpus (size 0 ⇒ a tip-revwalk regression — the starvation bug)"
        );
        for pair in &corpus.pairs {
            assert!(!pair.fix_commit.is_empty(), "fix commit id must be present");
            assert!(
                !pair.defect_commit.is_empty(),
                "defect (parent) id must be present"
            );
            assert_ne!(
                pair.fix_commit, pair.defect_commit,
                "a fix and its parent are distinct commits"
            );
        }
    }

    /// A path that is not a git repository must surface as `MineError::Open`.
    #[test]
    fn mine_repo_errors_cleanly_on_a_non_repo() {
        let not_a_repo = std::env::temp_dir().join("antigen-szz-definitely-not-a-git-repo-xyz");
        assert!(matches!(mine_repo(&not_a_repo), Err(MineError::Open(_))));
    }

    /// Runs `git` with `args` inside `dir`, panicking if the binary cannot be
    /// started or exits unsuccessfully.
    fn git(dir: &Path, args: &[&str]) {
        let status = std::process::Command::new("git")
            .current_dir(dir)
            .args(args)
            .status()
            .expect("git must be on PATH for this fixture");
        assert!(status.success(), "git {args:?} failed in {}", dir.display());
    }

    /// Builds a one-commit repository in a temporary directory, then adds a
    /// hand-written `refs/heads/dangling` file naming an object id the fixture
    /// never created, and expects `mine_repo` to fail with `MineError::Walk`.
    #[test]
    fn mine_repo_abandons_on_an_unpeelable_ref_never_silently_drops() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let repo = tmp.path();
        git(repo, &["init", "-q", "-b", "main"]);
        git(repo, &["config", "user.email", "t@t.t"]);
        git(repo, &["config", "user.name", "t"]);
        std::fs::write(repo.join("f.txt"), "a").unwrap();
        git(repo, &["add", "f.txt"]);
        git(repo, &["commit", "-qm", "fix: a panic in Drop"]);
        std::fs::write(
            repo.join(".git/refs/heads/dangling"),
            "0000000000000000000000000000000000000001\n",
        )
        .unwrap();
        let result = mine_repo(repo);
        assert!(
            matches!(result, Err(MineError::Walk(_))),
            "ATK-DEEPCOMB-SZZ-1: an unpeelable (dangling) ref must ABANDON the walk with \
             MineError::Walk — abandon-don't-truncate (the function's own contract). \
             Silently dropping it excludes its ancestry from the --all seed and \
             under-reports Corpus::size (the tip-revwalk starvation the seed prevents). \
             Got: {result:?}",
        );
    }
}