use serde::{Deserialize, Serialize};
/// Metadata for a single commit, as consumed by [`is_fix_commit`] and [`mine`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CommitMeta {
/// Commit identifier. `mine` hands it to the blame lookup and records it as
/// the fix side of every pair it emits for this commit.
pub id: String,
/// Subject line. `is_fix_commit` inspects it for fix keywords and for an
/// issue reference (`#` followed by a digit).
pub subject: String,
/// Presumably the paths touched by the commit (inferred from the name);
/// nothing in this section reads the field.
pub files_changed: Vec<String>,
/// When `true`, `is_fix_commit` rejects the commit regardless of its subject.
/// Falls back to `false` when the field is missing from deserialized input.
#[serde(default)]
pub cosmetic_only: bool,
}
/// One blamed line, as yielded by the blame lookup passed to [`mine`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BlamedLine {
/// Commit the line is attributed to. `mine` records it as the defect side of
/// a pair, unless it equals the id of the fix commit being examined.
pub introducing_commit: String,
/// Path attached to the blamed line; copied verbatim into the emitted pair.
pub path: String,
}
/// A (defect commit, fix commit, path) triple produced by [`mine`].
///
/// Two pairs compare equal only when all three fields match (derived
/// `PartialEq`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DefectFixPair {
/// Commit blamed for the line, taken from `BlamedLine::introducing_commit`.
pub defect_commit: String,
/// Id of the commit that was classified as a fix.
pub fix_commit: String,
/// Path of the blamed line, taken from `BlamedLine::path`.
pub path: String,
}
/// The collection of defect/fix pairs returned by [`mine`].
///
/// The derived `Default` yields a corpus with no pairs.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Corpus {
/// The pairs themselves, in the order they were pushed.
pub pairs: Vec<DefectFixPair>,
}
impl Corpus {
    /// Returns how many defect/fix pairs the corpus currently holds.
    ///
    /// The value is always read from `pairs`; nothing is cached.
    #[must_use]
    pub const fn size(&self) -> usize {
        let pairs = &self.pairs;
        pairs.len()
    }

    /// Returns `true` when the corpus holds strictly fewer than `min_pairs`
    /// pairs.
    ///
    /// A corpus with exactly `min_pairs` pairs is *not* starved, and no corpus
    /// is starved against a threshold of `0`.
    #[must_use]
    pub const fn is_starved(&self, min_pairs: usize) -> bool {
        min_pairs > self.pairs.len()
    }
}
/// Lowercase keywords that `is_fix_commit` looks for in the lowercased commit
/// subject when deciding whether a commit is a fix.
const FIX_KEYWORDS: [&str; 5] = ["fix", "bug", "patch", "revert", "hotfix"];
/// Classifies a commit as a bug fix based on its subject line.
///
/// Returns `true` when the commit is not flagged `cosmetic_only` and its
/// subject either
///
/// * contains a word that starts with one of `FIX_KEYWORDS`
///   (case-insensitive), or
/// * references an issue, i.e. contains `#` directly followed by a digit.
///
/// Keywords are matched against the *start of each alphanumeric word* rather
/// than as raw substrings. Substring matching misclassifies unrelated subjects
/// such as "add prefix support" (`fix`), "more debug logging" (`bug`) or
/// "rework dispatch table" (`patch`). Prefix matching still accepts the usual
/// inflections and compounds: "fixes", "fixed", "bugfix", "patched",
/// "reverted", "hotfixes". Compounds that merely *end* in a keyword (e.g.
/// "quickfix") are no longer matched.
#[must_use]
pub fn is_fix_commit(meta: &CommitMeta) -> bool {
    // A cosmetic-only commit is never a fix, whatever its subject claims.
    if meta.cosmetic_only {
        return false;
    }
    let subject = meta.subject.to_lowercase();
    // Any non-alphanumeric character (spaces, ':', '-', '_', '!', ...) ends a
    // word, so "fix:", "lint-fix" and "auto_fix" all expose a "fix" word.
    let has_keyword = subject
        .split(|c: char| !c.is_alphanumeric())
        .any(|word| FIX_KEYWORDS.iter().any(|&kw| word.starts_with(kw)));
    has_keyword || references_issue(&meta.subject)
}
/// Reports whether `subject` contains a `#` immediately followed by an ASCII
/// digit, e.g. `#42`.
///
/// A lone `#`, a trailing `#`, or a `#` followed by anything other than
/// `0`-`9` does not count.
fn references_issue(subject: &str) -> bool {
    // Byte-wise scanning is sound on UTF-8 text: `#` and the ASCII digits are
    // single-byte code points and never occur inside a multi-byte sequence.
    subject
        .as_bytes()
        .windows(2)
        .any(|window| matches!(window, [b'#', next] if next.is_ascii_digit()))
}
/// Mines defect/fix pairs from a commit list and a blame lookup.
///
/// For every commit in `commits` that [`is_fix_commit`] accepts, `blame_of` is
/// called with the commit's id. Each returned line becomes a pair
/// `(line.introducing_commit, fix.id, line.path)`, except that
///
/// * lines whose introducing commit is the fix itself are skipped, and
/// * a pair already emitted is not emitted again.
///
/// Pairs appear in the returned [`Corpus`] in first-seen order.
///
/// # Parameters
///
/// * `commits` - the commits to scan, in the order they should be processed.
/// * `blame_of` - maps a fix commit id to the blamed lines for that commit;
///   it is only invoked for commits classified as fixes.
///
/// # Performance
///
/// Duplicates are detected with a hash set of borrowed keys, so the cost is
/// linear in the number of blamed lines. A linear `Vec::contains` scan per
/// line would instead be quadratic in the number of emitted pairs.
#[must_use]
pub fn mine<'a, F>(commits: &'a [CommitMeta], blame_of: F) -> Corpus
where
    F: Fn(&'a str) -> &'a [BlamedLine],
{
    // Keys borrow from `commits` and from the blame slices (both live for
    // `'a`), so deduplication allocates nothing per candidate.
    let mut seen = std::collections::HashSet::new();
    let mut pairs = Vec::new();

    for fix in commits.iter().filter(|c| is_fix_commit(c)) {
        let fix_id: &'a str = fix.id.as_str();
        for line in blame_of(fix_id) {
            // A line the fix itself introduced cannot be the defect it fixes.
            if line.introducing_commit.as_str() == fix_id {
                continue;
            }
            let key = (
                line.introducing_commit.as_str(),
                fix_id,
                line.path.as_str(),
            );
            // `insert` returns `true` only the first time a key is seen;
            // owned strings are cloned only for pairs that are kept.
            if seen.insert(key) {
                pairs.push(DefectFixPair {
                    defect_commit: line.introducing_commit.clone(),
                    fix_commit: fix.id.clone(),
                    path: line.path.clone(),
                });
            }
        }
    }

    Corpus { pairs }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a non-cosmetic commit that touches `src/lib.rs`.
    fn commit(id: &str, subject: &str) -> CommitMeta {
        CommitMeta {
            id: id.to_owned(),
            subject: subject.to_owned(),
            files_changed: vec![String::from("src/lib.rs")],
            cosmetic_only: false,
        }
    }

    /// Builds a blamed line attributed to `introducing` at `path`.
    fn blamed(introducing: &str, path: &str) -> BlamedLine {
        BlamedLine {
            introducing_commit: introducing.to_owned(),
            path: path.to_owned(),
        }
    }

    /// Builds the pair expected for a given defect, fix and path.
    fn pair(defect: &str, fix: &str, path: &str) -> DefectFixPair {
        DefectFixPair {
            defect_commit: defect.to_owned(),
            fix_commit: fix.to_owned(),
            path: path.to_owned(),
        }
    }

    #[test]
    fn fix_keywords_classify_fix_commits() {
        let fixes = [
            ("a", "fix: panic in Drop"),
            ("b", "Fix the off-by-one"),
            ("c", "bug: wrong index"),
            ("d", "revert the bad merge"),
            ("e", "hotfix for prod"),
            ("f", "patch the leak"),
        ];
        for (id, subject) in fixes {
            assert!(is_fix_commit(&commit(id, subject)));
        }
    }

    #[test]
    fn non_fix_commits_are_not_classified() {
        let non_fixes = [
            ("a", "feat: add the affinity type"),
            ("b", "docs: clarify the README"),
            ("c", "refactor module layout"),
        ];
        for (id, subject) in non_fixes {
            assert!(!is_fix_commit(&commit(id, subject)));
        }
    }

    #[test]
    fn issue_reference_classifies_a_fix() {
        for (id, subject) in [("a", "close #1234"), ("b", "resolves #42 cleanly")] {
            assert!(is_fix_commit(&commit(id, subject)));
        }
        // A `#` that is not followed by a digit is not an issue reference.
        assert!(!is_fix_commit(&commit("c", "# Heading, not an issue")));
    }

    #[test]
    fn cosmetic_only_commit_is_filtered_even_if_subject_says_fix() {
        let cosmetic = CommitMeta {
            cosmetic_only: true,
            ..commit("a", "fix formatting")
        };
        assert!(!is_fix_commit(&cosmetic));
    }

    #[test]
    fn mines_defect_fix_pairs_from_classified_fixes_and_blame() {
        let commits = [
            commit("d1", "feat: introduce the guard"),
            commit("f1", "fix: guard panics on Drop"),
            commit("x", "docs: unrelated"),
        ];
        // Two identical blamed lines must collapse into a single pair.
        let f1_blame = [blamed("d1", "src/lib.rs"), blamed("d1", "src/lib.rs")];
        let corpus = mine(&commits, |id| if id == "f1" { &f1_blame[..] } else { &[] });
        assert_eq!(corpus.size(), 1);
        assert_eq!(corpus.pairs[0], pair("d1", "f1", "src/lib.rs"));
    }

    #[test]
    fn a_fix_blaming_itself_yields_no_pair() {
        let commits = [commit("f1", "fix: add a brand-new guard line")];
        let self_blame = [blamed("f1", "src/lib.rs")];
        let corpus = mine(&commits, |id| if id == "f1" { &self_blame[..] } else { &[] });
        assert_eq!(corpus.size(), 0);
    }

    #[test]
    fn a_non_fix_commit_with_blame_contributes_nothing() {
        let commits = [commit("feat1", "feat: shiny new module")];
        let blame = [blamed("d0", "src/lib.rs")];
        let corpus = mine(&commits, |id| if id == "feat1" { &blame[..] } else { &[] });
        assert_eq!(corpus.size(), 0);
    }

    #[test]
    fn size_is_the_measured_count_never_baked() {
        let corpus = Corpus {
            pairs: vec![pair("d1", "f1", "a.rs"), pair("d2", "f2", "b.rs")],
        };
        assert_eq!(corpus.size(), 2);
    }

    #[test]
    fn is_starved_is_the_tip_revwalk_tripwire() {
        assert!(Corpus::default().is_starved(5));

        let healthy = Corpus {
            pairs: (0..10)
                .map(|i| pair(&format!("d{i}"), &format!("f{i}"), "src/lib.rs"))
                .collect(),
        };
        assert!(!healthy.is_starved(5));
    }

    #[test]
    fn corpus_serde_roundtrips() {
        let corpus = Corpus {
            pairs: vec![pair("d1", "f1", "src/lib.rs")],
        };
        let json = serde_json::to_string(&corpus).expect("serialize");
        let back: Corpus = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(corpus, back);
    }
}