use regex::Regex;
use serde::{Deserialize, Serialize};
/// Artifact references grouped by kind, as collected by [`extract`].
///
/// Every list is optional in the serialized form: a field missing from the input
/// deserializes to an empty `Vec` (`serde(default)`), and an empty `Vec` is left out
/// of the output (`skip_serializing_if = "Vec::is_empty"`).
#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Artifacts {
/// Commit-hash-like tokens; `extract` collects 7-40 character lowercase hex runs
/// that are not made up solely of digits.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub commit_hashes: Vec<String>,
/// Pull/merge request URLs; `extract` collects `http(s)` URLs whose path contains
/// `/pull/<n>` or `/merge_requests/<n>`.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub pr_urls: Vec<String>,
/// Issue keys; `extract` collects tokens of two or more uppercase ASCII letters,
/// a dash and digits (e.g. `FIN-868`).
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub linked_issues: Vec<String>,
/// File paths; `extract` collects slash-separated paths ending in a 1-8 character
/// alphanumeric extension.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub files: Vec<String>,
/// Branch names; `extract` collects the token following `checkout -b`,
/// `switch -c` or `branch`.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub branch_names: Vec<String>,
}
impl Artifacts {
    /// Returns `true` when every artifact list is empty.
    pub fn is_empty(&self) -> bool {
        [
            &self.commit_hashes,
            &self.pr_urls,
            &self.linked_issues,
            &self.files,
            &self.branch_names,
        ]
        .iter()
        .all(|list| list.is_empty())
    }

    /// Folds `other` into `self`, one list at a time.
    ///
    /// An incoming entry is appended only when the destination list does not already
    /// contain an equal string, so existing entries keep their positions and new ones
    /// follow in the order they appear in `other`. Duplicates that were already in
    /// `self` before the call are left untouched.
    pub fn merge(&mut self, other: Artifacts) {
        // Appends each incoming string that the destination does not hold yet.
        fn absorb(into: &mut Vec<String>, incoming: Vec<String>) {
            for item in incoming {
                if !into.contains(&item) {
                    into.push(item);
                }
            }
        }

        absorb(&mut self.commit_hashes, other.commit_hashes);
        absorb(&mut self.pr_urls, other.pr_urls);
        absorb(&mut self.linked_issues, other.linked_issues);
        absorb(&mut self.files, other.files);
        absorb(&mut self.branch_names, other.branch_names);
    }
}
/// Scans free-form `text` for artifact references and returns them grouped by kind.
///
/// Each kind is recognised by its own regular expression:
///
/// * commit hashes: 7-40 lowercase hex characters on word boundaries, skipping
///   tokens that consist only of digits (port numbers, years, ...);
/// * PR URLs: `http(s)` URLs whose path contains `/pull/<n>` or `/merge_requests/<n>`;
/// * linked issues: two or more uppercase ASCII letters, `-`, digits (e.g. `FIN-868`);
/// * files: slash-separated paths, optionally starting with `./` or a dot-prefixed
///   directory, ending in a 1-8 character alphanumeric extension;
/// * branch names: the token following `checkout -b`, `switch -c` or `branch`.
///
/// Within each list, entries keep the order of first appearance and duplicates are
/// removed. A pattern that fails to compile contributes nothing.
pub fn extract(text: &str) -> Artifacts {
    let mut a = Artifacts::default();

    // Commit hashes. The hex class alone also accepts pure numbers, so those are
    // filtered out here.
    static_re(
        r"\b[0-9a-f]{7,40}\b",
        |m| {
            if !m.chars().all(|c| c.is_ascii_digit()) {
                a.commit_hashes.push(m.to_string());
            }
        },
        text,
    );

    // Pull / merge request URLs.
    static_re(
        r"https?://[A-Za-z0-9.\-]+/[A-Za-z0-9_./\-]+/(?:pull|merge_requests)/\d+",
        |m| a.pr_urls.push(m.to_string()),
        text,
    );

    // Issue tracker keys.
    static_re(
        r"\b[A-Z]{2,}-\d+\b",
        |m| a.linked_issues.push(m.to_string()),
        text,
    );

    // File paths.
    static_re(
        r"(?:\./|\.?[A-Za-z0-9_\-]+/)+[A-Za-z0-9_.\-]+\.[A-Za-z0-9]{1,8}\b",
        |m| a.files.push(m.to_string()),
        text,
    );

    // Branch names need capture group 1 rather than the whole match, so this pattern
    // is run directly (and only once) instead of through `static_re`.
    // NOTE(review): the bare `branch` alternative matches the word followed by any
    // token, so prose such as "branch is" yields "is" — tighten if that matters.
    if let Ok(re) = Regex::new(r"(?:checkout -b|switch -c|branch)\s+([A-Za-z0-9._/\-]+)") {
        a.branch_names.extend(
            re.captures_iter(text)
                .filter_map(|cap| cap.get(1))
                .map(|m| m.as_str().to_string()),
        );
    }

    dedup(&mut a.commit_hashes);
    dedup(&mut a.pr_urls);
    dedup(&mut a.linked_issues);
    dedup(&mut a.files);
    dedup(&mut a.branch_names);
    a
}
/// Removes repeated strings from `v`, keeping the first occurrence of each value and
/// the relative order of the survivors.
fn dedup(v: &mut Vec<String>) {
    let candidates = std::mem::take(v);
    let mut seen: std::collections::HashSet<String> =
        std::collections::HashSet::with_capacity(candidates.len());
    for candidate in candidates {
        if seen.contains(&candidate) {
            continue;
        }
        seen.insert(candidate.clone());
        v.push(candidate);
    }
}
/// Compiles `pat` and calls `f` with the matched text of every match found in `text`,
/// in order of appearance.
///
/// The pattern is compiled on every call. If it fails to compile, `f` is never
/// invoked and the error is discarded.
fn static_re(pat: &str, mut f: impl FnMut(&str), text: &str) {
    let re = match Regex::new(pat) {
        Ok(compiled) => compiled,
        Err(_) => return,
    };
    re.find_iter(text).for_each(|hit| f(hit.as_str()));
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hex tokens containing at least one letter are reported as commit hashes.
    #[test]
    fn extracts_commit_hash() {
        let found = extract("fixed in commit abc1234 and 9012abcdef").commit_hashes;
        let expected = vec!["abc1234".to_string(), "9012abcdef".to_string()];
        assert_eq!(found, expected);
    }

    /// Purely numeric tokens (ports, years) must not be mistaken for hashes.
    #[test]
    fn rejects_all_digit_commit_lookalikes() {
        let found = extract("ran tests on port 12345 in 2026").commit_hashes;
        assert!(found.is_empty());
    }

    /// A GitHub pull request URL is captured in full.
    #[test]
    fn extracts_github_pr_url() {
        let artifacts = extract("see https://github.com/Digital-Threads/Task-Journal/pull/42");
        let expected =
            vec!["https://github.com/Digital-Threads/Task-Journal/pull/42".to_string()];
        assert_eq!(artifacts.pr_urls, expected);
    }

    /// Issue keys are returned in order of appearance.
    #[test]
    fn extracts_linked_issues() {
        let artifacts = extract("FIN-868 references JIRA-12345 and INC-7");
        let expected = vec![
            "FIN-868".to_string(),
            "JIRA-12345".to_string(),
            "INC-7".to_string(),
        ];
        assert_eq!(artifacts.linked_issues, expected);
    }

    /// Both nested relative paths and `./`-prefixed paths are recognised.
    #[test]
    fn extracts_file_paths() {
        let files = extract("edited crates/tj-core/src/db.rs and ./README.md").files;
        for wanted in ["crates/tj-core/src/db.rs", "./README.md"] {
            assert!(files.iter().any(|f| f.as_str() == wanted));
        }
    }

    /// Paths whose first directory starts with a dot are recognised.
    #[test]
    fn extracts_dot_prefixed_dirs() {
        let files = extract("see .docs/specs/auth.md and .github/workflows/ci.yml").files;
        for wanted in [".docs/specs/auth.md", ".github/workflows/ci.yml"] {
            assert!(files.iter().any(|f| f.as_str() == wanted));
        }
    }

    /// Names following `checkout -b` and `switch -c` are captured.
    #[test]
    fn extracts_branch_names() {
        let artifacts =
            extract("git checkout -b FIN-868-fix-paygate-fee then switch -c hotfix/abc");
        let expected = vec![
            "FIN-868-fix-paygate-fee".to_string(),
            "hotfix/abc".to_string(),
        ];
        assert_eq!(artifacts.branch_names, expected);
    }

    /// Merging skips entries the destination already holds.
    #[test]
    fn merge_dedupes() {
        let mut target = Artifacts {
            commit_hashes: vec!["abc1234".to_string()],
            ..Artifacts::default()
        };
        let incoming = Artifacts {
            commit_hashes: vec!["abc1234".to_string(), "def5678".to_string()],
            ..Artifacts::default()
        };
        target.merge(incoming);
        let expected = vec!["abc1234".to_string(), "def5678".to_string()];
        assert_eq!(target.commit_hashes, expected);
    }

    /// Empty input produces no artifacts of any kind.
    #[test]
    fn empty_text_yields_empty_artifacts() {
        assert!(extract("").is_empty());
    }

    /// Serializing and deserializing yields an equal value.
    #[test]
    fn json_round_trip() {
        let original = Artifacts {
            commit_hashes: vec!["abc1234".to_string()],
            linked_issues: vec!["FIN-868".to_string()],
            ..Artifacts::default()
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Artifacts = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}