1use regex::Regex;
13use serde::{Deserialize, Serialize};
14
15#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)]
19pub struct Artifacts {
20 #[serde(default, skip_serializing_if = "Vec::is_empty")]
21 pub commit_hashes: Vec<String>,
22 #[serde(default, skip_serializing_if = "Vec::is_empty")]
23 pub pr_urls: Vec<String>,
24 #[serde(default, skip_serializing_if = "Vec::is_empty")]
25 pub linked_issues: Vec<String>,
26 #[serde(default, skip_serializing_if = "Vec::is_empty")]
27 pub files: Vec<String>,
28 #[serde(default, skip_serializing_if = "Vec::is_empty")]
29 pub branch_names: Vec<String>,
30}
31
32impl Artifacts {
33 pub fn is_empty(&self) -> bool {
34 self.commit_hashes.is_empty()
35 && self.pr_urls.is_empty()
36 && self.linked_issues.is_empty()
37 && self.files.is_empty()
38 && self.branch_names.is_empty()
39 }
40
41 pub fn merge(&mut self, other: Artifacts) {
44 for (dst, src) in [
45 (&mut self.commit_hashes, other.commit_hashes),
46 (&mut self.pr_urls, other.pr_urls),
47 (&mut self.linked_issues, other.linked_issues),
48 (&mut self.files, other.files),
49 (&mut self.branch_names, other.branch_names),
50 ] {
51 for s in src {
52 if !dst.iter().any(|x| x == &s) {
53 dst.push(s);
54 }
55 }
56 }
57 }
58}
59
60pub fn extract(text: &str) -> Artifacts {
63 let mut a = Artifacts::default();
64
65 static_re(
70 r"\b[0-9a-f]{7,40}\b",
71 |m| {
72 if m.chars().all(|c| c.is_ascii_digit()) {
75 return;
76 }
77 a.commit_hashes.push(m.to_string());
78 },
79 text,
80 );
81
82 static_re(
84 r"https?://[A-Za-z0-9.\-]+/[A-Za-z0-9_./\-]+/(?:pull|merge_requests)/\d+",
85 |m| a.pr_urls.push(m.to_string()),
86 text,
87 );
88
89 static_re(
92 r"\b[A-Z]{2,}-\d+\b",
93 |m| a.linked_issues.push(m.to_string()),
94 text,
95 );
96
97 static_re(
104 r"(?:\./|\.?[A-Za-z0-9_\-]+/)+[A-Za-z0-9_.\-]+\.[A-Za-z0-9]{1,8}\b",
105 |m| a.files.push(m.to_string()),
106 text,
107 );
108
109 if let Ok(re) =
116 Regex::new(r"\bgit\s+(?:checkout\s+-b|switch\s+-c|branch)\s+([A-Za-z0-9._/\-]+)")
117 {
118 for cap in re.captures_iter(text) {
119 if let Some(m) = cap.get(1) {
120 a.branch_names.push(m.as_str().to_string());
121 }
122 }
123 }
124
125 dedup(&mut a.commit_hashes);
127 dedup(&mut a.pr_urls);
128 dedup(&mut a.linked_issues);
129 dedup(&mut a.files);
130 dedup(&mut a.branch_names);
131 a
132}
133
/// Removes repeated strings from `v` in place.
///
/// The first occurrence of each value stays where it is; later repeats are
/// dropped, so the relative order of the survivors is unchanged.
fn dedup(v: &mut Vec<String>) {
    let mut already_kept: std::collections::HashSet<String> =
        std::collections::HashSet::with_capacity(v.len());
    v.retain(|candidate| {
        if already_kept.contains(candidate) {
            return false;
        }
        already_kept.insert(candidate.clone());
        true
    });
}
138
139fn static_re(pat: &str, mut f: impl FnMut(&str), text: &str) {
140 if let Ok(re) = Regex::new(pat) {
141 for m in re.find_iter(text) {
142 f(m.as_str());
143 }
144 }
145}
146
#[cfg(test)]
mod tests {
    use super::*;

    /// Abbreviated hashes of different lengths are both reported, in order.
    #[test]
    fn extracts_commit_hash() {
        let found = extract("fixed in commit abc1234 and 9012abcdef").commit_hashes;
        assert_eq!(found, vec!["abc1234", "9012abcdef"]);
    }

    /// Plain numbers must not be mistaken for hashes.
    #[test]
    fn rejects_all_digit_commit_lookalikes() {
        let found = extract("ran tests on port 12345 in 2026").commit_hashes;
        assert!(found.is_empty());
    }

    /// A GitHub pull-request link is captured in full.
    #[test]
    fn extracts_github_pr_url() {
        let url = "https://github.com/Digital-Threads/Task-Journal/pull/42";
        let result = extract(&format!("see {}", url));
        assert_eq!(result.pr_urls, vec![url]);
    }

    /// Issue keys with different prefixes and digit counts are all found.
    #[test]
    fn extracts_linked_issues() {
        let result = extract("FIN-868 references JIRA-12345 and INC-7");
        assert_eq!(result.linked_issues, vec!["FIN-868", "JIRA-12345", "INC-7"]);
    }

    /// Nested relative paths and `./`-prefixed paths are both recognised.
    #[test]
    fn extracts_file_paths() {
        let result = extract("edited crates/tj-core/src/db.rs and ./README.md");
        for expected in ["crates/tj-core/src/db.rs", "./README.md"] {
            assert!(result.files.iter().any(|path| path == expected));
        }
    }

    /// Directories whose name starts with a dot keep that dot.
    #[test]
    fn extracts_dot_prefixed_dirs() {
        let result = extract("see .docs/specs/auth.md and .github/workflows/ci.yml");
        for expected in [".docs/specs/auth.md", ".github/workflows/ci.yml"] {
            assert!(result.files.iter().any(|path| path == expected));
        }
    }

    /// Both `checkout -b` and `switch -c` yield the branch argument.
    #[test]
    fn extracts_branch_names() {
        let input = "git checkout -b FIN-868-fix-paygate-fee then git switch -c hotfix/abc";
        let result = extract(input);
        assert_eq!(
            result.branch_names,
            vec!["FIN-868-fix-paygate-fee", "hotfix/abc"]
        );
    }

    /// The word "branches" in ordinary prose is not a git command.
    #[test]
    fn does_not_capture_branch_from_prose() {
        let prose = "Artifacts groups: commits, PRs, issues, files, branches names listed below";
        let a = extract(prose);
        assert!(
            a.branch_names.is_empty(),
            "regex must not pick up branches from prose, got: {:?}",
            a.branch_names
        );
    }

    /// Merging skips entries the target already holds.
    #[test]
    fn merge_dedupes() {
        let mut target = Artifacts::default();
        target.commit_hashes.push("abc1234".into());

        let mut incoming = Artifacts::default();
        incoming.commit_hashes.push("abc1234".into());
        incoming.commit_hashes.push("def5678".into());

        target.merge(incoming);
        assert_eq!(target.commit_hashes, vec!["abc1234", "def5678"]);
    }

    /// Nothing in, nothing out.
    #[test]
    fn empty_text_yields_empty_artifacts() {
        assert!(extract("").is_empty());
    }

    /// Serializing and deserializing gives back an equal value.
    #[test]
    fn json_round_trip() {
        let original = Artifacts {
            commit_hashes: vec!["abc1234".into()],
            linked_issues: vec!["FIN-868".into()],
            ..Default::default()
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Artifacts = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}