1use anyhow::Result;
9use std::collections::{HashMap, HashSet};
10use std::path::Path;
11use std::process::Command;
12
13#[derive(Debug, Clone)]
15pub struct Attribution {
16 pub responsible_tasks: Vec<String>,
18 pub cleared_tasks: Vec<String>,
20 pub confidence: AttributionConfidence,
22 pub evidence: Vec<AttributionEvidence>,
24}
25
/// How certain an attribution is.
///
/// The enum carries no data, so it is `Copy` and can be used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AttributionConfidence {
    /// Exactly one task was identified as responsible.
    High,
    /// Several tasks were identified as possibly responsible.
    Medium,
    /// No task could be identified, so every task of the wave is blamed.
    Low,
}
35
/// A single error location found in the failure output, plus whatever could be
/// learned about who last touched it.
///
/// `Default` yields an empty record, and `PartialEq`/`Eq` allow evidence to be
/// compared directly (for example in tests).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AttributionEvidence {
    /// File path exactly as it appeared in the failure output.
    pub file: String,
    /// Line number reported for the error, when one could be parsed.
    pub line: Option<u32>,
    /// Task whose commit last touched the line, when blame resolved it to a
    /// task of the current wave.
    pub task_id: Option<String>,
    /// Commit that last touched the line. `attribute_failure` currently
    /// leaves this as `None`.
    pub commit_sha: Option<String>,
    /// Excerpt of the error text. `attribute_failure` currently leaves this
    /// empty.
    pub error_snippet: String,
}
44
45pub fn parse_error_locations(stderr: &str, stdout: &str) -> Vec<(String, Option<u32>)> {
47 let mut locations = Vec::new();
48 let combined = format!("{}\n{}", stderr, stdout);
49
50 let patterns = [
58 r"(?:-->|error\[.*?\]:)\s+([^:\s]+):(\d+)", r"([^\s(]+)\((\d+),\d+\):", r"([^\s:]+):(\d+):\d+:", r#"File "([^"]+)", line (\d+)"#, r"([^\s:]+):(\d+)", ];
64
65 for pattern in patterns {
66 if let Ok(re) = regex::Regex::new(pattern) {
67 for cap in re.captures_iter(&combined) {
68 if let (Some(file), Some(line)) = (cap.get(1), cap.get(2)) {
69 let file_str = file.as_str().to_string();
70 let line_num = line.as_str().parse::<u32>().ok();
71 if !locations.iter().any(|(f, _)| f == &file_str) {
72 locations.push((file_str, line_num));
73 }
74 }
75 }
76 }
77 }
78
79 locations
80}
81
/// Extracts the task identifier from a commit message.
///
/// The identifier is the text between the first `[` and the next `]`, provided
/// that text is non-empty; an empty `[]` is skipped and scanning continues.
/// For example `"[auth:1] Add login endpoint"` yields `Some("auth:1")`.
///
/// Returns `None` when the message contains no such bracketed text.
pub fn extract_task_id_from_commit(message: &str) -> Option<String> {
    // Plain string scanning gives the same result as the regex
    // `\[([^\]]+)\]` without compiling a regex on every call; this function
    // runs once per commit line.
    let mut rest = message;
    loop {
        let open = rest.find('[')?;
        let after_open = &rest[open + 1..];
        // No closing bracket anywhere after this point means no later `[`
        // can match either.
        let close = after_open.find(']')?;
        if close > 0 {
            return Some(after_open[..close].to_string());
        }
        // Empty "[]": resume scanning just past the opening bracket.
        rest = after_open;
    }
}
89
90pub fn blame_line(working_dir: &Path, file: &str, line: u32) -> Result<Option<String>> {
92 let output = Command::new("git")
93 .current_dir(working_dir)
94 .args([
95 "blame",
96 "-L",
97 &format!("{},{}", line, line),
98 "--porcelain",
99 file,
100 ])
101 .output()?;
102
103 if !output.status.success() {
104 return Ok(None);
105 }
106
107 let blame_output = String::from_utf8_lossy(&output.stdout);
108
109 for blame_line in blame_output.lines() {
111 if blame_line.starts_with("summary ") {
112 let message = blame_line.strip_prefix("summary ").unwrap_or("");
113 return Ok(extract_task_id_from_commit(message));
114 }
115 }
116
117 Ok(None)
118}
119
120pub fn get_task_commits(
122 working_dir: &Path,
123 start_commit: Option<&str>,
124) -> Result<HashMap<String, Vec<String>>> {
125 let range = match start_commit {
126 Some(commit) => format!("{}..HEAD", commit),
127 None => "HEAD~10..HEAD".to_string(),
128 };
129
130 let output = Command::new("git")
131 .current_dir(working_dir)
132 .args(["log", "--format=%H %s", &range])
133 .output()?;
134
135 let mut task_commits: HashMap<String, Vec<String>> = HashMap::new();
136
137 for line in String::from_utf8_lossy(&output.stdout).lines() {
138 let parts: Vec<&str> = line.splitn(2, ' ').collect();
139 if parts.len() == 2 {
140 let sha = parts[0].to_string();
141 let message = parts[1];
142 if let Some(task_id) = extract_task_id_from_commit(message) {
143 task_commits.entry(task_id).or_default().push(sha);
144 }
145 }
146 }
147
148 Ok(task_commits)
149}
150
151pub fn get_task_changed_files(
153 working_dir: &Path,
154 task_id: &str,
155 start_commit: Option<&str>,
156) -> Result<HashSet<String>> {
157 let task_commits = get_task_commits(working_dir, start_commit)?;
158 let mut files = HashSet::new();
159
160 if let Some(commits) = task_commits.get(task_id) {
161 for sha in commits {
162 let output = Command::new("git")
163 .current_dir(working_dir)
164 .args(["diff-tree", "--no-commit-id", "--name-only", "-r", sha])
165 .output()?;
166
167 for file in String::from_utf8_lossy(&output.stdout).lines() {
168 files.insert(file.to_string());
169 }
170 }
171 }
172
173 Ok(files)
174}
175
176pub fn attribute_failure(
178 working_dir: &Path,
179 stderr: &str,
180 stdout: &str,
181 wave_tasks: &[String],
182 start_commit: Option<&str>,
183) -> Result<Attribution> {
184 let mut evidence = Vec::new();
185 let mut responsible: HashSet<String> = HashSet::new();
186
187 let locations = parse_error_locations(stderr, stdout);
189
190 for (file, line_opt) in &locations {
192 let mut ev = AttributionEvidence {
193 file: file.clone(),
194 line: *line_opt,
195 task_id: None,
196 commit_sha: None,
197 error_snippet: String::new(),
198 };
199
200 if let Some(line) = line_opt {
201 if let Ok(Some(task_id)) = blame_line(working_dir, file, *line) {
202 if wave_tasks.contains(&task_id) {
203 responsible.insert(task_id.clone());
204 ev.task_id = Some(task_id);
205 }
206 }
207 }
208
209 evidence.push(ev);
210 }
211
212 if responsible.is_empty() && !locations.is_empty() {
214 let error_files: HashSet<String> = locations.iter().map(|(f, _)| f.clone()).collect();
215
216 for task_id in wave_tasks {
217 if let Ok(task_files) = get_task_changed_files(working_dir, task_id, start_commit) {
218 if !task_files.is_disjoint(&error_files) {
219 responsible.insert(task_id.clone());
220 }
221 }
222 }
223 }
224
225 let confidence = if responsible.len() == 1 {
226 AttributionConfidence::High
227 } else if !responsible.is_empty() {
228 AttributionConfidence::Medium
229 } else {
230 responsible.extend(wave_tasks.iter().cloned());
232 AttributionConfidence::Low
233 };
234
235 let cleared: Vec<String> = wave_tasks
236 .iter()
237 .filter(|t| !responsible.contains(*t))
238 .cloned()
239 .collect();
240
241 Ok(Attribution {
242 responsible_tasks: responsible.into_iter().collect(),
243 cleared_tasks: cleared,
244 confidence,
245 evidence,
246 })
247}
248
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `(file, line)` tuple shape returned by `parse_error_locations`.
    fn located(file: &str, line: u32) -> (String, Option<u32>) {
        (file.to_string(), Some(line))
    }

    #[test]
    fn test_extract_task_id_from_commit() {
        let cases = [
            ("[auth:1] Add login endpoint", Some("auth:1")),
            ("[TASK-123] Fix bug", Some("TASK-123")),
            ("No task ID here", None),
        ];

        for (message, expected) in cases {
            assert_eq!(extract_task_id_from_commit(message).as_deref(), expected);
        }
    }

    #[test]
    fn test_parse_error_locations_rust() {
        // rustc-style diagnostic: the location follows the `-->` arrow.
        let compiler_output = r#"
error[E0308]: mismatched types
  --> src/main.rs:42:5
   |
42 |     let x: i32 = "hello";
   |                  ^^^^^^^ expected `i32`, found `&str`
"#;
        let found = parse_error_locations(compiler_output, "");
        assert!(!found.is_empty());
        assert!(found.contains(&located("src/main.rs", 42)));
    }

    #[test]
    fn test_parse_error_locations_python() {
        // Python traceback: `File "<path>", line <n>`.
        let traceback = r#"
Traceback (most recent call last):
  File "script.py", line 10, in <module>
    raise ValueError("test")
ValueError: test
"#;
        let found = parse_error_locations(traceback, "");
        assert!(!found.is_empty());
        assert!(found.contains(&located("script.py", 10)));
    }

    #[test]
    fn test_parse_error_locations_go() {
        // Go compiler: `<path>:<line>:<col>: message`.
        let found = parse_error_locations("./main.go:15:3: undefined: foo\n", "");
        assert!(!found.is_empty());
        assert!(found.contains(&located("./main.go", 15)));
    }

    #[test]
    fn test_parse_error_locations_empty() {
        // No output at all means no locations.
        assert_eq!(parse_error_locations("", "").len(), 0);
    }

    #[test]
    fn test_attribution_confidence() {
        assert!(AttributionConfidence::High == AttributionConfidence::High);
        assert!(AttributionConfidence::High != AttributionConfidence::Low);
    }
}
319
#[cfg(test)]
mod integration_tests {
    use super::*;
    use std::process::Command;
    use tempfile::TempDir;

    /// Runs `git <args>` inside `repo` and returns its trimmed stdout.
    ///
    /// Panics with git's stderr when the command cannot be spawned or exits
    /// unsuccessfully, so a broken fixture fails at the step that broke it
    /// instead of at a later assertion.
    fn git(repo: &std::path::Path, args: &[&str]) -> String {
        let output = Command::new("git")
            .current_dir(repo)
            .args(args)
            .output()
            .expect("failed to spawn git");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
        String::from_utf8_lossy(&output.stdout).trim().to_string()
    }

    /// Creates a throwaway repository with a committer identity configured.
    ///
    /// The returned `TempDir` must stay alive for as long as the repository
    /// is used; dropping it deletes the directory.
    fn init_repo() -> TempDir {
        let temp = TempDir::new().unwrap();
        git(temp.path(), &["init"]);
        git(temp.path(), &["config", "user.email", "test@test.com"]);
        git(temp.path(), &["config", "user.name", "Test"]);
        temp
    }

    /// Writes `contents` to `relative_path` (creating parent directories)
    /// and commits everything with `message`.
    fn commit_file(repo: &std::path::Path, relative_path: &str, contents: &str, message: &str) {
        let path = repo.join(relative_path);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(&path, contents).unwrap();
        git(repo, &["add", "."]);
        git(repo, &["commit", "-m", message]);
    }

    #[test]
    fn test_blame_in_real_git_repo() {
        let temp = init_repo();
        let repo_dir = temp.path();

        commit_file(repo_dir, "test.rs", "fn main() {}\n", "[auth:1] Initial commit");

        // Line 1 was introduced by the commit tagged `auth:1`.
        let result = blame_line(repo_dir, "test.rs", 1).unwrap();
        assert_eq!(result, Some("auth:1".to_string()));
    }

    #[test]
    fn test_get_task_commits() {
        let temp = init_repo();
        let repo_dir = temp.path();

        // Untagged baseline commit; everything after it is inspected.
        commit_file(repo_dir, "init.txt", "init", "Initial commit");
        let init_sha = git(repo_dir, &["rev-parse", "HEAD"]);

        commit_file(repo_dir, "a.txt", "a", "[task:1] First");
        commit_file(repo_dir, "b.txt", "b", "[task:2] Second");
        commit_file(repo_dir, "c.txt", "c", "[task:1] More for task 1");

        let task_commits = get_task_commits(repo_dir, Some(&init_sha)).unwrap();

        assert_eq!(task_commits.get("task:1").map(|v| v.len()), Some(2));
        assert_eq!(task_commits.get("task:2").map(|v| v.len()), Some(1));
    }

    #[test]
    fn test_attribute_failure_with_git_repo() {
        let temp = init_repo();
        let repo_dir = temp.path();

        commit_file(
            repo_dir,
            "src/main.rs",
            "fn main() {\n    let x: i32 = \"bad\";\n}\n",
            "[api:1] Add main file",
        );

        // Diagnostic pointing at line 2 of the file committed by `api:1`.
        let stderr = r#"error[E0308]: mismatched types
 --> src/main.rs:2:18
  |
2 |     let x: i32 = "bad";
  |                  ^^^^^ expected `i32`, found `&str`
"#;

        let wave_tasks = vec!["api:1".to_string(), "api:2".to_string()];
        let attribution = attribute_failure(repo_dir, stderr, "", &wave_tasks, None).unwrap();

        assert!(attribution.responsible_tasks.contains(&"api:1".to_string()));
        assert!(attribution.cleared_tasks.contains(&"api:2".to_string()));
        assert_eq!(attribution.confidence, AttributionConfidence::High);
    }
}