scud/
attribution.rs

1//! Failure attribution using git blame
2//!
3//! Maps validation errors to specific tasks by:
4//! 1. Parsing error output for file:line references
5//! 2. Using git blame to find which commit changed each line
6//! 3. Extracting task IDs from commit messages ([TASK-ID] prefix)
7
8use anyhow::Result;
9use std::collections::{HashMap, HashSet};
10use std::path::Path;
11use std::process::Command;
12
/// Result of attributing a failure to tasks.
///
/// Derives `PartialEq`/`Eq` so that whole results can be compared directly
/// (for example with `assert_eq!` in tests). Equality is field-by-field and
/// therefore sensitive to the order of the task vectors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Attribution {
    /// Task IDs that likely caused the failure
    pub responsible_tasks: Vec<String>,
    /// Task IDs that are probably not responsible
    pub cleared_tasks: Vec<String>,
    /// Whether attribution was definitive or uncertain
    pub confidence: AttributionConfidence,
    /// Raw evidence used for attribution
    pub evidence: Vec<AttributionEvidence>,
}

/// How certain an [`Attribution`] is.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttributionConfidence {
    /// Clear single task responsible
    High,
    /// Multiple tasks may be responsible
    Medium,
    /// Could not determine - all tasks suspect
    Low,
}

/// One error location found in the validation output, together with whatever
/// could be learned about who last changed it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttributionEvidence {
    /// File path of the error location
    pub file: String,
    /// Line number of the error location, if one is known
    pub line: Option<u32>,
    /// Task this location was attributed to, if any
    pub task_id: Option<String>,
    /// SHA of the commit this location was attributed to, if known
    pub commit_sha: Option<String>,
    /// Excerpt of the error output for this location (may be empty)
    pub error_snippet: String,
}
44
45/// Parse error output for file:line references
46pub fn parse_error_locations(stderr: &str, stdout: &str) -> Vec<(String, Option<u32>)> {
47    let mut locations = Vec::new();
48    let combined = format!("{}\n{}", stderr, stdout);
49
50    // Common patterns:
51    // Rust: --> src/main.rs:42:5
52    // TypeScript: src/index.ts(10,5): error
53    // Go: ./main.go:15:3:
54    // Python: File "script.py", line 10
55    // Generic: filename:line or filename:line:col
56
57    let patterns = [
58        r"(?:-->|error\[.*?\]:)\s+([^:\s]+):(\d+)", // Rust
59        r"([^\s(]+)\((\d+),\d+\):",                 // TypeScript
60        r"([^\s:]+):(\d+):\d+:",                    // Go/generic
61        r#"File "([^"]+)", line (\d+)"#,            // Python
62        r"([^\s:]+):(\d+)",                         // Generic fallback
63    ];
64
65    for pattern in patterns {
66        if let Ok(re) = regex::Regex::new(pattern) {
67            for cap in re.captures_iter(&combined) {
68                if let (Some(file), Some(line)) = (cap.get(1), cap.get(2)) {
69                    let file_str = file.as_str().to_string();
70                    let line_num = line.as_str().parse::<u32>().ok();
71                    if !locations.iter().any(|(f, _)| f == &file_str) {
72                        locations.push((file_str, line_num));
73                    }
74                }
75            }
76        }
77    }
78
79    locations
80}
81
/// Get the task ID from a commit message.
///
/// Returns the text inside the first non-empty `[...]` group in `message`,
/// e.g. `"[auth:1] Add login"` yields `Some("auth:1")`. The group does not
/// have to be at the very start of the message, so subjects such as
/// `"fixup! [auth:1] ..."` are recognised too. Empty `[]` pairs are skipped.
///
/// Returns `None` when the message contains no such group.
pub fn extract_task_id_from_commit(message: &str) -> Option<String> {
    // Plain string scanning instead of a regex: this runs once per commit
    // subject, and compiling a pattern on every call is wasted work.
    let mut rest = message;
    loop {
        let open = rest.find('[')?;
        // '[' is a single byte, so `open + 1` is always a char boundary.
        let after_open = &rest[open + 1..];
        // No ']' after this '[' means none after any later '[' either.
        let close = after_open.find(']')?;
        if close > 0 {
            return Some(after_open[..close].to_string());
        }
        // `[]` carries no ID; keep looking after the empty pair.
        rest = &after_open[close + 1..];
    }
}
89
90/// Use git blame to find which task changed a specific line
91pub fn blame_line(working_dir: &Path, file: &str, line: u32) -> Result<Option<String>> {
92    let output = Command::new("git")
93        .current_dir(working_dir)
94        .args([
95            "blame",
96            "-L",
97            &format!("{},{}", line, line),
98            "--porcelain",
99            file,
100        ])
101        .output()?;
102
103    if !output.status.success() {
104        return Ok(None);
105    }
106
107    let blame_output = String::from_utf8_lossy(&output.stdout);
108
109    // Look for "summary" line in porcelain output
110    for blame_line in blame_output.lines() {
111        if blame_line.starts_with("summary ") {
112            let message = blame_line.strip_prefix("summary ").unwrap_or("");
113            return Ok(extract_task_id_from_commit(message));
114        }
115    }
116
117    Ok(None)
118}
119
120/// Get all commits in range that match task ID pattern
121pub fn get_task_commits(
122    working_dir: &Path,
123    start_commit: Option<&str>,
124) -> Result<HashMap<String, Vec<String>>> {
125    let range = match start_commit {
126        Some(commit) => format!("{}..HEAD", commit),
127        None => "HEAD~10..HEAD".to_string(),
128    };
129
130    let output = Command::new("git")
131        .current_dir(working_dir)
132        .args(["log", "--format=%H %s", &range])
133        .output()?;
134
135    let mut task_commits: HashMap<String, Vec<String>> = HashMap::new();
136
137    for line in String::from_utf8_lossy(&output.stdout).lines() {
138        let parts: Vec<&str> = line.splitn(2, ' ').collect();
139        if parts.len() == 2 {
140            let sha = parts[0].to_string();
141            let message = parts[1];
142            if let Some(task_id) = extract_task_id_from_commit(message) {
143                task_commits.entry(task_id).or_default().push(sha);
144            }
145        }
146    }
147
148    Ok(task_commits)
149}
150
151/// Get files changed by a specific task (via its commits)
152pub fn get_task_changed_files(
153    working_dir: &Path,
154    task_id: &str,
155    start_commit: Option<&str>,
156) -> Result<HashSet<String>> {
157    let task_commits = get_task_commits(working_dir, start_commit)?;
158    let mut files = HashSet::new();
159
160    if let Some(commits) = task_commits.get(task_id) {
161        for sha in commits {
162            let output = Command::new("git")
163                .current_dir(working_dir)
164                .args(["diff-tree", "--no-commit-id", "--name-only", "-r", sha])
165                .output()?;
166
167            for file in String::from_utf8_lossy(&output.stdout).lines() {
168                files.insert(file.to_string());
169            }
170        }
171    }
172
173    Ok(files)
174}
175
176/// Main attribution function - attributes validation failure to tasks
177pub fn attribute_failure(
178    working_dir: &Path,
179    stderr: &str,
180    stdout: &str,
181    wave_tasks: &[String],
182    start_commit: Option<&str>,
183) -> Result<Attribution> {
184    let mut evidence = Vec::new();
185    let mut responsible: HashSet<String> = HashSet::new();
186
187    // Parse error locations
188    let locations = parse_error_locations(stderr, stdout);
189
190    // Try to blame each location
191    for (file, line_opt) in &locations {
192        let mut ev = AttributionEvidence {
193            file: file.clone(),
194            line: *line_opt,
195            task_id: None,
196            commit_sha: None,
197            error_snippet: String::new(),
198        };
199
200        if let Some(line) = line_opt {
201            if let Ok(Some(task_id)) = blame_line(working_dir, file, *line) {
202                if wave_tasks.contains(&task_id) {
203                    responsible.insert(task_id.clone());
204                    ev.task_id = Some(task_id);
205                }
206            }
207        }
208
209        evidence.push(ev);
210    }
211
212    // If no direct attribution, check which tasks touched error files
213    if responsible.is_empty() && !locations.is_empty() {
214        let error_files: HashSet<String> = locations.iter().map(|(f, _)| f.clone()).collect();
215
216        for task_id in wave_tasks {
217            if let Ok(task_files) = get_task_changed_files(working_dir, task_id, start_commit) {
218                if !task_files.is_disjoint(&error_files) {
219                    responsible.insert(task_id.clone());
220                }
221            }
222        }
223    }
224
225    let confidence = if responsible.len() == 1 {
226        AttributionConfidence::High
227    } else if !responsible.is_empty() {
228        AttributionConfidence::Medium
229    } else {
230        // Could not attribute - all tasks suspect
231        responsible.extend(wave_tasks.iter().cloned());
232        AttributionConfidence::Low
233    };
234
235    let cleared: Vec<String> = wave_tasks
236        .iter()
237        .filter(|t| !responsible.contains(*t))
238        .cloned()
239        .collect();
240
241    Ok(Attribution {
242        responsible_tasks: responsible.into_iter().collect(),
243        cleared_tasks: cleared,
244        confidence,
245        evidence,
246    })
247}
248
/// Unit tests for the pure parsing helpers (no git repository needed).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_task_id_from_commit() {
        assert_eq!(
            extract_task_id_from_commit("[auth:1] Add login endpoint"),
            Some("auth:1".to_string())
        );
        assert_eq!(
            extract_task_id_from_commit("[TASK-123] Fix bug"),
            Some("TASK-123".to_string())
        );
        assert_eq!(extract_task_id_from_commit("No task ID here"), None);
    }

    /// The ID does not have to be the very first thing in the subject, and an
    /// unterminated bracket is not an ID.
    #[test]
    fn test_extract_task_id_from_commit_edge_cases() {
        assert_eq!(
            extract_task_id_from_commit("fixup! [api:2] tweak"),
            Some("api:2".to_string())
        );
        assert_eq!(extract_task_id_from_commit("[unclosed"), None);
        assert_eq!(extract_task_id_from_commit(""), None);
    }

    #[test]
    fn test_parse_error_locations_rust() {
        let stderr = r#"
error[E0308]: mismatched types
 --> src/main.rs:42:5
  |
42 |     let x: i32 = "hello";
  |                  ^^^^^^^ expected `i32`, found `&str`
"#;
        let locations = parse_error_locations(stderr, "");
        assert!(!locations.is_empty());
        assert!(locations
            .iter()
            .any(|(f, l)| f == "src/main.rs" && *l == Some(42)));
    }

    #[test]
    fn test_parse_error_locations_typescript() {
        let stderr =
            "src/index.ts(10,5): error TS2322: Type 'string' is not assignable to type 'number'.\n";
        let locations = parse_error_locations(stderr, "");
        assert!(!locations.is_empty());
        assert!(locations
            .iter()
            .any(|(f, l)| f == "src/index.ts" && *l == Some(10)));
    }

    #[test]
    fn test_parse_error_locations_python() {
        let stderr = r#"
Traceback (most recent call last):
  File "script.py", line 10, in <module>
    raise ValueError("test")
ValueError: test
"#;
        let locations = parse_error_locations(stderr, "");
        assert!(!locations.is_empty());
        assert!(locations
            .iter()
            .any(|(f, l)| f == "script.py" && *l == Some(10)));
    }

    #[test]
    fn test_parse_error_locations_go() {
        let stderr = "./main.go:15:3: undefined: foo\n";
        let locations = parse_error_locations(stderr, "");
        assert!(!locations.is_empty());
        assert!(locations
            .iter()
            .any(|(f, l)| f == "./main.go" && *l == Some(15)));
    }

    /// Locations that only appear on stdout must be found as well.
    #[test]
    fn test_parse_error_locations_from_stdout() {
        let stdout = "./main.go:15:3: undefined: foo\n";
        let locations = parse_error_locations("", stdout);
        assert!(locations
            .iter()
            .any(|(f, l)| f == "./main.go" && *l == Some(15)));
    }

    #[test]
    fn test_parse_error_locations_empty() {
        let locations = parse_error_locations("", "");
        assert!(locations.is_empty());
    }

    #[test]
    fn test_attribution_confidence() {
        assert_eq!(AttributionConfidence::High, AttributionConfidence::High);
        assert_ne!(AttributionConfidence::High, AttributionConfidence::Low);
    }
}
319
/// Tests that exercise the git-backed functions against real throwaway
/// repositories. They require a `git` executable on `PATH`.
#[cfg(test)]
mod integration_tests {
    use super::*;
    use std::path::Path;
    use std::process::Command;
    use tempfile::TempDir;

    /// Runs `git <args>` in `repo` and returns its trimmed stdout.
    ///
    /// Panics with git's stderr if the command fails, so a broken fixture is
    /// reported where it happens instead of as a confusing assertion later.
    fn git(repo: &Path, args: &[&str]) -> String {
        let output = Command::new("git")
            .current_dir(repo)
            .args(args)
            .output()
            .expect("failed to run git");
        assert!(
            output.status.success(),
            "git {:?} failed: {}",
            args,
            String::from_utf8_lossy(&output.stderr)
        );
        String::from_utf8_lossy(&output.stdout).trim().to_string()
    }

    /// Creates an empty repository with a commit identity configured.
    /// The repository lives until the returned `TempDir` is dropped.
    fn init_repo() -> TempDir {
        let temp = TempDir::new().unwrap();
        let repo = temp.path();
        git(repo, &["init"]);
        git(repo, &["config", "user.email", "test@test.com"]);
        git(repo, &["config", "user.name", "Test"]);
        // Keep commits working on machines that sign commits by default.
        git(repo, &["config", "commit.gpgsign", "false"]);
        temp
    }

    /// Writes `contents` to `relative_path` (creating parent directories) and
    /// commits everything with `message`.
    fn commit_file(repo: &Path, relative_path: &str, contents: &str, message: &str) {
        let path = repo.join(relative_path);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(&path, contents).unwrap();
        git(repo, &["add", "."]);
        git(repo, &["commit", "-m", message]);
    }

    /// Test with a real git repo to verify blame functionality
    #[test]
    fn test_blame_in_real_git_repo() {
        let temp = init_repo();
        let repo_dir = temp.path();

        // Create a file and commit with task ID
        commit_file(repo_dir, "test.rs", "fn main() {}\n", "[auth:1] Initial commit");

        let result = blame_line(repo_dir, "test.rs", 1).unwrap();
        assert_eq!(result, Some("auth:1".to_string()));
    }

    #[test]
    fn test_get_task_commits() {
        let temp = init_repo();
        let repo_dir = temp.path();

        // Baseline commit without a task ID; its SHA is the range start.
        commit_file(repo_dir, "init.txt", "init", "Initial commit");
        let init_sha = git(repo_dir, &["rev-parse", "HEAD"]);

        // Commits with different task IDs
        commit_file(repo_dir, "a.txt", "a", "[task:1] First");
        commit_file(repo_dir, "b.txt", "b", "[task:2] Second");
        commit_file(repo_dir, "c.txt", "c", "[task:1] More for task 1");

        // The range excludes the baseline and includes the task commits.
        let task_commits = get_task_commits(repo_dir, Some(init_sha.as_str())).unwrap();

        // task:1 should have 2 commits
        assert_eq!(task_commits.get("task:1").map(|v| v.len()), Some(2));
        // task:2 should have 1 commit
        assert_eq!(task_commits.get("task:2").map(|v| v.len()), Some(1));
    }

    #[test]
    fn test_get_task_changed_files() {
        let temp = init_repo();
        let repo_dir = temp.path();

        commit_file(repo_dir, "init.txt", "init", "Initial commit");
        let init_sha = git(repo_dir, &["rev-parse", "HEAD"]);

        commit_file(repo_dir, "a.txt", "a", "[task:1] First");
        commit_file(repo_dir, "b.txt", "b", "[task:2] Second");

        let files = get_task_changed_files(repo_dir, "task:1", Some(init_sha.as_str())).unwrap();
        assert!(files.contains("a.txt"));
        assert!(!files.contains("b.txt"));

        // A task without commits in the range changed nothing.
        let none = get_task_changed_files(repo_dir, "task:9", Some(init_sha.as_str())).unwrap();
        assert!(none.is_empty());
    }

    #[test]
    fn test_attribute_failure_with_git_repo() {
        let temp = init_repo();
        let repo_dir = temp.path();

        commit_file(
            repo_dir,
            "src/main.rs",
            "fn main() {\n    let x: i32 = \"bad\";\n}\n",
            "[api:1] Add main file",
        );

        // Simulate a compilation error pointing to line 2
        let stderr = r#"error[E0308]: mismatched types
 --> src/main.rs:2:18
  |
2 |     let x: i32 = "bad";
  |                  ^^^^^ expected `i32`, found `&str`
"#;

        let wave_tasks = vec!["api:1".to_string(), "api:2".to_string()];
        let attribution = attribute_failure(repo_dir, stderr, "", &wave_tasks, None).unwrap();

        // api:1 should be responsible (it created line 2)
        assert!(attribution.responsible_tasks.contains(&"api:1".to_string()));
        // api:2 should be cleared
        assert!(attribution.cleared_tasks.contains(&"api:2".to_string()));
        // High confidence since we found a direct blame match
        assert_eq!(attribution.confidence, AttributionConfidence::High);
    }
}
516}