// crap_core/adapters/diff/mod.rs

1//! Git diff adapter — shells out to `git diff` and parses unified diff output.
2
3use crate::domain::types::{CrapError, FileChangeKind, SourceSpan};
4use crate::ports::DiffPort;
5use regex::Regex;
6use std::collections::HashMap;
7use std::path::Path;
8use std::process::Command;
9use std::sync::LazyLock;
10
/// Stateless diff adapter backed by the `git` command-line tool.
///
/// The type holds no data, so constructing it is free and instances are
/// interchangeable.
#[derive(Default)]
pub struct GitDiffAdapter;

impl GitDiffAdapter {
    /// Creates an adapter; equivalent to [`GitDiffAdapter::default`].
    pub fn new() -> Self {
        Self::default()
    }
}
19
20impl DiffPort for GitDiffAdapter {
21    fn changed_regions(
22        &self,
23        diff_ref: &str,
24        working_dir: &Path,
25        paths: &[String],
26    ) -> Result<HashMap<String, FileChangeKind>, CrapError> {
27        let output = Command::new("git")
28            .env_remove("GIT_DIR")
29            .env_remove("GIT_WORK_TREE")
30            .env_remove("GIT_INDEX_FILE")
31            .env("GIT_PAGER", "")
32            .current_dir(working_dir)
33            .args([
34                "diff",
35                "--unified=0",
36                "--no-prefix",
37                "--no-color",
38                "--diff-filter=ACMR",
39            ])
40            .arg(diff_ref)
41            .arg("--")
42            .args(paths)
43            .output()
44            .map_err(|e| CrapError::DiffCompute(format!("failed to run git diff: {e}")))?;
45
46        if !output.status.success() {
47            let stderr = String::from_utf8_lossy(&output.stderr);
48            return Err(CrapError::DiffCompute(stderr.trim().to_string()));
49        }
50
51        let stdout = String::from_utf8_lossy(&output.stdout);
52        Ok(parse_unified_diff(&stdout))
53    }
54}
55
56/// Regex anchored to the full hunk header structure.
57/// Captures the new-side start line and optional count from `+c,d` or `+c`.
58/// Handles all 4 formats: `@@ -a,b +c,d @@`, `@@ -a +c,d @@`,
59/// `@@ -a,b +c @@` (implicit count=1), `@@ -a +c @@`.
60static HUNK_RE: LazyLock<Regex> = LazyLock::new(|| {
61    Regex::new(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@").expect("hunk regex is valid")
62});
63
/// Mutable cursor carried across lines while walking a unified diff.
#[derive(Default)]
struct DiffParseState {
    /// Path taken from the most recent `+++ ` line, or `None` before any such
    /// line has been seen or when that line named `/dev/null`.
    current_file: Option<String>,
    /// Set by a `new file mode` line and cleared by the next `diff --git` line.
    is_new_file: bool,
}
69
/// Coarse classification of a single line of `git diff` output.
enum DiffLine<'a> {
    /// A `diff --git …` line that opens a new per-file section.
    DiffHeader,
    /// A `new file mode …` line.
    NewFileMode,
    /// A `+++ <path>` line; carries everything after the `+++ ` prefix.
    FilePath(&'a str),
    /// A line starting with `@@ `; carries the complete line.
    Hunk(&'a str),
    /// Any line that is none of the above.
    Other,
}
77
78/// Parse unified diff output into a map of file path → change kind.
79fn parse_unified_diff(input: &str) -> HashMap<String, FileChangeKind> {
80    let mut result: HashMap<String, FileChangeKind> = HashMap::new();
81    let mut state = DiffParseState::default();
82
83    for line in input.lines() {
84        handle_diff_line(classify_diff_line(line), &mut state, &mut result);
85    }
86
87    result
88}
89
90fn classify_diff_line(line: &str) -> DiffLine<'_> {
91    if let Some(path) = line.strip_prefix("+++ ") {
92        DiffLine::FilePath(path)
93    } else if line.starts_with("new file mode") {
94        DiffLine::NewFileMode
95    } else if line.starts_with("diff --git") {
96        DiffLine::DiffHeader
97    } else if line.starts_with("@@ ") {
98        DiffLine::Hunk(line)
99    } else {
100        DiffLine::Other
101    }
102}
103
104fn handle_diff_line(
105    line: DiffLine<'_>,
106    state: &mut DiffParseState,
107    result: &mut HashMap<String, FileChangeKind>,
108) {
109    match line {
110        DiffLine::DiffHeader => state.is_new_file = false,
111        DiffLine::NewFileMode => state.is_new_file = true,
112        DiffLine::FilePath(path) => state.current_file = normalize_diff_path(path),
113        DiffLine::Hunk(header) => handle_hunk_line(header, state, result),
114        DiffLine::Other => {}
115    }
116}
117
118fn normalize_diff_path(path: &str) -> Option<String> {
119    if path == "/dev/null" {
120        None
121    } else {
122        Some(normalize_path(path))
123    }
124}
125
126fn handle_hunk_line(
127    header: &str,
128    state: &DiffParseState,
129    result: &mut HashMap<String, FileChangeKind>,
130) {
131    let Some(file) = state.current_file.as_ref() else {
132        return;
133    };
134
135    if result.get(file) == Some(&FileChangeKind::NewFile) {
136        return;
137    }
138
139    if state.is_new_file {
140        result.insert(file.clone(), FileChangeKind::NewFile);
141        return;
142    }
143
144    if let Some(span) = parse_hunk_header(header) {
145        append_modified_span(result, file, span);
146    }
147}
148
149fn append_modified_span(
150    result: &mut HashMap<String, FileChangeKind>,
151    file: &str,
152    span: SourceSpan,
153) {
154    result
155        .entry(file.to_owned())
156        .and_modify(|kind| {
157            if let FileChangeKind::Modified(spans) = kind {
158                spans.push(span);
159            }
160        })
161        .or_insert_with(|| FileChangeKind::Modified(vec![span]));
162}
163
164/// Parse a hunk header line to extract the new-side span.
165///
166/// Returns `None` for deletion-only hunks (count=0) and for unparseable headers.
167/// Unparseable headers (e.g., usize overflow on astronomical line numbers) are
168/// treated as skippable — the caller silently omits the hunk, same as deletion-only.
169/// This is an accepted risk: git produces well-formed output and line numbers
170/// that overflow usize are not practically possible.
171fn parse_hunk_header(line: &str) -> Option<SourceSpan> {
172    let caps = HUNK_RE.captures(line)?;
173    let start: usize = caps.get(1)?.as_str().parse().ok()?;
174    let count: usize = caps
175        .get(2)
176        .and_then(|m| m.as_str().parse().ok())
177        .unwrap_or(1);
178
179    if count == 0 {
180        return None; // Deletion-only hunk
181    }
182
183    Some(SourceSpan {
184        start_line: start,
185        end_line: start + count - 1,
186        // Diff hunks are line-based — no column data. `0` signals "unknown"
187        // so reporters that distinguish (e.g., SARIF) skip the column field.
188        start_column: 0,
189        end_column: 0,
190    })
191}
192
/// Returns `path` with every backslash replaced by a forward slash.
fn normalize_path(path: &str) -> String {
    path.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
197
#[cfg(test)]
mod tests {
    use super::*;

    // ── parse_hunk_header ───────────────────────────────────────────

    #[test]
    fn hunk_standard_format() {
        // @@ -a,b +c,d @@
        let span = parse_hunk_header("@@ -10,5 +20,3 @@ fn foo()").unwrap();
        assert_eq!(span.start_line, 20);
        assert_eq!(span.end_line, 22);
    }

    #[test]
    fn hunk_one_line_removed() {
        // @@ -a +c,d @@
        let span = parse_hunk_header("@@ -10 +20,3 @@").unwrap();
        assert_eq!(span.start_line, 20);
        assert_eq!(span.end_line, 22);
    }

    #[test]
    fn hunk_implicit_count_one() {
        // @@ -a,b +c @@ (count=1 implicit)
        let span = parse_hunk_header("@@ -10,5 +20 @@").unwrap();
        assert_eq!(span.start_line, 20);
        assert_eq!(span.end_line, 20);
    }

    #[test]
    fn hunk_both_count_one() {
        // @@ -a +c @@ (both implicit count=1)
        let span = parse_hunk_header("@@ -10 +20 @@").unwrap();
        assert_eq!(span.start_line, 20);
        assert_eq!(span.end_line, 20);
    }

    #[test]
    fn hunk_deletion_only() {
        // @@ -a,b +c,0 @@ → None
        assert!(parse_hunk_header("@@ -10,3 +20,0 @@").is_none());
    }

    // ── parse_unified_diff ──────────────────────────────────────────

    #[test]
    fn parse_empty_input() {
        let result = parse_unified_diff("");
        assert!(result.is_empty());
    }

    #[test]
    fn parse_modified_file_single_hunk() {
        let diff = "\
diff --git src/foo.rs src/foo.rs
index abc..def 100644
--- src/foo.rs
+++ src/foo.rs
@@ -10,3 +10,5 @@ fn existing()
+    let x = 1;
+    let y = 2;
";
        let result = parse_unified_diff(diff);
        assert_eq!(result.len(), 1);
        match &result["src/foo.rs"] {
            FileChangeKind::Modified(spans) => {
                assert_eq!(spans.len(), 1);
                assert_eq!(spans[0].start_line, 10);
                assert_eq!(spans[0].end_line, 14);
            }
            FileChangeKind::NewFile => panic!("expected Modified"),
        }
    }

    #[test]
    fn parse_modified_file_multiple_hunks() {
        let diff = "\
diff --git src/foo.rs src/foo.rs
index abc..def 100644
--- src/foo.rs
+++ src/foo.rs
@@ -5,0 +5,2 @@ fn first()
+    new line 1
+    new line 2
@@ -20,0 +22,1 @@ fn second()
+    another line
";
        let result = parse_unified_diff(diff);
        match &result["src/foo.rs"] {
            FileChangeKind::Modified(spans) => {
                assert_eq!(spans.len(), 2);
                assert_eq!(spans[0].start_line, 5);
                assert_eq!(spans[0].end_line, 6);
                assert_eq!(spans[1].start_line, 22);
                assert_eq!(spans[1].end_line, 22);
            }
            FileChangeKind::NewFile => panic!("expected Modified"),
        }
    }

    #[test]
    fn parse_new_file() {
        let diff = "\
diff --git src/new.rs src/new.rs
new file mode 100644
index 0000000..abc1234
--- /dev/null
+++ src/new.rs
@@ -0,0 +1,10 @@
+fn hello() {}
";
        let result = parse_unified_diff(diff);
        assert_eq!(result["src/new.rs"], FileChangeKind::NewFile);
    }

    #[test]
    fn parse_multiple_files() {
        let diff = "\
diff --git src/a.rs src/a.rs
new file mode 100644
index 0000000..abc
--- /dev/null
+++ src/a.rs
@@ -0,0 +1,5 @@
+content
diff --git src/b.rs src/b.rs
index abc..def 100644
--- src/b.rs
+++ src/b.rs
@@ -10,2 +10,3 @@ fn foo()
+added
";
        let result = parse_unified_diff(diff);
        assert_eq!(result.len(), 2);
        assert_eq!(result["src/a.rs"], FileChangeKind::NewFile);
        assert!(matches!(result["src/b.rs"], FileChangeKind::Modified(_)));
    }

    #[test]
    fn parse_deletion_only_hunk_skipped() {
        let diff = "\
diff --git src/foo.rs src/foo.rs
index abc..def 100644
--- src/foo.rs
+++ src/foo.rs
@@ -10,3 +10,0 @@ fn deleted_lines()
-removed1
-removed2
-removed3
";
        let result = parse_unified_diff(diff);
        // Deletion-only hunk means no new lines → file shouldn't appear
        assert!(result.is_empty());
    }

    #[test]
    fn parse_renamed_file_maps_to_new_path() {
        let diff = "\
diff --git src/old.rs src/new_name.rs
similarity index 95%
rename from src/old.rs
rename to src/new_name.rs
index abc..def 100644
--- src/old.rs
+++ src/new_name.rs
@@ -5,1 +5,2 @@ fn foo()
+    added line
";
        let result = parse_unified_diff(diff);
        assert!(result.contains_key("src/new_name.rs"));
        assert!(!result.contains_key("src/old.rs"));
    }

    // ── normalize_path ──────────────────────────────────────────────

    #[test]
    fn normalize_backslash() {
        assert_eq!(normalize_path("src\\sub\\mod.rs"), "src/sub/mod.rs");
    }

    #[test]
    fn normalize_forward_slash_unchanged() {
        assert_eq!(normalize_path("src/sub/mod.rs"), "src/sub/mod.rs");
    }

    // ── integration test with real git (tempdir) ────────────────────

    #[test]
    fn git_diff_adapter_real_repo() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        // Initialize repo
        test_git_repo(path);

        // Create initial commit
        std::fs::write(path.join("lib.rs"), "fn old() {}\n").unwrap();
        git(path, &["add", "."]);
        git(path, &["commit", "-m", "initial"]);

        // Modify file
        std::fs::write(path.join("lib.rs"), "fn old() {}\nfn new_func() {}\n").unwrap();
        git(path, &["add", "."]);
        git(path, &["commit", "-m", "add function"]);

        let adapter = GitDiffAdapter::new();
        let result = adapter
            .changed_regions("HEAD~1", path, &["lib.rs".to_string()])
            .unwrap();

        assert!(result.contains_key("lib.rs"));
        match &result["lib.rs"] {
            FileChangeKind::Modified(spans) => {
                assert!(!spans.is_empty());
                // The new line should be in the changed spans
                assert!(spans.iter().any(|s| s.start_line == 2));
            }
            FileChangeKind::NewFile => panic!("expected Modified, got NewFile"),
        }
    }

    #[test]
    fn git_diff_adapter_new_file() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        test_git_repo(path);

        // Empty initial commit
        git(path, &["commit", "--allow-empty", "-m", "initial"]);

        // Add new file
        std::fs::write(path.join("new.rs"), "fn hello() {}\n").unwrap();
        git(path, &["add", "."]);
        git(path, &["commit", "-m", "add new file"]);

        let adapter = GitDiffAdapter::new();
        let result = adapter
            .changed_regions("HEAD~1", path, &["new.rs".to_string()])
            .unwrap();

        assert_eq!(result["new.rs"], FileChangeKind::NewFile);
    }

    #[test]
    fn git_diff_adapter_bad_ref() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        test_git_repo(path);
        git(path, &["commit", "--allow-empty", "-m", "initial"]);

        let adapter = GitDiffAdapter::new();
        let result = adapter.changed_regions("nonexistent-ref", path, &[]);
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("nonexistent-ref"),
            "error should mention the bad ref: {err}"
        );
    }

    #[test]
    fn git_diff_adapter_empty_diff() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        test_git_repo(path);

        std::fs::write(path.join("lib.rs"), "fn stable() {}\n").unwrap();
        git(path, &["add", "."]);
        git(path, &["commit", "-m", "initial"]);

        // Diff HEAD against itself → empty
        let adapter = GitDiffAdapter::new();
        let result = adapter
            .changed_regions("HEAD", path, &["lib.rs".to_string()])
            .unwrap();

        assert!(result.is_empty());
    }

    // ── helpers ─────────────────────────────────────────────────────

    /// Initializes a throwaway repository in `dir` with a local identity.
    fn test_git_repo(dir: &Path) {
        git(dir, &["init"]);
        git(dir, &["config", "user.email", "test@test.com"]);
        git(dir, &["config", "user.name", "Test"]);
        // A global `commit.gpgsign=true` would make the fixture commits depend
        // on the developer's signing setup.
        git(dir, &["config", "commit.gpgsign", "false"]);
    }

    /// Runs `git <args>` in `dir` and panics with git's stderr on failure.
    fn git(dir: &Path, args: &[&str]) {
        let output = Command::new("git")
            // Same isolation as the adapter: when the test suite is launched
            // from a git hook these variables point at the host repository and
            // would redirect `init`/`add`/`commit` away from `dir`.
            .env_remove("GIT_DIR")
            .env_remove("GIT_WORK_TREE")
            .env_remove("GIT_INDEX_FILE")
            .current_dir(dir)
            .args(args)
            .output()
            .expect("git command failed to start");
        assert!(
            output.status.success(),
            "git {} failed: {}",
            args.join(" "),
            String::from_utf8_lossy(&output.stderr)
        );
    }
}