1use crate::domain::types::{CrapError, FileChangeKind, SourceSpan};
4use crate::ports::DiffPort;
5use regex::Regex;
6use std::collections::HashMap;
7use std::path::Path;
8use std::process::Command;
9use std::sync::LazyLock;
10
/// Diff adapter backed by the `git` command-line executable.
///
/// The type is a stateless unit struct, so constructing it is free.
#[derive(Default)]
pub struct GitDiffAdapter;

impl GitDiffAdapter {
    /// Creates a new adapter; equivalent to `GitDiffAdapter::default()`.
    pub fn new() -> Self {
        GitDiffAdapter
    }
}
19
20impl DiffPort for GitDiffAdapter {
21 fn changed_regions(
22 &self,
23 diff_ref: &str,
24 working_dir: &Path,
25 paths: &[String],
26 ) -> Result<HashMap<String, FileChangeKind>, CrapError> {
27 let output = Command::new("git")
28 .env_remove("GIT_DIR")
29 .env_remove("GIT_WORK_TREE")
30 .env_remove("GIT_INDEX_FILE")
31 .env("GIT_PAGER", "")
32 .current_dir(working_dir)
33 .args([
34 "diff",
35 "--unified=0",
36 "--no-prefix",
37 "--no-color",
38 "--diff-filter=ACMR",
39 ])
40 .arg(diff_ref)
41 .arg("--")
42 .args(paths)
43 .output()
44 .map_err(|e| CrapError::DiffCompute(format!("failed to run git diff: {e}")))?;
45
46 if !output.status.success() {
47 let stderr = String::from_utf8_lossy(&output.stderr);
48 return Err(CrapError::DiffCompute(stderr.trim().to_string()));
49 }
50
51 let stdout = String::from_utf8_lossy(&output.stdout);
52 Ok(parse_unified_diff(&stdout))
53 }
54}
55
56static HUNK_RE: LazyLock<Regex> = LazyLock::new(|| {
61 Regex::new(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@").expect("hunk regex is valid")
62});
63
/// Mutable cursor carried from line to line while a diff is parsed.
#[derive(Default)]
struct DiffParseState {
    /// Set when a `new file mode` line is seen; cleared again by the next
    /// `diff --git` header.
    is_new_file: bool,
    /// Path taken from the most recent `+++` line, or `None` when no such line
    /// has been seen yet or when it named `/dev/null`.
    current_file: Option<String>,
}
69
/// Classification of a single line of `git diff` output.
enum DiffLine<'a> {
    /// A `diff --git …` line opening a new file section.
    DiffHeader,
    /// A `new file mode …` line.
    NewFileMode,
    /// A `+++ <path>` line; carries the text after the `+++ ` prefix.
    FilePath(&'a str),
    /// A line starting with `@@ `; carries the complete line.
    Hunk(&'a str),
    /// Anything else (content lines, `index`, `---`, rename metadata, …).
    Other,
}
77
78fn parse_unified_diff(input: &str) -> HashMap<String, FileChangeKind> {
80 let mut result: HashMap<String, FileChangeKind> = HashMap::new();
81 let mut state = DiffParseState::default();
82
83 for line in input.lines() {
84 handle_diff_line(classify_diff_line(line), &mut state, &mut result);
85 }
86
87 result
88}
89
90fn classify_diff_line(line: &str) -> DiffLine<'_> {
91 if let Some(path) = line.strip_prefix("+++ ") {
92 DiffLine::FilePath(path)
93 } else if line.starts_with("new file mode") {
94 DiffLine::NewFileMode
95 } else if line.starts_with("diff --git") {
96 DiffLine::DiffHeader
97 } else if line.starts_with("@@ ") {
98 DiffLine::Hunk(line)
99 } else {
100 DiffLine::Other
101 }
102}
103
104fn handle_diff_line(
105 line: DiffLine<'_>,
106 state: &mut DiffParseState,
107 result: &mut HashMap<String, FileChangeKind>,
108) {
109 match line {
110 DiffLine::DiffHeader => state.is_new_file = false,
111 DiffLine::NewFileMode => state.is_new_file = true,
112 DiffLine::FilePath(path) => state.current_file = normalize_diff_path(path),
113 DiffLine::Hunk(header) => handle_hunk_line(header, state, result),
114 DiffLine::Other => {}
115 }
116}
117
118fn normalize_diff_path(path: &str) -> Option<String> {
119 if path == "/dev/null" {
120 None
121 } else {
122 Some(normalize_path(path))
123 }
124}
125
126fn handle_hunk_line(
127 header: &str,
128 state: &DiffParseState,
129 result: &mut HashMap<String, FileChangeKind>,
130) {
131 let Some(file) = state.current_file.as_ref() else {
132 return;
133 };
134
135 if result.get(file) == Some(&FileChangeKind::NewFile) {
136 return;
137 }
138
139 if state.is_new_file {
140 result.insert(file.clone(), FileChangeKind::NewFile);
141 return;
142 }
143
144 if let Some(span) = parse_hunk_header(header) {
145 append_modified_span(result, file, span);
146 }
147}
148
149fn append_modified_span(
150 result: &mut HashMap<String, FileChangeKind>,
151 file: &str,
152 span: SourceSpan,
153) {
154 result
155 .entry(file.to_owned())
156 .and_modify(|kind| {
157 if let FileChangeKind::Modified(spans) = kind {
158 spans.push(span);
159 }
160 })
161 .or_insert_with(|| FileChangeKind::Modified(vec![span]));
162}
163
164fn parse_hunk_header(line: &str) -> Option<SourceSpan> {
172 let caps = HUNK_RE.captures(line)?;
173 let start: usize = caps.get(1)?.as_str().parse().ok()?;
174 let count: usize = caps
175 .get(2)
176 .and_then(|m| m.as_str().parse().ok())
177 .unwrap_or(1);
178
179 if count == 0 {
180 return None; }
182
183 Some(SourceSpan {
184 start_line: start,
185 end_line: start + count - 1,
186 start_column: 0,
189 end_column: 0,
190 })
191}
192
/// Returns `path` with every backslash replaced by a forward slash.
fn normalize_path(path: &str) -> String {
    path.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
197
#[cfg(test)]
mod tests {
    use super::*;

    // ---- parse_hunk_header ----

    #[test]
    fn hunk_standard_format() {
        let span = parse_hunk_header("@@ -10,5 +20,3 @@ fn foo()").unwrap();
        assert_eq!(span.start_line, 20);
        assert_eq!(span.end_line, 22);
    }

    #[test]
    fn hunk_one_line_removed() {
        let span = parse_hunk_header("@@ -10 +20,3 @@").unwrap();
        assert_eq!(span.start_line, 20);
        assert_eq!(span.end_line, 22);
    }

    #[test]
    fn hunk_implicit_count_one() {
        let span = parse_hunk_header("@@ -10,5 +20 @@").unwrap();
        assert_eq!(span.start_line, 20);
        assert_eq!(span.end_line, 20);
    }

    #[test]
    fn hunk_both_count_one() {
        let span = parse_hunk_header("@@ -10 +20 @@").unwrap();
        assert_eq!(span.start_line, 20);
        assert_eq!(span.end_line, 20);
    }

    #[test]
    fn hunk_deletion_only() {
        assert!(parse_hunk_header("@@ -10,3 +20,0 @@").is_none());
    }

    // ---- parse_unified_diff ----

    #[test]
    fn parse_empty_input() {
        let result = parse_unified_diff("");
        assert!(result.is_empty());
    }

    #[test]
    fn parse_modified_file_single_hunk() {
        let diff = "\
diff --git src/foo.rs src/foo.rs
index abc..def 100644
--- src/foo.rs
+++ src/foo.rs
@@ -10,3 +10,5 @@ fn existing()
+ let x = 1;
+ let y = 2;
";
        let result = parse_unified_diff(diff);
        assert_eq!(result.len(), 1);
        match &result["src/foo.rs"] {
            FileChangeKind::Modified(spans) => {
                assert_eq!(spans.len(), 1);
                assert_eq!(spans[0].start_line, 10);
                assert_eq!(spans[0].end_line, 14);
            }
            _ => panic!("expected Modified"),
        }
    }

    #[test]
    fn parse_modified_file_multiple_hunks() {
        let diff = "\
diff --git src/foo.rs src/foo.rs
index abc..def 100644
--- src/foo.rs
+++ src/foo.rs
@@ -5,0 +5,2 @@ fn first()
+ new line 1
+ new line 2
@@ -20,0 +22,1 @@ fn second()
+ another line
";
        let result = parse_unified_diff(diff);
        match &result["src/foo.rs"] {
            FileChangeKind::Modified(spans) => {
                assert_eq!(spans.len(), 2);
                assert_eq!(spans[0].start_line, 5);
                assert_eq!(spans[0].end_line, 6);
                assert_eq!(spans[1].start_line, 22);
                assert_eq!(spans[1].end_line, 22);
            }
            _ => panic!("expected Modified"),
        }
    }

    #[test]
    fn parse_new_file() {
        let diff = "\
diff --git src/new.rs src/new.rs
new file mode 100644
index 0000000..abc1234
--- /dev/null
+++ src/new.rs
@@ -0,0 +1,10 @@
+fn hello() {}
";
        let result = parse_unified_diff(diff);
        assert_eq!(result["src/new.rs"], FileChangeKind::NewFile);
    }

    #[test]
    fn parse_multiple_files() {
        let diff = "\
diff --git src/a.rs src/a.rs
new file mode 100644
index 0000000..abc
--- /dev/null
+++ src/a.rs
@@ -0,0 +1,5 @@
+content
diff --git src/b.rs src/b.rs
index abc..def 100644
--- src/b.rs
+++ src/b.rs
@@ -10,2 +10,3 @@ fn foo()
+added
";
        let result = parse_unified_diff(diff);
        assert_eq!(result.len(), 2);
        assert_eq!(result["src/a.rs"], FileChangeKind::NewFile);
        assert!(matches!(result["src/b.rs"], FileChangeKind::Modified(_)));
    }

    #[test]
    fn parse_deletion_only_hunk_skipped() {
        let diff = "\
diff --git src/foo.rs src/foo.rs
index abc..def 100644
--- src/foo.rs
+++ src/foo.rs
@@ -10,3 +10,0 @@ fn deleted_lines()
-removed1
-removed2
-removed3
";
        let result = parse_unified_diff(diff);
        assert!(result.is_empty());
    }

    #[test]
    fn parse_renamed_file_maps_to_new_path() {
        let diff = "\
diff --git src/old.rs src/new_name.rs
similarity index 95%
rename from src/old.rs
rename to src/new_name.rs
index abc..def 100644
--- src/old.rs
+++ src/new_name.rs
@@ -5,1 +5,2 @@ fn foo()
+ added line
";
        let result = parse_unified_diff(diff);
        assert!(result.contains_key("src/new_name.rs"));
        assert!(!result.contains_key("src/old.rs"));
    }

    // ---- normalize_path ----

    #[test]
    fn normalize_backslash() {
        assert_eq!(normalize_path("src\\sub\\mod.rs"), "src/sub/mod.rs");
    }

    #[test]
    fn normalize_forward_slash_unchanged() {
        assert_eq!(normalize_path("src/sub/mod.rs"), "src/sub/mod.rs");
    }

    // ---- GitDiffAdapter against a real temporary repository ----

    #[test]
    fn git_diff_adapter_real_repo() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        test_git_repo(path);

        std::fs::write(path.join("lib.rs"), "fn old() {}\n").unwrap();
        git(path, &["add", "."]);
        git(path, &["commit", "-m", "initial"]);

        std::fs::write(path.join("lib.rs"), "fn old() {}\nfn new_func() {}\n").unwrap();
        git(path, &["add", "."]);
        git(path, &["commit", "-m", "add function"]);

        let adapter = GitDiffAdapter::new();
        let result = adapter
            .changed_regions("HEAD~1", path, &["lib.rs".to_string()])
            .unwrap();

        assert!(result.contains_key("lib.rs"));
        match &result["lib.rs"] {
            FileChangeKind::Modified(spans) => {
                assert!(!spans.is_empty());
                assert!(spans.iter().any(|s| s.start_line == 2));
            }
            FileChangeKind::NewFile => panic!("expected Modified, got NewFile"),
        }
    }

    #[test]
    fn git_diff_adapter_new_file() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        test_git_repo(path);

        git(path, &["commit", "--allow-empty", "-m", "initial"]);

        std::fs::write(path.join("new.rs"), "fn hello() {}\n").unwrap();
        git(path, &["add", "."]);
        git(path, &["commit", "-m", "add new file"]);

        let adapter = GitDiffAdapter::new();
        let result = adapter
            .changed_regions("HEAD~1", path, &["new.rs".to_string()])
            .unwrap();

        assert_eq!(result["new.rs"], FileChangeKind::NewFile);
    }

    #[test]
    fn git_diff_adapter_bad_ref() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        test_git_repo(path);
        git(path, &["commit", "--allow-empty", "-m", "initial"]);

        let adapter = GitDiffAdapter::new();
        let result = adapter.changed_regions("nonexistent-ref", path, &[]);
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("nonexistent-ref"),
            "error should mention the bad ref: {err}"
        );
    }

    #[test]
    fn git_diff_adapter_empty_diff() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        test_git_repo(path);

        std::fs::write(path.join("lib.rs"), "fn stable() {}\n").unwrap();
        git(path, &["add", "."]);
        git(path, &["commit", "-m", "initial"]);

        let adapter = GitDiffAdapter::new();
        let result = adapter
            .changed_regions("HEAD", path, &["lib.rs".to_string()])
            .unwrap();

        assert!(result.is_empty());
    }

    // ---- helpers ----

    /// Initialises a throwaway repository in `dir` with a commit identity.
    fn test_git_repo(dir: &Path) {
        git(dir, &["init"]);
        git(dir, &["config", "user.email", "test@test.com"]);
        git(dir, &["config", "user.name", "Test"]);
        // A developer's global `commit.gpgsign=true` would otherwise make the
        // fixture commits fail or prompt for a key.
        git(dir, &["config", "commit.gpgsign", "false"]);
    }

    /// Runs `git <args>` in `dir` and panics with git's stderr on failure.
    fn git(dir: &Path, args: &[&str]) {
        let output = Command::new("git")
            // Same isolation as the adapter: when the test suite is run from
            // inside a git hook these variables point at the outer
            // repository, and the fixture commands must not touch it.
            .env_remove("GIT_DIR")
            .env_remove("GIT_WORK_TREE")
            .env_remove("GIT_INDEX_FILE")
            .current_dir(dir)
            .args(args)
            .output()
            .expect("git command failed to start");
        assert!(
            output.status.success(),
            "git {} failed: {}",
            args.join(" "),
            String::from_utf8_lossy(&output.stderr)
        );
    }
}