1use std::collections::{BTreeMap, BTreeSet};
7use std::ops::RangeInclusive;
8use std::path::Path;
9
10use covguard_paths::normalize_diff_path;
11use covguard_ports::{DiffParseResult as PortDiffParseResult, DiffProvider};
12use thiserror::Error;
13
/// Changed lines per file: maps a file path to a list of inclusive line-number ranges.
///
/// As produced by [`parse_patch_with_meta`], keys are normalized paths and the
/// ranges are the result of passing the added line numbers through `merge_ranges`.
pub type ChangedRanges = BTreeMap<String, Vec<RangeInclusive<u32>>>;
23
/// Result of parsing a patch: the changed line ranges plus the files detected as binary.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffParseResult {
    /// Added-line ranges keyed by file path. Files listed in `binary_files`
    /// are removed from this map by the parser.
    pub changed_ranges: ChangedRanges,
    /// Paths of files the patch reports as binary. The parser fills this from
    /// an ordered set, so entries are sorted and unique.
    pub binary_files: Vec<String>,
}
32
/// Errors produced while loading or parsing a diff.
///
/// Both variants carry a plain message string, which keeps the type `Clone`
/// and comparable with `==`.
#[derive(Debug, Error, Clone, PartialEq, Eq)]
pub enum DiffError {
    /// The patch text is structurally invalid (the parser uses this for a
    /// hunk header it cannot interpret).
    #[error("invalid diff format: {0}")]
    InvalidFormat(String),

    /// An I/O-level failure, such as being unable to run `git`.
    #[error("I/O error: {0}")]
    IoError(String),
}
44
/// Stateless [`DiffProvider`] implementation backed by this module's parser
/// and by the `git` command-line tool.
pub struct GitDiffProvider;
47
48impl DiffProvider for GitDiffProvider {
49 fn parse_patch(&self, text: &str) -> Result<PortDiffParseResult, String> {
50 parse_patch_with_meta(text)
51 .map(|parsed| PortDiffParseResult {
52 changed_ranges: parsed.changed_ranges,
53 binary_files: parsed.binary_files,
54 })
55 .map_err(|e| e.to_string())
56 }
57
58 fn load_diff_from_git(
59 &self,
60 base: &str,
61 head: &str,
62 repo_root: &Path,
63 ) -> Result<String, String> {
64 load_diff_from_git(base, head, repo_root).map_err(|e| e.to_string())
65 }
66}
67
68pub fn load_diff_from_git(base: &str, head: &str, repo_root: &Path) -> Result<String, DiffError> {
72 let output = std::process::Command::new("git")
73 .current_dir(repo_root)
74 .args(["diff", base, head])
75 .output()
76 .map_err(|e| DiffError::IoError(e.to_string()))?;
77 Ok(String::from_utf8_lossy(&output.stdout).to_string())
80}
81
/// Normalizes a path taken from a diff header.
///
/// Thin wrapper around `covguard_paths::normalize_diff_path`, kept so callers
/// of this module have a local entry point. Per this module's unit tests the
/// result has a leading `a/` or `b/` removed, `./` segments dropped and
/// backslashes converted to forward slashes (e.g. `b\src\lib.rs` becomes
/// `src/lib.rs`).
pub fn normalize_path(path: &str) -> String {
    normalize_diff_path(path)
}
106
107pub use covguard_ranges::merge_ranges;
112
113pub fn parse_patch(text: &str) -> Result<ChangedRanges, DiffError> {
156 Ok(parse_patch_with_meta(text)?.changed_ranges)
157}
158
159pub fn parse_patch_with_meta(text: &str) -> Result<DiffParseResult, DiffError> {
165 let text = text.replace("\r\n", "\n");
167 let lines: Vec<&str> = text.lines().collect();
168
169 let mut result: BTreeMap<String, Vec<u32>> = BTreeMap::new();
170 let mut current_file: Option<String> = None;
171 let mut current_diff_file: Option<String> = None;
172 let mut current_new_line: u32 = 0;
173 let mut in_hunk = false;
174
175 let mut rename_to: Option<String> = None;
177 let mut binary_files: BTreeSet<String> = BTreeSet::new();
179
180 for line in lines {
181 if let Some(rest) = line.strip_prefix("diff --git ") {
183 let mut parts = rest.split_whitespace();
184 let _a = parts.next();
185 let b = parts.next();
186 current_diff_file = b.map(normalize_path);
187 continue;
188 }
189
190 if line.starts_with("rename to ") {
192 rename_to = Some(normalize_path(line.strip_prefix("rename to ").unwrap()));
193 continue;
194 }
195
196 if let Some(rest) = line.strip_prefix("Binary files ") {
198 if let Some(and_pos) = rest.find(" and ") {
199 let after_and = &rest[and_pos + 5..];
200 let path_part = after_and.strip_suffix(" differ").unwrap_or(after_and);
201 let path = path_part.trim();
202 if path != "/dev/null" {
203 binary_files.insert(normalize_path(path));
204 }
205 }
206 continue;
207 }
208
209 if line.starts_with("GIT binary patch") {
211 if let Some(path) = current_file.clone().or_else(|| current_diff_file.clone()) {
212 binary_files.insert(path);
213 }
214 continue;
215 }
216
217 if let Some(path) = line.strip_prefix("+++ ") {
219 let path = path.trim();
220
221 if path == "/dev/null" {
223 current_file = None;
224 continue;
225 }
226
227 let normalized = if let Some(ref rename) = rename_to {
229 rename.clone()
230 } else {
231 normalize_path(path)
232 };
233
234 current_file = Some(normalized);
235 rename_to = None;
236 in_hunk = false;
237 continue;
238 }
239
240 if line.starts_with("@@ ") {
242 if let Some(ref _file) = current_file {
243 if let Some(new_start) = parse_hunk_header(line) {
245 current_new_line = new_start;
246 in_hunk = true;
247 } else {
248 return Err(DiffError::InvalidFormat(format!(
249 "malformed hunk header: '{}'",
250 line
251 )));
252 }
253 }
254 continue;
255 }
256
257 if in_hunk && let Some(ref file) = current_file {
259 if let Some(first_char) = line.chars().next() {
260 match first_char {
261 '+' => {
262 result
264 .entry(file.clone())
265 .or_default()
266 .push(current_new_line);
267 current_new_line += 1;
268 }
269 '-' => {
270 }
272 ' ' => {
273 current_new_line += 1;
275 }
276 '\\' => {
277 }
279 _ => {
280 current_new_line += 1;
282 }
283 }
284 } else {
285 current_new_line += 1;
287 }
288 }
289 }
290
291 let mut ranges: ChangedRanges = BTreeMap::new();
293 for (file, lines) in result {
294 let line_ranges: Vec<RangeInclusive<u32>> = lines.into_iter().map(|l| l..=l).collect();
295 ranges.insert(file, merge_ranges(line_ranges));
296 }
297 for binary in &binary_files {
299 ranges.remove(binary);
300 }
301
302 Ok(DiffParseResult {
303 changed_ranges: ranges,
304 binary_files: binary_files.into_iter().collect(),
305 })
306}
307
/// Extracts the new-file starting line number from a unified-diff hunk header.
///
/// The header is split on whitespace and the first token beginning with `+`
/// is taken as the new-file range; the number before an optional `,count`
/// suffix is returned. For `@@ -10,5 +20,8 @@ fn context()` this is `Some(20)`.
///
/// Returns `None` when no token starts with `+`, or when the first such token
/// does not begin with a valid `u32`.
fn parse_hunk_header(line: &str) -> Option<u32> {
    let new_range = line
        .split_whitespace()
        .find_map(|token| token.strip_prefix('+'))?;
    let start = new_range.split(',').next()?;
    start.parse().ok()
}
326
// Unit tests for path normalization, patch parsing, hunk-header parsing, the
// git-backed loader and the `DiffProvider` adapter.
#[cfg(test)]
mod tests {
    use super::*;
    use covguard_ports::DiffProvider;
    use std::fs;

    // A leading `b/` is removed.
    #[test]
    fn test_normalize_path_b_prefix() {
        assert_eq!(normalize_path("b/src/lib.rs"), "src/lib.rs");
    }

    // A leading `a/` is removed.
    #[test]
    fn test_normalize_path_a_prefix() {
        assert_eq!(normalize_path("a/src/lib.rs"), "src/lib.rs");
    }

    // A leading `./` is removed.
    #[test]
    fn test_normalize_path_dot_slash() {
        assert_eq!(normalize_path("./src/lib.rs"), "src/lib.rs");
    }

    // Backslashes become forward slashes, and a `b\` prefix is still removed.
    #[test]
    fn test_normalize_path_backslash() {
        assert_eq!(normalize_path("src\\lib.rs"), "src/lib.rs");
        assert_eq!(normalize_path("b\\src\\lib.rs"), "src/lib.rs");
    }

    // Prefix handling depends on order: `b/./` collapses fully, while in
    // `./b/` the `b/` component is kept.
    #[test]
    fn test_normalize_path_combined() {
        assert_eq!(normalize_path("b/./src/lib.rs"), "src/lib.rs");
        assert_eq!(normalize_path("./b/src/lib.rs"), "b/src/lib.rs");
    }

    // A path that is already normalized comes back unchanged.
    #[test]
    fn test_normalize_path_no_change() {
        assert_eq!(normalize_path("src/lib.rs"), "src/lib.rs");
    }

    // A newly added three-line file yields the single range 1..=3.
    #[test]
    fn test_parse_patch_simple_added() {
        let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,3 @@
+pub fn add(a: i32, b: i32) -> i32 {
+ a + b
+}
"#;

        let ranges = parse_patch(diff).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=3]));
    }

    // Two hunks in one file: additions are numbered from each hunk's own
    // new-file start, and adjacent added lines are merged into one range.
    #[test]
    fn test_parse_patch_modified_file_multiple_hunks() {
        let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,5 @@
 pub fn add(a: i32, b: i32) -> i32 {
+ // Adding numbers
 a + b
 }
+
@@ -10,2 +12,4 @@
 fn other() {
+ // New comment
+ println!("hello");
 }
"#;

        let ranges = parse_patch(diff).unwrap();
        assert_eq!(ranges.len(), 1);
        let file_ranges = ranges.get("src/lib.rs").unwrap();
        assert_eq!(file_ranges, &vec![2..=2, 5..=5, 13..=14]);
    }

    // A hunk that only removes lines produces no entry for the file.
    #[test]
    fn test_parse_patch_deletion_only_hunk() {
        let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,3 @@
 pub fn add(a: i32, b: i32) -> i32 {
- // Old comment
- // Another old comment
 a + b
 }
"#;

        let ranges = parse_patch(diff).unwrap();
        assert!(!ranges.contains_key("src/lib.rs"));
    }

    // For a rename with edits, ranges are keyed by the new name.
    #[test]
    fn test_parse_patch_rename() {
        let diff = r#"diff --git a/old_name.rs b/new_name.rs
similarity index 95%
rename from old_name.rs
rename to new_name.rs
index 1111111..2222222 100644
--- a/old_name.rs
+++ b/new_name.rs
@@ -1,3 +1,4 @@
 fn main() {
+ println!("added line");
 println!("Hello");
 }
"#;

        let ranges = parse_patch(diff).unwrap();
        assert_eq!(ranges.len(), 1);
        assert!(ranges.contains_key("new_name.rs"));
        assert_eq!(ranges.get("new_name.rs"), Some(&vec![2..=2]));
    }

    // A deleted file (`+++ /dev/null`) contributes nothing.
    #[test]
    fn test_parse_patch_deleted_file() {
        let diff = r#"diff --git a/deleted.rs b/deleted.rs
deleted file mode 100644
index 1111111..0000000
--- a/deleted.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn main() {
- println!("goodbye");
-}
"#;

        let ranges = parse_patch(diff).unwrap();
        assert!(ranges.is_empty());
    }

    // CRLF line endings parse the same as LF.
    #[test]
    fn test_parse_patch_crlf() {
        let diff = "diff --git a/src/lib.rs b/src/lib.rs\r\n\
                    new file mode 100644\r\n\
                    index 0000000..1111111\r\n\
                    --- /dev/null\r\n\
                    +++ b/src/lib.rs\r\n\
                    @@ -0,0 +1,2 @@\r\n\
                    +line one\r\n\
                    +line two\r\n";

        let ranges = parse_patch(diff).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=2]));
    }

    // Each file section in a multi-file patch gets its own entry.
    #[test]
    fn test_parse_patch_multiple_files() {
        let diff = r#"diff --git a/src/a.rs b/src/a.rs
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/src/a.rs
@@ -0,0 +1,2 @@
+fn a() {}
+fn b() {}
diff --git a/src/c.rs b/src/c.rs
new file mode 100644
index 0000000..2222222
--- /dev/null
+++ b/src/c.rs
@@ -0,0 +1,1 @@
+fn c() {}
"#;

        let ranges = parse_patch(diff).unwrap();
        assert_eq!(ranges.len(), 2);
        assert_eq!(ranges.get("src/a.rs"), Some(&vec![1..=2]));
        assert_eq!(ranges.get("src/c.rs"), Some(&vec![1..=1]));
    }

    // The "\ No newline at end of file" marker is not counted as a line.
    #[test]
    fn test_parse_patch_no_newline_marker() {
        let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,2 @@
+fn main() {}
+fn other() {}
\ No newline at end of file
"#;

        let ranges = parse_patch(diff).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=2]));
    }

    // Empty input is valid and yields no ranges.
    #[test]
    fn test_parse_patch_empty() {
        let ranges = parse_patch("").unwrap();
        assert!(ranges.is_empty());
    }

    // A "Binary files ... differ" line records the new-side path as binary.
    #[test]
    fn test_parse_patch_binary_files_marker() {
        let diff = r#"diff --git a/assets/logo.png b/assets/logo.png
index 1111111..2222222
Binary files a/assets/logo.png and b/assets/logo.png differ
"#;

        let result = parse_patch_with_meta(diff).unwrap();
        assert!(result.changed_ranges.is_empty());
        assert_eq!(result.binary_files, vec!["assets/logo.png".to_string()]);
    }

    // When the new side is `/dev/null`, nothing is recorded as binary.
    #[test]
    fn test_parse_patch_binary_files_marker_dev_null() {
        let diff = r#"diff --git a/assets/logo.png b/assets/logo.png
index 1111111..2222222
Binary files a/assets/logo.png and /dev/null differ
"#;

        let result = parse_patch_with_meta(diff).unwrap();
        assert!(result.changed_ranges.is_empty());
        assert!(result.binary_files.is_empty());
    }

    // A "Binary files" line without the " and " separator is ignored.
    #[test]
    fn test_parse_patch_binary_files_marker_without_and() {
        let diff = r#"diff --git a/assets/logo.png b/assets/logo.png
index 1111111..2222222
Binary files a/assets/logo.png differ
"#;

        let result = parse_patch_with_meta(diff).unwrap();
        assert!(result.changed_ranges.is_empty());
        assert!(result.binary_files.is_empty());
    }

    // "GIT binary patch" has no `+++` line, so the file name comes from the
    // `diff --git` header.
    #[test]
    fn test_parse_patch_git_binary_patch_marker() {
        let diff = r#"diff --git a/assets/data.bin b/assets/data.bin
index 1111111..2222222
GIT binary patch
literal 0
HcmV?d00001
"#;

        let result = parse_patch_with_meta(diff).unwrap();
        assert!(result.changed_ranges.is_empty());
        assert_eq!(result.binary_files, vec!["assets/data.bin".to_string()]);
    }

    // A hunk header without a `+start` part is rejected with an error.
    #[test]
    fn test_parse_patch_malformed_hunk_header_returns_error() {
        let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,1 @@
+line
"#;

        let result = parse_patch(diff);
        assert!(result.is_err());
    }

    // A completely empty line inside a hunk counts as a context line, so the
    // second addition lands on line 3.
    #[test]
    fn test_parse_patch_empty_line_in_hunk() {
        let diff = "diff --git a/src/lib.rs b/src/lib.rs\n\
index 1111111..2222222 100644\n\
--- a/src/lib.rs\n\
+++ b/src/lib.rs\n\
@@ -1,1 +1,3 @@\n\
+line1\n\
\n\
+line2\n";

        let ranges = parse_patch(diff).unwrap();
        assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=1, 3..=3]));
    }

    // Header with counts and trailing context text: the new start is returned.
    #[test]
    fn test_parse_hunk_header_with_counts() {
        let line = "@@ -10,5 +20,8 @@ fn context()";
        assert_eq!(parse_hunk_header(line), Some(20));
    }

    // Header in the short form without `,count`.
    #[test]
    fn test_parse_hunk_header_without_counts() {
        let line = "@@ -1 +1 @@";
        assert_eq!(parse_hunk_header(line), Some(1));
    }

    // No `+` token at all gives `None`.
    #[test]
    fn test_parse_hunk_header_missing_plus_returns_none() {
        let line = "@@ -10,5 @@ fn context()";
        assert_eq!(parse_hunk_header(line), None);
    }

    // Header of a newly created file (`-0,0`).
    #[test]
    fn test_parse_hunk_header_new_file() {
        let line = "@@ -0,0 +1,3 @@";
        assert_eq!(parse_hunk_header(line), Some(1));
    }

    // Removed lines do not advance the new-file counter; the two added lines
    // that replace one removed line are reported as 2..=3.
    #[test]
    fn test_parse_patch_mixed_additions_deletions() {
        let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
 fn main() {
- old_code();
+ new_code();
+ extra_code();
 common();
 }
"#;

        let ranges = parse_patch(diff).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges.get("src/lib.rs"), Some(&vec![2..=3]));
    }

    // Same shape as the simple-added case, with the patch held in a variable
    // named as fixture content.
    #[test]
    fn test_parse_fixture_simple_added_patch() {
        let fixture_content = r#"diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,3 @@
+pub fn add(a: i32, b: i32) -> i32 {
+ a + b
+}
"#;

        let ranges = parse_patch(fixture_content).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=3]));
    }

    // Context lines that lost their leading space are still counted as
    // context, so the addition is reported on line 2.
    #[test]
    fn test_parse_patch_context_without_leading_space() {
        let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
fn main() {
+ println!("added");
}
"#;

        let ranges = parse_patch(diff).unwrap();
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges.get("src/lib.rs"), Some(&vec![2..=2]));
    }

    // Pointing at a directory that is never created makes the call fail with
    // `DiffError::IoError`.
    #[test]
    fn test_load_diff_from_git_bad_repo_path_returns_io_error() {
        let temp = std::env::temp_dir().join(format!(
            "covguard-diff-adapter-missing-{}",
            std::process::id()
        ));
        let err = load_diff_from_git("HEAD~1", "HEAD", &temp).expect_err("expected error");
        assert!(matches!(err, DiffError::IoError(_)));
    }

    // End-to-end check against a real repository: creates a throwaway repo
    // with two commits and verifies the diff between them. Requires a `git`
    // executable to be available.
    #[test]
    fn test_load_diff_from_git_success_in_temp_repo() {
        // Nanosecond timestamp keeps the directory name unique per run.
        let unique = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        let root = std::env::temp_dir().join(format!("covguard-diff-adapter-{unique}"));
        fs::create_dir_all(&root).expect("create temp dir");

        let init = std::process::Command::new("git")
            .current_dir(&root)
            .args(["init"])
            .output()
            .expect("git init");
        assert!(init.status.success());

        // Set a repository-local identity so committing does not depend on
        // the machine's git configuration.
        let user_name = std::process::Command::new("git")
            .current_dir(&root)
            .args(["config", "user.name", "covguard-test"])
            .output()
            .expect("git config user.name");
        assert!(user_name.status.success());

        let user_email = std::process::Command::new("git")
            .current_dir(&root)
            .args(["config", "user.email", "covguard-test@example.com"])
            .output()
            .expect("git config user.email");
        assert!(user_email.status.success());

        // First commit: a one-line file.
        fs::write(root.join("a.txt"), "line1\n").expect("write initial file");
        let add_first = std::process::Command::new("git")
            .current_dir(&root)
            .args(["add", "a.txt"])
            .output()
            .expect("git add first");
        assert!(add_first.status.success());

        let commit_first = std::process::Command::new("git")
            .current_dir(&root)
            .args(["commit", "-m", "first"])
            .output()
            .expect("git commit first");
        assert!(commit_first.status.success());

        // Second commit: the same file with one more line.
        fs::write(root.join("a.txt"), "line1\nline2\n").expect("write changed file");
        let add_second = std::process::Command::new("git")
            .current_dir(&root)
            .args(["add", "a.txt"])
            .output()
            .expect("git add second");
        assert!(add_second.status.success());

        let commit_second = std::process::Command::new("git")
            .current_dir(&root)
            .args(["commit", "-m", "second"])
            .output()
            .expect("git commit second");
        assert!(commit_second.status.success());

        let diff = load_diff_from_git("HEAD~1", "HEAD", &root).expect("load diff");
        assert!(diff.contains("diff --git"));
        assert!(diff.contains("+++ b/a.txt"));

        // Best-effort cleanup; a failure to remove the directory is ignored.
        let _ = fs::remove_dir_all(&root);
    }

    // The trait implementation returns the same data as the free function.
    #[test]
    fn test_git_diff_provider_parse_patch() {
        let provider = GitDiffProvider;
        let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,1 @@
+fn main() {}
"#;

        let parsed = provider.parse_patch(diff).expect("parse via provider");
        assert_eq!(parsed.changed_ranges.get("src/lib.rs"), Some(&vec![1..=1]));
        assert!(parsed.binary_files.is_empty());
    }
}
797
// Property-based tests for `normalize_path`, driven by the `proptest` crate.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Any string matching `.*` can be normalized without panicking; the
        // result itself is not inspected.
        #[test]
        fn normalize_path_never_panics(path in ".*") {
            let _ = normalize_path(&path);
        }

        // For `b/<lowercase letters>`, the normalized path no longer starts
        // with `b/`.
        #[test]
        fn normalize_path_removes_leading_b_prefix(suffix in "[a-z]+") {
            // Suffixes that themselves start with "b" are skipped.
            // NOTE(review): presumably to keep the property unambiguous; confirm the intent.
            prop_assume!(!suffix.starts_with("b"));
            let path = format!("b/{}", suffix);
            let normalized = normalize_path(&path);
            prop_assert!(!normalized.starts_with("b/"), "Should remove b/ prefix from {}", path);
        }
    }
}