use std::collections::{BTreeMap, BTreeSet};
use std::ops::RangeInclusive;
use std::path::Path;
use covguard_paths::normalize_diff_path;
use covguard_ports::{DiffParseResult as PortDiffParseResult, DiffProvider};
use thiserror::Error;
/// Changed lines per file: maps a normalized file path to the inclusive
/// ranges of line numbers (in the new version of the file) that a patch added.
pub type ChangedRanges = BTreeMap<String, Vec<RangeInclusive<u32>>>;
/// Result of parsing a patch with [`parse_patch_with_meta`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffParseResult {
/// Added-line ranges per file; files reported in `binary_files` are excluded.
pub changed_ranges: ChangedRanges,
/// Normalized paths of files the patch marks as binary, sorted and de-duplicated.
pub binary_files: Vec<String>,
}
/// Errors produced while loading or parsing a diff.
#[derive(Debug, Error, Clone, PartialEq, Eq)]
pub enum DiffError {
/// The patch text is not a well-formed diff (for example a hunk header
/// without a parsable new-file start line); carries a description.
#[error("invalid diff format: {0}")]
InvalidFormat(String),
/// Obtaining the diff failed at the I/O level (for example the `git`
/// process could not be run); carries the underlying error text.
#[error("I/O error: {0}")]
IoError(String),
}
/// Stateless diff provider backed by the `git` command line and this
/// module's unified-diff parser.
///
/// The type carries no data, so it is free to copy and can be created with
/// either `GitDiffProvider` or `GitDiffProvider::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct GitDiffProvider;
impl DiffProvider for GitDiffProvider {
fn parse_patch(&self, text: &str) -> Result<PortDiffParseResult, String> {
parse_patch_with_meta(text)
.map(|parsed| PortDiffParseResult {
changed_ranges: parsed.changed_ranges,
binary_files: parsed.binary_files,
})
.map_err(|e| e.to_string())
}
fn load_diff_from_git(
&self,
base: &str,
head: &str,
repo_root: &Path,
) -> Result<String, String> {
load_diff_from_git(base, head, repo_root).map_err(|e| e.to_string())
}
}
/// Runs `git diff <base> <head>` in `repo_root` and returns the patch text.
///
/// Standard output is decoded lossily, so invalid UTF-8 bytes are replaced
/// rather than rejected.
///
/// # Errors
///
/// Returns [`DiffError::IoError`] when the `git` process cannot be started
/// (for example `repo_root` does not exist or `git` is not installed), or
/// when `git diff` exits with a non-zero status (for example an unknown
/// revision or a directory that is not a repository). In the latter case the
/// message includes the exit status and git's standard error output.
pub fn load_diff_from_git(base: &str, head: &str, repo_root: &Path) -> Result<String, DiffError> {
    let output = std::process::Command::new("git")
        .current_dir(repo_root)
        .args(["diff", base, head])
        .output()
        .map_err(|e| DiffError::IoError(e.to_string()))?;

    // Without `--exit-code`, `git diff` exits 0 even when there are
    // differences, so a non-zero status means the command itself failed.
    // Returning its (empty) stdout would be indistinguishable from "no changes".
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(DiffError::IoError(format!(
            "git diff {base} {head} failed ({}): {}",
            output.status,
            stderr.trim()
        )));
    }

    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
/// Normalizes a path taken from a diff by delegating to
/// `covguard_paths::normalize_diff_path`.
///
/// The unit tests in this file show the observable effect: a leading `a/`,
/// `b/` or `./` is removed and backslashes become forward slashes
/// (e.g. `b\src\lib.rs` -> `src/lib.rs`).
pub fn normalize_path(path: &str) -> String {
normalize_diff_path(path)
}
pub use covguard_ranges::merge_ranges;
/// Parses a unified diff and returns only the added-line ranges per file.
///
/// Convenience wrapper around [`parse_patch_with_meta`] that discards the
/// binary-file list.
///
/// # Errors
///
/// Propagates any [`DiffError`] reported by [`parse_patch_with_meta`].
pub fn parse_patch(text: &str) -> Result<ChangedRanges, DiffError> {
    parse_patch_with_meta(text).map(|parsed| parsed.changed_ranges)
}
/// Parses a unified diff (as produced by `git diff`) into the added-line
/// ranges per file plus the list of files the patch marks as binary.
///
/// Recognized lines:
/// - `diff --git a/<old> b/<new>` starts a new file section and resets all
///   per-file state;
/// - `rename to <path>` overrides the path taken from the following `+++` line;
/// - `Binary files <a> and <b> differ` and `GIT binary patch` mark a file as
///   binary;
/// - `+++ <path>` selects the file that following hunks belong to
///   (`/dev/null` means the file was deleted and its hunks are ignored);
/// - `@@ ... +<start>[,<count>] @@` starts a hunk at new-file line `<start>`.
///
/// Inside a hunk, `+` lines are recorded and advance the new-file line
/// counter, `-` lines and `\ No newline at end of file` markers are skipped,
/// and every other line (including an empty line or a context line whose
/// leading space was lost) counts as context.
///
/// Files reported as binary are removed from `changed_ranges`.
///
/// # Errors
///
/// Returns [`DiffError::InvalidFormat`] when a hunk header for a tracked file
/// has no parsable new-file start line.
pub fn parse_patch_with_meta(text: &str) -> Result<DiffParseResult, DiffError> {
    // Normalize Windows line endings so no header or path carries a stray '\r'.
    let text = text.replace("\r\n", "\n");

    // Added line numbers per file in encounter order; merged into ranges below.
    let mut added_lines: BTreeMap<String, Vec<u32>> = BTreeMap::new();
    // File selected by the most recent `+++` line (None for deleted files).
    let mut current_file: Option<String> = None;
    // `b/` side of the most recent `diff --git` header; used for binary
    // patches, which have no `+++` line.
    let mut current_diff_file: Option<String> = None;
    let mut current_new_line: u32 = 0;
    let mut in_hunk = false;
    let mut rename_to: Option<String> = None;
    let mut binary_files: BTreeSet<String> = BTreeSet::new();

    for line in text.lines() {
        if let Some(rest) = line.strip_prefix("diff --git ") {
            current_diff_file = rest.split_whitespace().nth(1).map(normalize_path);
            // A new file section begins: drop everything remembered about the
            // previous one. Otherwise a pure rename (which has no `+++` line)
            // would leak its `rename to` target onto the next file, and a
            // binary patch would be attributed to the previous text file.
            current_file = None;
            rename_to = None;
            in_hunk = false;
            continue;
        }

        if let Some(target) = line.strip_prefix("rename to ") {
            rename_to = Some(normalize_path(target));
            continue;
        }

        if let Some(rest) = line.strip_prefix("Binary files ") {
            if let Some(and_pos) = rest.find(" and ") {
                let after_and = &rest[and_pos + " and ".len()..];
                let path = after_and
                    .strip_suffix(" differ")
                    .unwrap_or(after_and)
                    .trim();
                // `/dev/null` on the new side means the binary file was deleted.
                if path != "/dev/null" {
                    binary_files.insert(normalize_path(path));
                }
            }
            continue;
        }

        if line.starts_with("GIT binary patch") {
            if let Some(path) = current_file.clone().or_else(|| current_diff_file.clone()) {
                binary_files.insert(path);
            }
            continue;
        }

        if let Some(path) = line.strip_prefix("+++ ") {
            let path = path.trim();
            if path == "/dev/null" {
                current_file = None;
                continue;
            }
            // A pending `rename to` wins over the header path and is consumed.
            current_file = Some(rename_to.take().unwrap_or_else(|| normalize_path(path)));
            in_hunk = false;
            continue;
        }

        if line.starts_with("@@ ") {
            // Hunks of deleted or unknown files are skipped without validation.
            if current_file.is_some() {
                match parse_hunk_header(line) {
                    Some(new_start) => {
                        current_new_line = new_start;
                        in_hunk = true;
                    }
                    None => {
                        return Err(DiffError::InvalidFormat(format!(
                            "malformed hunk header: '{}'",
                            line
                        )));
                    }
                }
            }
            continue;
        }

        if in_hunk {
            if let Some(file) = &current_file {
                match line.chars().next() {
                    Some('+') => {
                        added_lines
                            .entry(file.clone())
                            .or_default()
                            .push(current_new_line);
                        // Saturate so a hostile start line near u32::MAX cannot
                        // panic (debug) or wrap to line 0 (release).
                        current_new_line = current_new_line.saturating_add(1);
                    }
                    // Removed lines and "\ No newline at end of file" markers
                    // do not exist in the new file.
                    Some('-') | Some('\\') => {}
                    // Context: ' ', an empty line, or any other leading char.
                    _ => {
                        current_new_line = current_new_line.saturating_add(1);
                    }
                }
            }
        }
    }

    let mut changed_ranges: ChangedRanges = added_lines
        .into_iter()
        .map(|(file, lines)| {
            let singles: Vec<RangeInclusive<u32>> = lines.into_iter().map(|l| l..=l).collect();
            (file, merge_ranges(singles))
        })
        .collect();

    // Binary files have no meaningful line ranges.
    for binary in &binary_files {
        changed_ranges.remove(binary);
    }

    Ok(DiffParseResult {
        changed_ranges,
        binary_files: binary_files.into_iter().collect(),
    })
}
/// Extracts the new-file start line from a hunk header such as
/// `@@ -10,5 +20,8 @@ fn context()`.
///
/// The first whitespace-separated token beginning with `+` is taken as the
/// new-file part; the text before its first comma is parsed as the start
/// line. Returns `None` when no such token exists or when that first token
/// does not hold a valid `u32` (later `+` tokens are not tried).
fn parse_hunk_header(line: &str) -> Option<u32> {
    let new_side = line
        .split_whitespace()
        .find_map(|token| token.strip_prefix('+'))?;
    let (start, _count) = new_side.split_once(',').unwrap_or((new_side, ""));
    start.parse().ok()
}
#[cfg(test)]
mod tests {
use super::*;
use covguard_ports::DiffProvider;
use std::fs;
// --- normalize_path ---------------------------------------------------
// A leading `b/` (new side of a git diff) is removed.
#[test]
fn test_normalize_path_b_prefix() {
assert_eq!(normalize_path("b/src/lib.rs"), "src/lib.rs");
}
// A leading `a/` (old side of a git diff) is removed.
#[test]
fn test_normalize_path_a_prefix() {
assert_eq!(normalize_path("a/src/lib.rs"), "src/lib.rs");
}
// A leading `./` is removed.
#[test]
fn test_normalize_path_dot_slash() {
assert_eq!(normalize_path("./src/lib.rs"), "src/lib.rs");
}
// Backslashes are converted to forward slashes before prefix stripping.
#[test]
fn test_normalize_path_backslash() {
assert_eq!(normalize_path("src\\lib.rs"), "src/lib.rs");
assert_eq!(normalize_path("b\\src\\lib.rs"), "src/lib.rs");
}
// Prefix order matters: `b/./x` loses both prefixes, while `./b/x` keeps `b/`.
#[test]
fn test_normalize_path_combined() {
assert_eq!(normalize_path("b/./src/lib.rs"), "src/lib.rs");
assert_eq!(normalize_path("./b/src/lib.rs"), "b/src/lib.rs");
}
// An already-normalized path is returned unchanged.
#[test]
fn test_normalize_path_no_change() {
assert_eq!(normalize_path("src/lib.rs"), "src/lib.rs");
}
// A newly added three-line file yields the single range 1..=3.
#[test]
fn test_parse_patch_simple_added() {
let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,3 @@
+pub fn add(a: i32, b: i32) -> i32 {
+ a + b
+}
"#;
let ranges = parse_patch(diff).unwrap();
assert_eq!(ranges.len(), 1);
assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=3]));
}
// Two hunks in one file: added lines are numbered from each hunk's own
// new-file start, and adjacent additions (13, 14) merge into one range.
#[test]
fn test_parse_patch_modified_file_multiple_hunks() {
let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,5 @@
pub fn add(a: i32, b: i32) -> i32 {
+ // Adding numbers
a + b
}
+
@@ -10,2 +12,4 @@
fn other() {
+ // New comment
+ println!("hello");
}
"#;
let ranges = parse_patch(diff).unwrap();
assert_eq!(ranges.len(), 1);
let file_ranges = ranges.get("src/lib.rs").unwrap();
assert_eq!(file_ranges, &vec![2..=2, 5..=5, 13..=14]);
}
// A hunk that only removes lines produces no entry for the file at all.
#[test]
fn test_parse_patch_deletion_only_hunk() {
let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,3 @@
pub fn add(a: i32, b: i32) -> i32 {
- // Old comment
- // Another old comment
a + b
}
"#;
let ranges = parse_patch(diff).unwrap();
assert!(!ranges.contains_key("src/lib.rs"));
}
// A renamed-and-modified file is reported under its new name.
#[test]
fn test_parse_patch_rename() {
let diff = r#"diff --git a/old_name.rs b/new_name.rs
similarity index 95%
rename from old_name.rs
rename to new_name.rs
index 1111111..2222222 100644
--- a/old_name.rs
+++ b/new_name.rs
@@ -1,3 +1,4 @@
fn main() {
+ println!("added line");
println!("Hello");
}
"#;
let ranges = parse_patch(diff).unwrap();
assert_eq!(ranges.len(), 1);
assert!(ranges.contains_key("new_name.rs"));
assert_eq!(ranges.get("new_name.rs"), Some(&vec![2..=2]));
}
// A deleted file (`+++ /dev/null`) contributes no changed ranges.
#[test]
fn test_parse_patch_deleted_file() {
let diff = r#"diff --git a/deleted.rs b/deleted.rs
deleted file mode 100644
index 1111111..0000000
--- a/deleted.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn main() {
- println!("goodbye");
-}
"#;
let ranges = parse_patch(diff).unwrap();
assert!(ranges.is_empty());
}
// CRLF line endings are handled: paths and line counts match the LF case.
#[test]
fn test_parse_patch_crlf() {
let diff = "diff --git a/src/lib.rs b/src/lib.rs\r\n\
new file mode 100644\r\n\
index 0000000..1111111\r\n\
--- /dev/null\r\n\
+++ b/src/lib.rs\r\n\
@@ -0,0 +1,2 @@\r\n\
+line one\r\n\
+line two\r\n";
let ranges = parse_patch(diff).unwrap();
assert_eq!(ranges.len(), 1);
assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=2]));
}
// Two file sections in one patch are tracked independently.
#[test]
fn test_parse_patch_multiple_files() {
let diff = r#"diff --git a/src/a.rs b/src/a.rs
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/src/a.rs
@@ -0,0 +1,2 @@
+fn a() {}
+fn b() {}
diff --git a/src/c.rs b/src/c.rs
new file mode 100644
index 0000000..2222222
--- /dev/null
+++ b/src/c.rs
@@ -0,0 +1,1 @@
+fn c() {}
"#;
let ranges = parse_patch(diff).unwrap();
assert_eq!(ranges.len(), 2);
assert_eq!(ranges.get("src/a.rs"), Some(&vec![1..=2]));
assert_eq!(ranges.get("src/c.rs"), Some(&vec![1..=1]));
}
// The "\ No newline at end of file" marker is neither an added line nor
// counted as a line of the new file.
#[test]
fn test_parse_patch_no_newline_marker() {
let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,2 @@
+fn main() {}
+fn other() {}
\ No newline at end of file
"#;
let ranges = parse_patch(diff).unwrap();
assert_eq!(ranges.len(), 1);
assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=2]));
}
// Empty input parses successfully to an empty map.
#[test]
fn test_parse_patch_empty() {
let ranges = parse_patch("").unwrap();
assert!(ranges.is_empty());
}
// --- binary file detection ---------------------------------------------
// "Binary files a/... and b/... differ" records the new-side path as binary.
#[test]
fn test_parse_patch_binary_files_marker() {
let diff = r#"diff --git a/assets/logo.png b/assets/logo.png
index 1111111..2222222
Binary files a/assets/logo.png and b/assets/logo.png differ
"#;
let result = parse_patch_with_meta(diff).unwrap();
assert!(result.changed_ranges.is_empty());
assert_eq!(result.binary_files, vec!["assets/logo.png".to_string()]);
}
// A binary file whose new side is /dev/null (deleted) is not reported.
#[test]
fn test_parse_patch_binary_files_marker_dev_null() {
let diff = r#"diff --git a/assets/logo.png b/assets/logo.png
index 1111111..2222222
Binary files a/assets/logo.png and /dev/null differ
"#;
let result = parse_patch_with_meta(diff).unwrap();
assert!(result.changed_ranges.is_empty());
assert!(result.binary_files.is_empty());
}
// A "Binary files" line without the " and " separator is ignored.
#[test]
fn test_parse_patch_binary_files_marker_without_and() {
let diff = r#"diff --git a/assets/logo.png b/assets/logo.png
index 1111111..2222222
Binary files a/assets/logo.png differ
"#;
let result = parse_patch_with_meta(diff).unwrap();
assert!(result.changed_ranges.is_empty());
assert!(result.binary_files.is_empty());
}
// "GIT binary patch" has no `+++` line, so the path comes from the
// `diff --git` header.
#[test]
fn test_parse_patch_git_binary_patch_marker() {
let diff = r#"diff --git a/assets/data.bin b/assets/data.bin
index 1111111..2222222
GIT binary patch
literal 0
HcmV?d00001
"#;
let result = parse_patch_with_meta(diff).unwrap();
assert!(result.changed_ranges.is_empty());
assert_eq!(result.binary_files, vec!["assets/data.bin".to_string()]);
}
// A hunk header with no `+<start>` part for a tracked file is an error.
#[test]
fn test_parse_patch_malformed_hunk_header_returns_error() {
let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,1 @@
+line
"#;
let result = parse_patch(diff);
assert!(result.is_err());
}
// A completely empty line inside a hunk counts as a context line, so the
// second addition lands on new-file line 3.
#[test]
fn test_parse_patch_empty_line_in_hunk() {
let diff = "diff --git a/src/lib.rs b/src/lib.rs\n\
index 1111111..2222222 100644\n\
--- a/src/lib.rs\n\
+++ b/src/lib.rs\n\
@@ -1,1 +1,3 @@\n\
+line1\n\
\n\
+line2\n";
let ranges = parse_patch(diff).unwrap();
assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=1, 3..=3]));
}
// --- parse_hunk_header ---------------------------------------------------
// `+20,8` with trailing section text yields start line 20.
#[test]
fn test_parse_hunk_header_with_counts() {
let line = "@@ -10,5 +20,8 @@ fn context()";
assert_eq!(parse_hunk_header(line), Some(20));
}
// The count is optional: `+1` alone yields start line 1.
#[test]
fn test_parse_hunk_header_without_counts() {
let line = "@@ -1 +1 @@";
assert_eq!(parse_hunk_header(line), Some(1));
}
// No `+` token at all means the header cannot be parsed.
#[test]
fn test_parse_hunk_header_missing_plus_returns_none() {
let line = "@@ -10,5 @@ fn context()";
assert_eq!(parse_hunk_header(line), None);
}
// Header of a newly created file (`-0,0`) still yields the new-side start.
#[test]
fn test_parse_hunk_header_new_file() {
let line = "@@ -0,0 +1,3 @@";
assert_eq!(parse_hunk_header(line), Some(1));
}
// Removed lines do not advance the new-file counter, so a replacement of one
// line by two is reported as new-file lines 2..=3.
#[test]
fn test_parse_patch_mixed_additions_deletions() {
let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
fn main() {
- old_code();
+ new_code();
+ extra_code();
common();
}
"#;
let ranges = parse_patch(diff).unwrap();
assert_eq!(ranges.len(), 1);
assert_eq!(ranges.get("src/lib.rs"), Some(&vec![2..=3]));
}
// Inline copy of a "simple added file" fixture; same input and expectation
// as test_parse_patch_simple_added.
#[test]
fn test_parse_fixture_simple_added_patch() {
let fixture_content = r#"diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,3 @@
+pub fn add(a: i32, b: i32) -> i32 {
+ a + b
+}
"#;
let ranges = parse_patch(fixture_content).unwrap();
assert_eq!(ranges.len(), 1);
assert_eq!(ranges.get("src/lib.rs"), Some(&vec![1..=3]));
}
// Context lines that lost their leading space are still counted as new-file
// lines, so the addition is reported on line 2.
#[test]
fn test_parse_patch_context_without_leading_space() {
let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
index 1111111..2222222 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
fn main() {
+ println!("added");
}
"#;
let ranges = parse_patch(diff).unwrap();
assert_eq!(ranges.len(), 1);
assert_eq!(ranges.get("src/lib.rs"), Some(&vec![2..=2]));
}
// Pointing at a directory that is never created makes spawning `git` fail,
// which must surface as DiffError::IoError.
#[test]
fn test_load_diff_from_git_bad_repo_path_returns_io_error() {
let temp = std::env::temp_dir().join(format!(
"covguard-diff-adapter-missing-{}",
std::process::id()
));
let err = load_diff_from_git("HEAD~1", "HEAD", &temp).expect_err("expected error");
assert!(matches!(err, DiffError::IoError(_)));
}
// End-to-end check against a real repository: builds a throwaway repo with
// two commits and verifies the diff between them. Requires `git` on PATH.
#[test]
fn test_load_diff_from_git_success_in_temp_repo() {
// Nanosecond timestamp keeps the directory name unique across runs.
let unique = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("time")
.as_nanos();
let root = std::env::temp_dir().join(format!("covguard-diff-adapter-{unique}"));
fs::create_dir_all(&root).expect("create temp dir");
let init = std::process::Command::new("git")
.current_dir(&root)
.args(["init"])
.output()
.expect("git init");
assert!(init.status.success());
// Set a repo-local identity so `git commit` works without global config.
let user_name = std::process::Command::new("git")
.current_dir(&root)
.args(["config", "user.name", "covguard-test"])
.output()
.expect("git config user.name");
assert!(user_name.status.success());
let user_email = std::process::Command::new("git")
.current_dir(&root)
.args(["config", "user.email", "covguard-test@example.com"])
.output()
.expect("git config user.email");
assert!(user_email.status.success());
// First commit: a.txt with one line.
fs::write(root.join("a.txt"), "line1\n").expect("write initial file");
let add_first = std::process::Command::new("git")
.current_dir(&root)
.args(["add", "a.txt"])
.output()
.expect("git add first");
assert!(add_first.status.success());
let commit_first = std::process::Command::new("git")
.current_dir(&root)
.args(["commit", "-m", "first"])
.output()
.expect("git commit first");
assert!(commit_first.status.success());
// Second commit: append a line so HEAD~1..HEAD has a one-line addition.
fs::write(root.join("a.txt"), "line1\nline2\n").expect("write changed file");
let add_second = std::process::Command::new("git")
.current_dir(&root)
.args(["add", "a.txt"])
.output()
.expect("git add second");
assert!(add_second.status.success());
let commit_second = std::process::Command::new("git")
.current_dir(&root)
.args(["commit", "-m", "second"])
.output()
.expect("git commit second");
assert!(commit_second.status.success());
let diff = load_diff_from_git("HEAD~1", "HEAD", &root).expect("load diff");
assert!(diff.contains("diff --git"));
assert!(diff.contains("+++ b/a.txt"));
// Best-effort cleanup; only reached when every assertion above passed.
let _ = fs::remove_dir_all(&root);
}
// The DiffProvider implementation returns the same ranges as the free
// function, converted to the port's result type.
#[test]
fn test_git_diff_provider_parse_patch() {
let provider = GitDiffProvider;
let diff = r#"diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,1 @@
+fn main() {}
"#;
let parsed = provider.parse_patch(diff).expect("parse via provider");
assert_eq!(parsed.changed_ranges.get("src/lib.rs"), Some(&vec![1..=1]));
assert!(parsed.binary_files.is_empty());
}
}
// Property-based tests for `normalize_path`.
#[cfg(test)]
mod proptests {
use super::*;
use proptest::prelude::*;
proptest! {
// Arbitrary strings must never make normalize_path panic.
#[test]
fn normalize_path_never_panics(path in ".*") {
let _ = normalize_path(&path);
}
// For `b/<lowercase letters>`, the result must not keep the `b/` prefix.
// NOTE(review): the generated suffix never contains '/', so the
// prop_assume! guard below looks redundant — confirm before removing.
#[test]
fn normalize_path_removes_leading_b_prefix(suffix in "[a-z]+") {
prop_assume!(!suffix.starts_with("b"));
let path = format!("b/{}", suffix);
let normalized = normalize_path(&path);
prop_assert!(!normalized.starts_with("b/"), "Should remove b/ prefix from {}", path);
}
}
}