use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
/// How a reported location relates to the parsed diff.
///
/// Produced by `DiffHunks::attribute`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Attribution {
/// The line lies inside a recorded hunk range, widened by `HUNK_MARGIN`
/// lines on each side.
InChangedHunk,
/// The file is part of the diff, but no line was given or the line is
/// outside every (widened) hunk.
InChangedFile,
/// The file does not appear in the diff at all.
InUnchangedFile,
}
/// Index of a unified diff: which files changed and which new-side line
/// ranges were touched in each of them.
pub struct DiffHunks {
/// Per file, the inclusive `(start, end)` line ranges taken from the `+`
/// side of each hunk header. Zero-length hunks are not stored.
hunks: HashMap<PathBuf, Vec<(u32, u32)>>,
/// Every path seen in a `--- a/` or `+++ b/` header, plus rename/copy
/// targets.
changed_files: HashSet<PathBuf>,
/// Maps the old path of a rename/copy to its new path.
renames: HashMap<PathBuf, PathBuf>,
}
/// Number of lines by which each hunk range is widened on both sides when
/// deciding whether a line counts as being inside a changed hunk.
const HUNK_MARGIN: u32 = 3;
impl DiffHunks {
    /// Runs `git diff -U0 <base_ref>..HEAD` in `repo_path` and parses the result.
    ///
    /// The diff is requested with colour and external diff drivers disabled and
    /// with explicit `a/` / `b/` prefixes, so the output has the shape
    /// [`DiffHunks::parse_diff`] expects regardless of the user's git
    /// configuration (`diff.noprefix`, `diff.mnemonicPrefix`, `color.diff`, ...).
    ///
    /// If git runs but exits with a non-zero status (for example because
    /// `base_ref` does not exist), an empty `DiffHunks` is returned: attribution
    /// is best-effort and every file is then treated as unchanged.
    ///
    /// # Errors
    ///
    /// Returns an error if `base_ref` starts with `-` (git would interpret it
    /// as a command-line option) or if the `git` process cannot be spawned.
    pub fn from_git_diff(repo_path: &Path, base_ref: &str) -> anyhow::Result<Self> {
        // No revision expression starts with '-', but such a value would be
        // parsed by git as an option (e.g. `--output=...`).
        if base_ref.starts_with('-') {
            return Err(anyhow::anyhow!(
                "Invalid base ref {base_ref:?}: must not start with '-'"
            ));
        }
        let range = format!("{base_ref}..HEAD");
        let output = std::process::Command::new("git")
            .args([
                "diff",
                "-U0",
                "--no-color",
                "--no-ext-diff",
                "--src-prefix=a/",
                "--dst-prefix=b/",
                range.as_str(),
            ])
            .current_dir(repo_path)
            .output()
            .map_err(|e| anyhow::anyhow!("Failed to run git diff: {e}"))?;
        if !output.status.success() {
            return Ok(Self::empty());
        }
        let diff_text = String::from_utf8_lossy(&output.stdout);
        Ok(Self::parse_diff(&diff_text))
    }

    /// Builds the index from unified-diff text that uses `a/` and `b/` path
    /// prefixes.
    ///
    /// Recognised lines:
    /// * `diff --git ...` starts a new file section,
    /// * `rename from` / `rename to` (and `copy from` / `copy to`) record an
    ///   old-path to new-path mapping and mark the new path as changed,
    /// * `--- a/<path>` and `+++ b/<path>` mark `<path>` as changed; the `+++`
    ///   path becomes the file that following hunks belong to,
    /// * `@@ -o[,n] +s[,c] @@` records the inclusive new-side range
    ///   `s ..= s + c - 1` when `c > 0` (pure deletions add no range).
    ///
    /// Paths may be C-quoted by git (`"b/caf\303\251.rs"`) and may carry a
    /// trailing TAB (optionally followed by a timestamp); both are decoded.
    /// Lines that belong to a hunk body are skipped by counting them against
    /// the sizes announced in the hunk header, so file content that happens to
    /// look like a header (for example a removed line `-- a/x`, which appears
    /// as `--- a/x`) is not misread.
    pub fn parse_diff(diff_text: &str) -> Self {
        let mut hunks: HashMap<PathBuf, Vec<(u32, u32)>> = HashMap::new();
        let mut changed_files: HashSet<PathBuf> = HashSet::new();
        let mut renames: HashMap<PathBuf, PathBuf> = HashMap::new();
        let mut current_file: Option<PathBuf> = None;
        let mut pending_rename_from: Option<PathBuf> = None;
        // Body lines the current hunk still owes on the old / new side.
        let mut old_left: u32 = 0;
        let mut new_left: u32 = 0;

        for line in diff_text.lines() {
            if old_left > 0 || new_left > 0 {
                let is_body = match line.as_bytes().first() {
                    Some(b'+') if new_left > 0 => {
                        new_left -= 1;
                        true
                    }
                    Some(b'-') if old_left > 0 => {
                        old_left -= 1;
                        true
                    }
                    // Context line; an empty line is a context line whose
                    // single space was stripped.
                    Some(b' ') | None if old_left > 0 && new_left > 0 => {
                        old_left -= 1;
                        new_left -= 1;
                        true
                    }
                    // "\ No newline at end of file" marker.
                    Some(b'\\') => true,
                    _ => false,
                };
                if is_body {
                    continue;
                }
                // The hunk is shorter than announced (hand-written or truncated
                // diff): stop counting and treat this line as a header.
                old_left = 0;
                new_left = 0;
            }

            if line.starts_with("diff --git ") {
                // New file section: nothing from the previous one may leak in.
                pending_rename_from = None;
                current_file = None;
            } else if let Some(old) = line
                .strip_prefix("rename from ")
                .or_else(|| line.strip_prefix("copy from "))
            {
                pending_rename_from = Some(PathBuf::from(Self::unquote_path(old)));
            } else if let Some(new) = line
                .strip_prefix("rename to ")
                .or_else(|| line.strip_prefix("copy to "))
            {
                if let Some(old) = pending_rename_from.take() {
                    let new_path = PathBuf::from(Self::unquote_path(new));
                    changed_files.insert(new_path.clone());
                    renames.insert(old, new_path);
                }
            } else if let Some(payload) = line.strip_prefix("+++ ") {
                // `+++ /dev/null` (deleted file) or an unparsable name leaves no
                // current file, so its hunks are not filed under another path.
                current_file = Self::header_path(payload, "b/");
                if let Some(path) = &current_file {
                    changed_files.insert(path.clone());
                }
            } else if let Some(payload) = line.strip_prefix("--- ") {
                if let Some(path) = Self::header_path(payload, "a/") {
                    changed_files.insert(path);
                }
            } else if line.starts_with("@@ ") {
                if let Some((start, count)) = parse_hunk_header(line) {
                    new_left = count;
                    old_left = Self::old_side_len(line).unwrap_or(0);
                    if count > 0 {
                        if let Some(file) = &current_file {
                            // Saturate instead of overflowing on absurd headers.
                            let end = start.saturating_add(count - 1);
                            hunks.entry(file.clone()).or_default().push((start, end));
                        }
                    }
                }
            }
        }

        Self {
            hunks,
            changed_files,
            renames,
        }
    }

    /// Classifies a location against the diff.
    ///
    /// `file` is first translated through the rename table (old path to new
    /// path). The result is:
    /// * [`Attribution::InUnchangedFile`] if the resulting path is not part of
    ///   the diff,
    /// * [`Attribution::InChangedHunk`] if `line` is given and falls within any
    ///   recorded hunk range widened by `HUNK_MARGIN` lines on each side,
    /// * [`Attribution::InChangedFile`] otherwise (no line, or a line outside
    ///   every widened hunk).
    pub fn attribute(&self, file: &Path, line: Option<u32>) -> Attribution {
        let effective = self.renames.get(file).map(|p| p.as_path()).unwrap_or(file);
        if !self.changed_files.contains(effective) {
            return Attribution::InUnchangedFile;
        }
        let line = match line {
            Some(l) => l,
            None => return Attribution::InChangedFile,
        };
        let in_hunk = self.hunks.get(effective).map_or(false, |ranges| {
            ranges.iter().any(|&(start, end)| {
                let lo = start.saturating_sub(HUNK_MARGIN);
                let hi = end.saturating_add(HUNK_MARGIN);
                (lo..=hi).contains(&line)
            })
        });
        if in_hunk {
            Attribution::InChangedHunk
        } else {
            Attribution::InChangedFile
        }
    }

    /// Number of distinct paths recorded as changed.
    pub fn changed_file_count(&self) -> usize {
        self.changed_files.len()
    }

    /// An index that contains no files, hunks or renames.
    fn empty() -> Self {
        Self {
            hunks: HashMap::new(),
            changed_files: HashSet::new(),
            renames: HashMap::new(),
        }
    }

    /// Extracts the path from the payload of a `---`/`+++` line and removes
    /// `prefix` (`a/` or `b/`); `None` if the prefix is absent (`/dev/null`).
    fn header_path(payload: &str, prefix: &str) -> Option<PathBuf> {
        // git appends a TAB after names containing spaces and other diff tools
        // append TAB + timestamp. A literal TAB inside a name is always
        // escaped by quoting, so cutting at the first TAB is safe.
        let name = payload.split('\t').next().unwrap_or(payload);
        let decoded = Self::unquote_path(name);
        decoded.strip_prefix(prefix).map(PathBuf::from)
    }

    /// Decodes a path that git wrapped in double quotes with C-style escapes
    /// (`\"`, `\\`, `\t`, `\n`, three-digit octal bytes, ...). Input that is
    /// not quoted, or whose escapes are malformed, is returned unchanged.
    fn unquote_path(raw: &str) -> String {
        let inner = match raw.strip_prefix('"').and_then(|r| r.strip_suffix('"')) {
            Some(inner) => inner,
            None => return raw.to_owned(),
        };
        let mut bytes: Vec<u8> = Vec::with_capacity(inner.len());
        let mut rest = inner.bytes();
        while let Some(b) = rest.next() {
            if b != b'\\' {
                bytes.push(b);
                continue;
            }
            let decoded = match rest.next() {
                Some(b'a') => 0x07,
                Some(b'b') => 0x08,
                Some(b'f') => 0x0c,
                Some(b'n') => b'\n',
                Some(b'r') => b'\r',
                Some(b't') => b'\t',
                Some(b'v') => 0x0b,
                Some(first @ b'0'..=b'3') => {
                    // Exactly three octal digits; the leading digit is at most
                    // 3, so the value always fits in a byte.
                    let mut value = first - b'0';
                    for _ in 0..2 {
                        match rest.next() {
                            Some(digit @ b'0'..=b'7') => value = value * 8 + (digit - b'0'),
                            _ => return raw.to_owned(),
                        }
                    }
                    value
                }
                // `\"` and `\\` stand for the escaped character itself.
                Some(other) => other,
                None => return raw.to_owned(),
            };
            bytes.push(decoded);
        }
        String::from_utf8_lossy(&bytes).into_owned()
    }

    /// Number of old-side lines announced by a hunk header
    /// (`@@ -start[,len] +... @@`); `len` defaults to 1 when omitted.
    fn old_side_len(header: &str) -> Option<u32> {
        let range = header.strip_prefix("@@ -")?.split(' ').next()?;
        match range.split_once(',') {
            Some((_, len)) => len.parse().ok(),
            None => range.parse::<u32>().ok().map(|_| 1),
        }
    }
}
/// Reads the new-side range out of a unified-diff hunk header.
///
/// Everything between the first `+` and the following ` @@` (or the end of
/// the line when that marker is missing) is taken as `start[,count]`.
/// Returns `(start, count)`, with `count` defaulting to 1 when it is omitted,
/// or `None` if there is no `+` or a number fails to parse as `u32`.
fn parse_hunk_header(line: &str) -> Option<(u32, u32)> {
    let (_, tail) = line.split_once('+')?;
    let range = match tail.find(" @@") {
        Some(stop) => &tail[..stop],
        None => tail,
    };
    match range.split_once(',') {
        Some((first, len)) => {
            let first: u32 = first.parse().ok()?;
            let len: u32 = len.parse().ok()?;
            Some((first, len))
        }
        None => range.parse::<u32>().ok().map(|first| (first, 1)),
    }
}
// Unit tests for hunk-header parsing, diff parsing and attribution.
#[cfg(test)]
mod tests {
use super::*;
// Header with explicit counts on both sides: only the `+` side is returned.
#[test]
fn test_parse_hunk_header_with_count() {
assert_eq!(
parse_hunk_header("@@ -10,5 +20,3 @@ fn foo()"),
Some((20, 3))
);
}
// A range without `,count` means a single line.
#[test]
fn test_parse_hunk_header_without_count() {
assert_eq!(parse_hunk_header("@@ -10 +20 @@"), Some((20, 1)));
}
// A new-side count of zero (pure deletion) is reported as-is by the parser.
#[test]
fn test_parse_hunk_header_zero_count() {
assert_eq!(parse_hunk_header("@@ -10,3 +20,0 @@"), Some((20, 0)));
}
// One file, one hunk: `+10,5` becomes the inclusive range 10..=14.
#[test]
fn test_parse_diff_single_file() {
let diff = "\
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,3 +10,5 @@ fn main() {
+ let x = 1;
+ let y = 2;
";
let hunks = DiffHunks::parse_diff(diff);
assert!(hunks.changed_files.contains(&PathBuf::from("src/main.rs")));
let file_hunks = hunks.hunks.get(&PathBuf::from("src/main.rs")).unwrap();
assert_eq!(file_hunks, &[(10, 14)]); }
// Two hunks in the same file are stored in order of appearance.
#[test]
fn test_parse_diff_multiple_hunks() {
let diff = "\
diff --git a/src/api.rs b/src/api.rs
--- a/src/api.rs
+++ b/src/api.rs
@@ -5,2 +5,3 @@ fn handler() {
+ new_line();
@@ -50,1 +51,4 @@ fn query() {
+ more();
+ code();
+ here();
";
let hunks = DiffHunks::parse_diff(diff);
let file_hunks = hunks.hunks.get(&PathBuf::from("src/api.rs")).unwrap();
assert_eq!(file_hunks.len(), 2);
assert_eq!(file_hunks[0], (5, 7)); assert_eq!(file_hunks[1], (51, 54)); }
// Line 12 lies inside the hunk range 10..=14.
#[test]
fn test_attribute_in_changed_hunk() {
let diff = "\
diff --git a/src/api.rs b/src/api.rs
--- a/src/api.rs
+++ b/src/api.rs
@@ -10,2 +10,5 @@ fn handler() {
";
let hunks = DiffHunks::parse_diff(diff);
assert_eq!(
hunks.attribute(Path::new("src/api.rs"), Some(12)),
Attribution::InChangedHunk
);
}
// The hunk ends at line 14; with HUNK_MARGIN the last line that still counts
// is 17, and 18 is the first one outside.
#[test]
fn test_attribute_in_changed_hunk_with_margin() {
let diff = "\
diff --git a/src/api.rs b/src/api.rs
--- a/src/api.rs
+++ b/src/api.rs
@@ -10,2 +10,5 @@ fn handler() {
";
let hunks = DiffHunks::parse_diff(diff);
assert_eq!(
hunks.attribute(Path::new("src/api.rs"), Some(17)),
Attribution::InChangedHunk
);
assert_eq!(
hunks.attribute(Path::new("src/api.rs"), Some(18)),
Attribution::InChangedFile
);
}
// A line far away from every hunk is attributed to the file only.
#[test]
fn test_attribute_in_changed_file() {
let diff = "\
diff --git a/src/api.rs b/src/api.rs
--- a/src/api.rs
+++ b/src/api.rs
@@ -10,2 +10,5 @@ fn handler() {
";
let hunks = DiffHunks::parse_diff(diff);
assert_eq!(
hunks.attribute(Path::new("src/api.rs"), Some(100)),
Attribution::InChangedFile
);
}
// A path that never appears in the diff is unchanged, whatever the line.
#[test]
fn test_attribute_in_unchanged_file() {
let diff = "\
diff --git a/src/api.rs b/src/api.rs
--- a/src/api.rs
+++ b/src/api.rs
@@ -10,2 +10,5 @@ fn handler() {
";
let hunks = DiffHunks::parse_diff(diff);
assert_eq!(
hunks.attribute(Path::new("src/other.rs"), Some(10)),
Attribution::InUnchangedFile
);
}
// Without a line number the best possible answer is "changed file".
#[test]
fn test_attribute_no_line_number() {
let diff = "\
diff --git a/src/api.rs b/src/api.rs
--- a/src/api.rs
+++ b/src/api.rs
@@ -10,2 +10,5 @@ fn handler() {
";
let hunks = DiffHunks::parse_diff(diff);
assert_eq!(
hunks.attribute(Path::new("src/api.rs"), None),
Attribution::InChangedFile
);
}
// An empty diff knows no files at all.
#[test]
fn test_empty_diff() {
let hunks = DiffHunks::parse_diff("");
assert_eq!(hunks.changed_file_count(), 0);
assert_eq!(
hunks.attribute(Path::new("any.rs"), Some(1)),
Attribution::InUnchangedFile
);
}
// A pure rename has no `---`/`+++` headers and no hunks; looking up the old
// path must still resolve to the renamed (changed) file.
#[test]
fn test_parse_diff_rename_without_content_change() {
let diff = "\
diff --git a/src/old.rs b/src/new.rs
similarity index 100%
rename from src/old.rs
rename to src/new.rs
";
let hunks = DiffHunks::parse_diff(diff);
assert_eq!(
hunks.attribute(Path::new("src/old.rs"), Some(10)),
Attribution::InChangedFile
);
}
// Rename plus edits: hunks are stored under the new path, and a lookup by
// the old path is redirected to them.
#[test]
fn test_parse_diff_rename_with_content_change() {
let diff = "\
diff --git a/src/old.rs b/src/new.rs
similarity index 80%
rename from src/old.rs
rename to src/new.rs
--- a/src/old.rs
+++ b/src/new.rs
@@ -5,0 +5,3 @@ fn foo() {
+ added();
+ lines();
+ here();
";
let hunks = DiffHunks::parse_diff(diff);
assert_eq!(
hunks.attribute(Path::new("src/old.rs"), Some(6)),
Attribution::InChangedHunk
);
assert_eq!(
hunks.attribute(Path::new("src/old.rs"), Some(100)),
Attribution::InChangedFile
);
}
}