use regex::Regex;
use std::{collections::HashMap, ops::Range, path::PathBuf};
use crate::{FileDiffLines, FileFilter, LinesChangedOnly, error::DiffError};
/// Line-range numbers associated with one hunk of a diff.
///
/// NOTE(review): nothing in this file constructs or reads this type, so the
/// field meanings below are inferred from the field names and the usual
/// `@@ -old_start,old_lines +new_start,new_lines @@` hunk header layout —
/// confirm against the code that actually produces these values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DiffHunkHeader {
    /// Presumably the first line of the hunk in the old version of the file.
    pub old_start: u32,
    /// Presumably the number of lines the hunk spans in the old version.
    pub old_lines: u32,
    /// Presumably the first line of the hunk in the new version of the file.
    pub new_start: u32,
    /// Presumably the number of lines the hunk spans in the new version.
    pub new_lines: u32,
}
/// Extracts the changed file's path from the header lines that precede a
/// file's first hunk (the "front matter").
///
/// Two sources are tried, in order:
///
/// 1. A `+++ b/<path>` line. The `b` is optional in the pattern, so
///    `+++ /<path>` is accepted too; everything after that first `/` up to the
///    end of the line is returned.
/// 2. A `rename to <path>` line, for renames that carry no `+++` line. This is
///    only consulted when the front matter also contains a line starting with
///    `similarity`. That line does not have to be the first one: git prints
///    `old mode` / `new mode` lines ahead of `similarity index` when a rename
///    also changes the file mode.
///
/// # Returns
/// `Ok(Some(path))` borrowed from `front_matter`, or `Ok(None)` when no name
/// could be determined. In the `None` case a warning is logged unless the front
/// matter contains a `Binary files ` line.
///
/// # Errors
/// Returns a [`DiffError`] if one of the regular expressions fails to compile.
fn get_filename_from_front_matter(front_matter: &str) -> Result<Option<&str>, DiffError> {
    // Usual case: the new-side file header line.
    let diff_file_name = Regex::new(r"(?m)^\+\+\+\sb?/(.*)$")?;
    if let Some(name) = diff_file_name
        .captures(front_matter)
        .and_then(|captures| captures.get(1))
    {
        return Ok(Some(name.as_str()));
    }

    // Pure renames have no `---`/`+++` lines; fall back to the `rename to` header.
    let has_similarity_line = front_matter
        .lines()
        .any(|line| line.starts_with("similarity"));
    if has_similarity_line {
        let diff_renamed_file = Regex::new(r"(?m)^rename to (.*)$")?;
        if let Some(name) = diff_renamed_file
            .captures(front_matter)
            .and_then(|captures| captures.get(1))
        {
            return Ok(Some(name.as_str()));
        }
    }

    // Binary files legitimately have no usable name here, so stay quiet about them.
    let diff_binary_file = Regex::new(r"(?m)^Binary\sfiles\s")?;
    if !diff_binary_file.is_match(front_matter) {
        log::warn!("Unrecognized diff starting with:\n{}", front_matter);
    }
    Ok(None)
}
/// Regex source matching a unified-diff hunk header such as `@@ -3,7 +3,7 @@`
/// or the terse form `@@ -3 +3 @@`.
///
/// Capture group 1 is the starting line number on the new (`+`) side and
/// capture group 2 is the new-side line count, which is empty when the header
/// omits it.
///
/// The pattern is anchored to the start of a line (`(?m)^`): hunk headers always
/// begin a line, whereas the same text inside file content is prefixed by
/// ` `, `+` or `-` and must not be mistaken for a header.
static HUNK_INFO_PATTERN: &str = r"(?m)^@@\s\-\d+,?\d*\s\+(\d+),?(\d*)\s@@";
/// Collects the added line numbers and the new-side line ranges of every hunk
/// found in `patch`.
///
/// Text before the first hunk header in `patch` is ignored.
///
/// # Returns
/// A tuple `(additions, diff_hunks)`:
/// - `additions`: the new-file line number of every line starting with `+`.
/// - `diff_hunks`: one `start..start + count` range per hunk header, taken from
///   the header's `+start,count` part. A missing count is treated as `1`.
///
/// # Errors
/// Returns a [`DiffError`] if the hunk header pattern fails to compile.
fn parse_patch(patch: &str) -> Result<(Vec<u32>, Vec<Range<u32>>), DiffError> {
    let hunk_info = Regex::new(HUNK_INFO_PATTERN)?;
    let mut additions = Vec::new();
    let mut diff_hunks = Vec::new();

    // `split` yields the text before the first header as its first item; skipping
    // it lines each hunk body up with the header that precedes it.
    let hunk_bodies = hunk_info.split(patch).skip(1);
    for (header, body) in hunk_info.captures_iter(patch).zip(hunk_bodies) {
        // An omitted (empty) count fails to parse and falls back to 1.
        let [start_line, line_count] = header.extract().1.map(|v| v.parse::<u32>().unwrap_or(1));
        // Saturate rather than overflow on absurd header values.
        diff_hunks.push(start_line..start_line.saturating_add(line_count));

        let mut new_line_number = start_line;
        // The first piece is the remainder of the header line itself (an optional
        // section heading), not a line of the file.
        for line in body.split('\n').skip(1) {
            // "\ No newline at end of file" markers are diff metadata, not a line
            // of either file, so they must not advance the new-file line counter.
            if line.starts_with("\\ ") {
                continue;
            }
            if line.starts_with('+') {
                additions.push(new_line_number);
            }
            // Removed lines exist only in the old file; everything else (context
            // and additions) occupies a line in the new file.
            if !line.starts_with('-') {
                new_line_number = new_line_number.saturating_add(1);
            }
        }
    }
    Ok((additions, diff_hunks))
}
/// Parses a multi-file `git diff` text into per-file line information.
///
/// The input is split on `diff --git a/...` delimiter lines. For each file
/// section the function:
///
/// 1. skips it when it is empty or describes a deleted file,
/// 2. extracts the file name from the lines before the first hunk header,
///    dropping one leading `/` from the name if present,
/// 3. skips it when `file_filter.is_qualified` rejects the path,
/// 4. parses the hunks and keeps the file only when
///    `lines_changed_only.is_change_valid(has_added_lines, has_hunks)` is true.
///
/// # Returns
/// A map from file name to its [`FileDiffLines`]. If the same name appears in
/// more than one section, the first section wins.
///
/// # Errors
/// Returns a [`DiffError`] if a regular expression fails to compile or if one of
/// the parsing helpers reports an error.
pub fn parse_diff(
    diff: &str,
    file_filter: &FileFilter,
    lines_changed_only: &LinesChangedOnly,
) -> Result<HashMap<String, FileDiffLines>, DiffError> {
    let diff_file_delimiter = Regex::new(r"(?m)^diff \-\-git a/.*$")?;
    let hunk_info = Regex::new(HUNK_INFO_PATTERN)?;
    let mut results = HashMap::new();

    for file_diff in diff_file_delimiter.split(diff) {
        // The delimiter match stops *before* the line's newline, so every section
        // begins with that newline. Trim it first; otherwise the "deleted file"
        // prefix check below could never succeed.
        let file_diff = file_diff.trim_start();
        if file_diff.is_empty() || file_diff.starts_with("deleted file") {
            continue;
        }

        // Everything before the first hunk header is front matter; without any
        // hunk (e.g. pure rename, binary file) the whole section is front matter.
        let hunk_start = hunk_info
            .find(file_diff)
            .map_or(file_diff.len(), |first_hunk| first_hunk.start());
        let (front_matter, patch) = file_diff.split_at(hunk_start);

        let Some(file_name) = get_filename_from_front_matter(front_matter)? else {
            continue;
        };
        let file_name = file_name.strip_prefix('/').unwrap_or(file_name);
        if !file_filter.is_qualified(&PathBuf::from(file_name)) {
            continue;
        }

        let (added_lines, diff_hunks) = parse_patch(patch)?;
        if lines_changed_only.is_change_valid(!added_lines.is_empty(), !diff_hunks.is_empty()) {
            results
                .entry(file_name.to_string())
                .or_insert_with(|| FileDiffLines::with_info(added_lines, diff_hunks));
        }
    }
    Ok(results)
}
#[cfg(test)]
mod test {
    #![allow(clippy::unwrap_used)]
    use super::parse_diff;
    use crate::{FileFilter, LinesChangedOnly};

    // ----- fixtures -----

    /// A pure rename (no hunks) followed by a newly added binary file.
    const RENAMED_DIFF: &str = r#"diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c
similarity index 100%
rename from /tests/demo/some source.cpp
rename to /tests/demo/some source.c
diff --git a/some picture.png b/some picture.png
new file mode 100644
Binary files /dev/null and b/some picture.png differ
"#;

    /// A rename that also carries one hunk.
    ///
    /// NOTE(review): this is a raw string, so each `\n` below is a literal
    /// backslash followed by `n`, not a line break.
    const RENAMED_DIFF_WITH_CHANGES: &str = r#"diff --git a/tests/demo/some source.cpp b/tests/demo/some source.c
similarity index 99%
rename from /tests/demo/some source.cpp
rename to /tests/demo/some source.c
@@ -3,7 +3,7 @@
\n \n \n-#include "math.h"
+#include <math.h>\n \n \n \n"#;

    /// A regular modification with `---`/`+++` lines and a single hunk.
    const TYPICAL_DIFF: &str = "diff --git a/path/for/Some file.cpp b/path/to/Some file.cpp\n\
        --- a/path/for/Some file.cpp\n\
        +++ b/path/to/Some file.cpp\n\
        @@ -3,7 +3,7 @@\n \n \n \n\
        -#include <some_lib/render/animation.hpp>\n\
        +#include <some_lib/render/animations.hpp>\n \n \n \n";

    /// A newly added binary file; it has no hunks and no `+++` line.
    const BINARY_DIFF: &str = "diff --git a/some picture.png b/some picture.png\n\
        new file mode 100644\n\
        Binary files /dev/null and b/some picture.png differ\n";

    /// Three hunks whose headers omit line counts wherever the count is 1.
    const TERSE_HEADERS: &str = r#"diff --git a/src/demo.cpp b/src/demo.cpp
--- a/src/demo.cpp
+++ b/src/demo.cpp
@@ -3 +3 @@
-#include <stdio.h>
+#include "stdio.h"
@@ -4,0 +5,2 @@
+auto main() -> int
+{
@@ -18 +17,2 @@ int main(){
- return 0;}
+ return 0;
+}"#;

    // ----- tests -----

    /// With `LinesChangedOnly::Off`, a pure rename is reported under its new
    /// name (leading `/` dropped) with no added lines and no hunks.
    #[test]
    fn parse_renamed_diff() {
        let c_sources = FileFilter::new(&[], &["c"], None);
        let parsed = parse_diff(RENAMED_DIFF, &c_sources, &LinesChangedOnly::Off).unwrap();
        let renamed = parsed.get("tests/demo/some source.c").unwrap();
        assert!(renamed.added_lines.is_empty());
        assert!(renamed.diff_hunks.is_empty());
    }

    /// With `LinesChangedOnly::Diff`, the same pure rename yields no entries.
    #[test]
    fn parse_renamed_only_diff() {
        let c_sources = FileFilter::new(&[], &["c"], None);
        let parsed = parse_diff(RENAMED_DIFF, &c_sources, &LinesChangedOnly::Diff).unwrap();
        assert!(parsed.is_empty());
    }

    /// A rename that includes a hunk is reported, and line membership follows
    /// the hunk: line 1 is outside the diff, line 4 is inside it.
    #[test]
    fn parse_renamed_diff_with_patch() {
        let combined = [RENAMED_DIFF_WITH_CHANGES, TERSE_HEADERS].concat();
        let filter = FileFilter::new(&["src/*"], &["c", "cpp"], None);
        let files = parse_diff(&combined, &filter, &LinesChangedOnly::On).unwrap();
        eprintln!("files: {files:#?}");
        let renamed = files.get("tests/demo/some source.c").unwrap();
        assert!(!renamed.is_line_in_diff(&1));
        assert!(renamed.is_line_in_diff(&4));
    }

    /// An ordinary one-hunk modification produces at least one entry.
    #[test]
    fn parse_typical_diff() {
        let cpp_sources = FileFilter::new(&[], &["cpp"], None);
        let parsed = parse_diff(TYPICAL_DIFF, &cpp_sources, &LinesChangedOnly::On).unwrap();
        assert!(!parsed.is_empty());
    }

    /// A binary file produces no entries, even when its extension is accepted.
    #[test]
    fn parse_binary_diff() {
        let png_files = FileFilter::new(&[], &["png"], None);
        let parsed = parse_diff(BINARY_DIFF, &png_files, &LinesChangedOnly::Diff).unwrap();
        assert!(parsed.is_empty());
    }

    /// Hunk headers without an explicit count are treated as spanning one line.
    #[test]
    fn terse_hunk_header() {
        let cpp_sources = FileFilter::new(&[], &["cpp"], None);
        let parsed = parse_diff(TERSE_HEADERS, &cpp_sources, &LinesChangedOnly::Diff).unwrap();
        let demo = parsed.get("src/demo.cpp").unwrap();
        assert_eq!(demo.diff_hunks, vec![3..4, 5..7, 17..19]);
    }
}