/// Three plain counters summarising a diff.
///
/// Nothing in this file computes these values; they are stored exactly as the
/// caller supplies them through [`DiffStat::new`], or are all zero when the
/// value comes from [`Default`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub struct DiffStat {
    /// Number of files changed (as reported by the caller).
    pub files_changed: usize,
    /// Number of insertions (presumably lines; this file never fills it in).
    pub insertions: usize,
    /// Number of deletions (presumably lines; this file never fills it in).
    pub deletions: usize,
}

impl DiffStat {
    /// Builds a `DiffStat` from its three counters without any validation.
    ///
    /// The struct is `#[non_exhaustive]`, so code outside the defining crate
    /// cannot use a struct literal and has to go through this constructor
    /// (or `Default`).
    pub fn new(files_changed: usize, insertions: usize, deletions: usize) -> Self {
        DiffStat {
            files_changed,
            insertions,
            deletions,
        }
    }
}
/// How a file was affected, as classified by `parse_section`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub enum ChangeKind {
    /// The section's metadata contained a line starting with `new file`.
    Added,
    /// The default when no other marker line was seen.
    Modified,
    /// The section's metadata contained a line starting with `deleted file`.
    Deleted,
    /// The section's metadata contained a `rename to ` line; this overrides
    /// any of the other kinds.
    Renamed,
}
/// One body line of a hunk.
///
/// The payload is the line's text with the leading one-byte marker
/// (`' '`, `'+'` or `'-'`) already removed.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub enum DiffLine {
    /// A line that started with a space.
    Context(String),
    /// A line that started with `+`.
    Added(String),
    /// A line that started with `-`.
    Removed(String),
}
/// One `@@ -old_start,old_lines +new_start,new_lines @@ section` hunk.
///
/// The numeric fields are taken from the header as written. A count that is
/// omitted becomes `1`, and a number that fails to parse becomes `0`
/// (see `parse_hunk_range`).
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub struct Hunk {
    /// Start value from the header's `-` range.
    pub old_start: usize,
    /// Line count from the header's `-` range.
    pub old_lines: usize,
    /// Start value from the header's `+` range.
    pub new_start: usize,
    /// Line count from the header's `+` range.
    pub new_lines: usize,
    /// Text following the closing `@@`, with one leading space removed if present.
    pub section: String,
    /// Body lines in the order they appeared.
    pub lines: Vec<DiffLine>,
}
/// The parsed form of one `diff --git` section.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub struct FileDiff {
    /// Classification of the change.
    pub change: ChangeKind,
    /// Path of the file, with every backslash replaced by `/`.
    pub path: String,
    /// For renames, the `rename from` path (backslashes replaced by `/`) when
    /// that line was present; `None` for every other kind of change.
    pub old_path: Option<String>,
    /// Hunks in the order they appeared in the section.
    pub hunks: Vec<Hunk>,
    /// The section's text, copied verbatim from the input.
    pub raw: String,
}
pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
diff_sections(diff).filter_map(parse_section).collect()
}
/// Splits `full` into per-file sections.
///
/// A section starts at each line beginning with `diff --git ` and runs up to
/// (not including) the next such line, or to the end of the input. Anything
/// before the first header line is not part of any section. The slices borrow
/// from `full` and keep their line terminators.
fn diff_sections(full: &str) -> impl Iterator<Item = &str> {
    // Byte offset of every header line. `split_inclusive` keeps the '\n', so
    // summing the line lengths reproduces exact positions in `full`.
    let starts: Vec<usize> = full
        .split_inclusive('\n')
        .scan(0usize, |next_offset, line| {
            let begin = *next_offset;
            *next_offset += line.len();
            Some((begin, line))
        })
        .filter(|(_, line)| line.starts_with("diff --git "))
        .map(|(begin, _)| begin)
        .collect();

    // Each section ends where the following one begins; the last one ends at EOF.
    let mut sections: Vec<&str> = Vec::with_capacity(starts.len());
    for (i, &begin) in starts.iter().enumerate() {
        let end = starts.get(i + 1).copied().unwrap_or(full.len());
        sections.push(&full[begin..end]);
    }
    sections.into_iter()
}
/// Parses a single `diff --git` section into a [`FileDiff`].
///
/// Each line is handled by the first rule that applies:
///
/// 1. A hunk header (anything `parse_hunk_header` accepts) closes the hunk in
///    progress, if any, and opens a new one.
/// 2. While a hunk is open, a line starting with `' '`, `'+'` or `'-'` is
///    appended to it with that marker byte removed; every other line
///    (for example one starting with `\`) is dropped.
/// 3. Before the first hunk, metadata lines are recognised by prefix:
///    `new file`, `deleted file`, `rename to `, `rename from `, `+++ b/` and
///    `--- a/`. Captured paths have trailing whitespace trimmed.
///
/// The resulting path is the first available of: `rename to`, `+++ b/`,
/// `--- a/`, then the `b/` name from the `diff --git` line. Returns `None`
/// when none of those yields a path. Backslashes in `path` and `old_path`
/// are replaced with `/`.
///
/// NOTE(review): hunk bodies are not bounded by the header's line counts, so
/// any later line in the section that starts with `' '`, `'+'` or `'-'` is
/// attributed to the last hunk.
fn parse_section(section: &str) -> Option<FileDiff> {
    let mut change = ChangeKind::Modified;
    let mut plus_path: Option<String> = None;
    let mut minus_path: Option<String> = None;
    let mut renamed_to: Option<String> = None;
    let mut renamed_from: Option<String> = None;
    let mut finished: Vec<Hunk> = Vec::new();
    let mut open: Option<Hunk> = None;

    for line in section.lines() {
        // Rule 1: a new header finishes the hunk in progress.
        if let Some(next) = parse_hunk_header(line) {
            finished.extend(open.replace(next));
            continue;
        }

        // Rule 2: inside a hunk only body lines matter. The marker is a
        // single ASCII byte, so slicing at index 1 is a valid char boundary.
        if let Some(hunk) = open.as_mut() {
            let entry = match line.as_bytes().first() {
                Some(b' ') => Some(DiffLine::Context(line[1..].to_string())),
                Some(b'+') => Some(DiffLine::Added(line[1..].to_string())),
                Some(b'-') => Some(DiffLine::Removed(line[1..].to_string())),
                _ => None,
            };
            hunk.lines.extend(entry);
            continue;
        }

        // Rule 3: metadata that precedes the first hunk.
        if line.starts_with("new file") {
            change = ChangeKind::Added;
            continue;
        }
        if line.starts_with("deleted file") {
            change = ChangeKind::Deleted;
            continue;
        }
        let captured = if let Some(rest) = line.strip_prefix("rename to ") {
            Some((&mut renamed_to, rest))
        } else if let Some(rest) = line.strip_prefix("rename from ") {
            Some((&mut renamed_from, rest))
        } else if let Some(rest) = line.strip_prefix("+++ b/") {
            Some((&mut plus_path, rest))
        } else if let Some(rest) = line.strip_prefix("--- a/") {
            Some((&mut minus_path, rest))
        } else {
            None
        };
        if let Some((slot, rest)) = captured {
            *slot = Some(rest.trim_end().to_string());
        }
    }
    // The last hunk has no following header to close it.
    finished.extend(open);

    let unify_separators = |p: String| p.replace('\\', "/");
    let is_rename = renamed_to.is_some();
    let path = renamed_to
        .or(plus_path)
        .or(minus_path)
        .or_else(|| header_b_path(section))?;
    // A `rename to` line wins over `new file` / `deleted file`.
    let (change, old_path) = if is_rename {
        (ChangeKind::Renamed, renamed_from.map(unify_separators))
    } else {
        (change, None)
    };

    Some(FileDiff {
        change,
        path: unify_separators(path),
        old_path,
        hunks: finished,
        raw: section.to_string(),
    })
}
/// Recognises a hunk header of the form `@@ -<old> +<new> @@[ section]`.
///
/// Returns `None` unless the line starts with `@@ `, contains a closing
/// ` @@`, and its first two whitespace-separated fields start with `-` and
/// `+` respectively. The returned hunk has an empty body. The numeric fields
/// come from `parse_hunk_range`, which never fails (unparseable numbers
/// become `0`).
fn parse_hunk_header(line: &str) -> Option<Hunk> {
    let (ranges, trailer) = line.strip_prefix("@@ ")?.split_once(" @@")?;

    let mut fields = ranges.split_whitespace();
    let old = fields.next()?.strip_prefix('-')?;
    let new = fields.next()?.strip_prefix('+')?;
    let (old_start, old_lines) = parse_hunk_range(old);
    let (new_start, new_lines) = parse_hunk_range(new);

    // Drop at most one separating space; the rest of the trailer is kept as is.
    let section = match trailer.strip_prefix(' ') {
        Some(stripped) => stripped,
        None => trailer,
    };

    Some(Hunk {
        old_start,
        old_lines,
        new_start,
        new_lines,
        section: section.to_string(),
        lines: Vec::new(),
    })
}
/// Parses a hunk range of the form `start` or `start,count` into `(start, count)`.
///
/// The range is passed without its leading `-`/`+` sign. When there is no
/// comma the count is `1`. Any component that is not a valid `usize` becomes
/// `0` rather than causing a failure.
fn parse_hunk_range(range: &str) -> (usize, usize) {
    fn number(text: &str) -> usize {
        text.parse().unwrap_or(0)
    }

    let (start, count) = match range.split_once(',') {
        Some((start, count)) => (start, Some(count)),
        None => (range, None),
    };
    (number(start), count.map_or(1, number))
}
/// Extracts the `b/` path from the section's `diff --git a/<old> b/<new>` line.
///
/// This is the last-resort path source, used when the section has no
/// `rename to`, `+++ b/` or `--- a/` line (e.g. a change with no hunks).
///
/// The header is ambiguous when a path itself contains `" b/"`. When both
/// names are identical the header has the shape `a/<p> b/<p>`, so the split
/// point is known from the length alone; that case is tried first. Otherwise
/// the text after the first `" b/"` is returned.
///
/// Returns `None` if the section is empty, its first line is not a
/// `diff --git ` line, or that line contains no `" b/"`.
fn header_b_path(section: &str) -> Option<String> {
    /// For `a/<p> b/<p>` with both names equal, returns `<p>`.
    fn same_name_path(names: &str) -> Option<&str> {
        let body = names.strip_prefix("a/")?;
        // body == "<p> b/<p>", so its length is 2 * len(p) + 3.
        let doubled = body.len().checked_sub(" b/".len())?;
        if doubled % 2 != 0 {
            return None;
        }
        let n = doubled / 2;
        // `get` returns None instead of panicking when `n` is not a char boundary.
        let old = body.get(..n)?;
        let sep = body.get(n..n + 3)?;
        let new = body.get(n + 3..)?;
        (sep == " b/" && old == new).then_some(new)
    }

    let first = section.lines().next()?;
    let names = first.strip_prefix("diff --git ")?;
    if let Some(path) = same_name_path(names) {
        return Some(path.to_string());
    }
    // Names differ (or the line is unusual): split at the first " b/".
    let idx = names.find(" b/")?;
    Some(names[idx + " b/".len()..].to_string())
}
// Unit tests for `parse_diff`, driven by small hand-written diff fixtures.
#[cfg(test)]
mod tests {
use super::*;
// One section per change kind: checks that each file's path and `ChangeKind`
// are reported in input order, and that the rename records its old path.
#[test]
fn diff_covers_add_modify_delete_rename() {
let full = concat!(
"diff --git a/new b/new\n",
"new file mode 100644\n--- /dev/null\n+++ b/new\n@@ -0,0 +1 @@\n+n\n",
"diff --git a/mod b/mod\n",
"--- a/mod\n+++ b/mod\n@@ -1 +1 @@\n-a\n+b\n",
"diff --git a/gone b/gone\n",
"deleted file mode 100644\n--- a/gone\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n",
"diff --git a/old/f.txt b/new/f.txt\n",
"similarity index 100%\nrename from old/f.txt\nrename to new/f.txt\n",
);
let files = parse_diff(full);
let kinds: Vec<_> = files.iter().map(|f| (f.path.as_str(), f.change)).collect();
assert_eq!(
kinds,
vec![
("new", ChangeKind::Added),
("mod", ChangeKind::Modified),
("gone", ChangeKind::Deleted),
("new/f.txt", ChangeKind::Renamed),
]
);
let rename = files
.iter()
.find(|f| f.change == ChangeKind::Renamed)
.unwrap();
assert_eq!(rename.old_path.as_deref(), Some("old/f.txt"));
}
// The path contains " b/" and the `---`/`+++` lines end with a tab; the path
// must come from the `+++ b/` line with the trailing tab trimmed.
#[test]
fn diff_handles_space_paths() {
let full = "diff --git a/a b/c.txt b/a b/c.txt\n--- a/a b/c.txt\t\n+++ b/a b/c.txt\t\n@@ -1 +1 @@\n-x\n+y\n";
let files = parse_diff(full);
assert_eq!(files.len(), 1);
assert_eq!(files[0].path, "a b/c.txt");
}
// Checks the four header numbers, the section text after the closing `@@`,
// the body lines with their markers stripped, and that `raw` is the verbatim input.
#[test]
fn diff_parses_hunk_ranges_and_body() {
let full = "diff --git a/f b/f\n--- a/f\n+++ b/f\n@@ -1,2 +1,3 @@ fn main()\n ctx\n-old\n+new\n+added\n";
let files = parse_diff(full);
assert_eq!(files.len(), 1);
assert_eq!(files[0].raw, full);
let hunk = &files[0].hunks[0];
assert_eq!(
(
hunk.old_start,
hunk.old_lines,
hunk.new_start,
hunk.new_lines
),
(1, 2, 1, 3)
);
assert_eq!(hunk.section, "fn main()");
assert_eq!(
hunk.lines,
vec![
DiffLine::Context("ctx".into()),
DiffLine::Removed("old".into()),
DiffLine::Added("new".into()),
DiffLine::Added("added".into()),
]
);
}
// A range written without ",count" (here `-3` / `+3`) must report a count of 1.
#[test]
fn diff_omitted_count_defaults_to_one() {
let full = "diff --git a/f b/f\n--- a/f\n+++ b/f\n@@ -3 +3 @@\n-a\n+b\n";
let hunk = &parse_diff(full)[0].hunks[0];
assert_eq!((hunk.old_start, hunk.old_lines), (3, 1));
assert_eq!((hunk.new_start, hunk.new_lines), (3, 1));
}
}
// Property tests (proptest): the parser must not panic on any input, and every
// parsed file's `raw` text must begin with a `diff --git` header.
#[cfg(test)]
mod proptests {
use super::*;
use proptest::prelude::*;
// Strategy for one diff-like line: either one of the fixed header/metadata
// lines the parser recognises, or a short random body line with an optional
// ' ', '+' or '-' marker (including non-ASCII and tab characters).
fn diff_line() -> impl Strategy<Value = String> {
prop_oneof![
Just("diff --git a/f b/f\n".to_string()),
Just("--- a/f\n".to_string()),
Just("+++ b/f\n".to_string()),
Just("@@ -1,2 +3,4 @@ ctx\n".to_string()),
Just("@@ -1 +1 @@\n".to_string()),
Just("new file mode 100644\n".to_string()),
Just("deleted file mode 100644\n".to_string()),
Just("rename from {old => new}.rs\n".to_string()),
Just("rename to é/r.rs\n".to_string()),
"[-+ ]?[a-zé\t]{0,12}\n", ]
}
// Strategy for a whole document: 0 to 39 generated lines concatenated in order.
fn diff_doc() -> impl Strategy<Value = String> {
prop::collection::vec(diff_line(), 0..40).prop_map(|lines| lines.concat())
}
proptest! {
// Completely arbitrary strings must be handled without panicking.
#[test]
fn parse_diff_never_panics_on_arbitrary_text(s in any::<String>()) {
let _ = parse_diff(&s);
}
// Diff-shaped but randomly ordered lines must be handled without panicking.
#[test]
fn parse_diff_never_panics_on_structured_text(s in diff_doc()) {
let _ = parse_diff(&s);
}
// Every returned section must start at a `diff --git` header line.
#[test]
fn parse_diff_sections_are_well_formed(s in diff_doc()) {
for file in parse_diff(&s) {
prop_assert!(file.raw.starts_with("diff --git"));
}
}
}
}
// Serialization tests; compiled only for test builds with the `serde` feature enabled.
#[cfg(all(test, feature = "serde"))]
mod serde_tests {
use super::*;
// `DiffStat` must serialize as an object keyed by its field names, and a
// unit `ChangeKind` variant as its name in a JSON string.
#[test]
fn diff_stat_and_change_kind_serialize() {
assert_eq!(
serde_json::to_value(DiffStat::new(3, 12, 4)).unwrap(),
serde_json::json!({"files_changed": 3, "insertions": 12, "deletions": 4})
);
assert_eq!(
serde_json::to_value(ChangeKind::Renamed).unwrap(),
serde_json::json!("Renamed")
);
}
}