/// Selects what a diff request refers to.
///
/// NOTE(review): nothing in this part of the file consumes `DiffSpec`; the
/// variant descriptions below are inferred from the variant names and should
/// be confirmed against the callers.
#[derive(Debug, Clone)]
pub enum DiffSpec {
    /// Presumably the uncommitted state of the working tree.
    WorkingTree,
    /// Presumably a revision, identified by the contained string.
    Rev(String),
}
/// Aggregate counters describing the size of a diff.
///
/// Every counter is zero in the `Default` value. With the `serde` feature
/// enabled the struct also derives `serde::Serialize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub struct DiffStat {
    /// Count of changed files.
    pub files_changed: usize,
    /// Count of insertions (presumably inserted lines — confirm with the producer).
    pub insertions: usize,
    /// Count of deletions (presumably deleted lines — confirm with the producer).
    pub deletions: usize,
}

impl DiffStat {
    /// Builds a `DiffStat` from its three counters.
    ///
    /// The struct is `#[non_exhaustive]`, so code outside this crate cannot
    /// use a struct literal and has to go through this constructor or
    /// `Default`.
    pub fn new(files_changed: usize, insertions: usize, deletions: usize) -> Self {
        Self {
            files_changed,
            insertions,
            deletions,
        }
    }
}
/// How a file changed within one `diff --git` section.
///
/// As assigned by `parse_section`: `Modified` is the starting value, a
/// `new file` header line selects `Added`, a `deleted file` header line
/// selects `Deleted`, and the presence of a `rename to` line selects
/// `Renamed` (overriding the others).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub enum ChangeKind {
    /// The section carries a `new file` header line.
    Added,
    /// No add/delete/rename marker was found.
    Modified,
    /// The section carries a `deleted file` header line.
    Deleted,
    /// The section carries a `rename to` header line.
    Renamed,
}
/// One body line of a hunk.
///
/// The payload is the line text with its leading marker character (` `, `+`
/// or `-`) removed, as stored by `parse_section`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub enum DiffLine {
    /// A line that started with a space.
    Context(String),
    /// A line that started with `+`.
    Added(String),
    /// A line that started with `-`.
    Removed(String),
}
/// One `@@ -a,b +c,d @@ section` hunk of a file diff.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub struct Hunk {
    /// Start value of the `-` range in the hunk header.
    pub old_start: usize,
    /// Line count of the `-` range (1 when the header omits the count).
    pub old_lines: usize,
    /// Start value of the `+` range in the hunk header.
    pub new_start: usize,
    /// Line count of the `+` range (1 when the header omits the count).
    pub new_lines: usize,
    /// Text following the closing `@@` of the header, without the separating space.
    pub section: String,
    /// Body lines of the hunk, in file order.
    pub lines: Vec<DiffLine>,
}
/// The parsed form of one `diff --git` section.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub struct FileDiff {
    /// What happened to the file.
    pub change: ChangeKind,
    /// Path of the file, with backslashes replaced by `/`.
    pub path: String,
    /// For renames, the `rename from` path (backslashes replaced by `/`); otherwise `None`.
    pub old_path: Option<String>,
    /// Hunks of the section, in order of appearance.
    pub hunks: Vec<Hunk>,
    /// The section text exactly as it appeared in the input.
    pub raw: String,
}
pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
diff_sections(diff).filter_map(parse_section).collect()
}
/// Splits `full` into sections that each begin at a line starting with
/// `diff --git ` and run up to the next such line (or the end of the input).
///
/// Anything before the first header line is not part of any section. The
/// returned slices borrow from `full` and keep their line terminators.
fn diff_sections(full: &str) -> impl Iterator<Item = &str> {
    let mut sections: Vec<&str> = Vec::new();
    // Byte offset at which the section currently being scanned began.
    let mut section_start: Option<usize> = None;
    let mut offset = 0;

    for line in full.split_inclusive('\n') {
        if line.starts_with("diff --git ") {
            // A new header closes the section that was open, if any.
            if let Some(begin) = section_start.replace(offset) {
                sections.push(&full[begin..offset]);
            }
        }
        offset += line.len();
    }

    // The last section extends to the end of the input.
    if let Some(begin) = section_start {
        sections.push(&full[begin..]);
    }

    sections.into_iter()
}
/// Parses one `diff --git` section into a [`FileDiff`].
///
/// The section is scanned line by line:
/// * a line that parses as a hunk header opens a new hunk, closing the one in
///   progress;
/// * while a hunk is open, lines starting with ` `, `+` or `-` are recorded
///   with the marker stripped and every other line is ignored;
/// * before the first hunk, the header lines `new file`, `deleted file`,
///   `rename from`, `rename to`, `--- ` and `+++ ` set the change kind and the
///   candidate paths.
///
/// The reported path is the first non-empty candidate among the `rename to`
/// path, the `+++` path without its `b/` prefix, the `---` path without its
/// `a/` prefix, and finally the path taken from the `diff --git` line itself.
/// Returns `None` when none of them yields a path. Backslashes in the reported
/// paths are replaced with `/`.
fn parse_section(section: &str) -> Option<FileDiff> {
    // Unquotes a `---`/`+++` operand and removes its side prefix; operands that
    // do not carry the prefix (e.g. `/dev/null`) yield `None`.
    let side_path = |operand: &str, prefix: &str| -> Option<String> {
        unquote_git_path(operand.trim_end())
            .strip_prefix(prefix)
            .map(str::to_string)
    };
    let to_forward_slashes = |p: String| p.replace('\\', "/");

    let mut change = ChangeKind::Modified;
    let mut plus_path: Option<String> = None;
    let mut minus_path: Option<String> = None;
    let mut rename_to: Option<String> = None;
    let mut rename_from: Option<String> = None;
    let mut finished: Vec<Hunk> = Vec::new();
    let mut open: Option<Hunk> = None;

    for line in section.lines() {
        // A hunk header closes the hunk in progress and opens the next one.
        if let Some(next) = parse_hunk_header(line) {
            finished.extend(open.replace(next));
            continue;
        }

        // Once a hunk is open, every remaining line belongs to hunk bodies.
        if let Some(hunk) = open.as_mut() {
            if let Some((&marker, _)) = line.as_bytes().split_first() {
                // Only evaluated for ASCII markers, so slicing at 1 is on a char boundary.
                let text = || line[1..].to_string();
                match marker {
                    b' ' => hunk.lines.push(DiffLine::Context(text())),
                    b'+' => hunk.lines.push(DiffLine::Added(text())),
                    b'-' => hunk.lines.push(DiffLine::Removed(text())),
                    _ => {}
                }
            }
            continue;
        }

        // Header area. The recognised prefixes are mutually exclusive.
        if let Some(operand) = line.strip_prefix("--- ") {
            minus_path = side_path(operand, "a/");
        } else if let Some(operand) = line.strip_prefix("+++ ") {
            plus_path = side_path(operand, "b/");
        } else if let Some(source) = line.strip_prefix("rename from ") {
            rename_from = Some(unquote_git_path(source.trim_end()));
        } else if let Some(target) = line.strip_prefix("rename to ") {
            rename_to = Some(unquote_git_path(target.trim_end()));
        } else if line.starts_with("deleted file") {
            change = ChangeKind::Deleted;
        } else if line.starts_with("new file") {
            change = ChangeKind::Added;
        }
    }
    finished.extend(open.take());

    // A `rename to` line decides the kind regardless of other markers, and only
    // renames report an old path.
    let is_rename = rename_to.is_some();
    if is_rename {
        change = ChangeKind::Renamed;
    }
    let old_path = if is_rename {
        rename_from.map(to_forward_slashes)
    } else {
        None
    };

    // First non-empty candidate wins; the header line is the last resort.
    let path = rename_to
        .into_iter()
        .chain(plus_path)
        .chain(minus_path)
        .find(|candidate| !candidate.is_empty())
        .or_else(|| header_b_path(section))?;

    Some(FileDiff {
        change,
        path: to_forward_slashes(path),
        old_path,
        hunks: finished,
        raw: section.to_string(),
    })
}
/// Parses a hunk header of the form `@@ -a[,b] +c[,d] @@[ section]`.
///
/// Returns `None` unless the line starts with `@@ `, contains a closing ` @@`,
/// and has a `-`-prefixed range followed by a `+`-prefixed range in between.
/// The returned hunk has an empty body; the text after the closing `@@` (minus
/// one separating space) becomes its `section`.
fn parse_hunk_header(line: &str) -> Option<Hunk> {
    let after_marker = line.strip_prefix("@@ ")?;
    let (ranges, trailing) = after_marker.split_once(" @@")?;

    let mut fields = ranges.split_whitespace();
    let old_range = fields.next()?.strip_prefix('-')?;
    let new_range = fields.next()?.strip_prefix('+')?;

    let (old_start, old_lines) = parse_hunk_range(old_range);
    let (new_start, new_lines) = parse_hunk_range(new_range);
    let section = trailing.strip_prefix(' ').unwrap_or(trailing).to_string();

    Some(Hunk {
        old_start,
        old_lines,
        new_start,
        new_lines,
        section,
        lines: Vec::new(),
    })
}
/// Parses a hunk range written as `start,count` or just `start`.
///
/// Returns `(start, count)`. A missing count means 1; any number that fails
/// to parse is reported as 0.
fn parse_hunk_range(range: &str) -> (usize, usize) {
    let number = |text: &str| text.parse::<usize>().unwrap_or(0);
    if let Some((start, count)) = range.split_once(',') {
        (number(start), number(count))
    } else {
        (number(range), 1)
    }
}
/// Extracts the new-side (`b/`) path from the `diff --git` line that opens
/// `section`.
///
/// This is the fallback for sections that carry no `rename to`, `+++` or `---`
/// path (mode-only or binary changes, for example). Three header shapes are
/// tried in order:
///
/// 1. the `b/` path is quoted (`"b/..."`): it is unquoted and its prefix removed;
/// 2. the header is the symmetric `a/X b/X`: `X` is returned, which stays
///    correct even when `X` itself contains the text ` b/`;
/// 3. otherwise the text after the first ` b/` is used.
///
/// Returns `None` when the first line is not a `diff --git` line or when the
/// resulting path is empty.
fn header_b_path(section: &str) -> Option<String> {
    let first = section.lines().next()?;
    let s = first.strip_prefix("diff --git ")?;
    let path = if let Some(q) = s.rfind("\"b/") {
        unquote_git_path(&s[q..])
            .strip_prefix("b/")
            .unwrap_or("")
            .to_string()
    } else if let Some(same) = symmetric_header_path(s) {
        same.to_string()
    } else {
        let idx = s.find(" b/")?;
        s[idx + 1..].strip_prefix("b/").unwrap_or("").to_string()
    };
    (!path.is_empty()).then_some(path)
}

/// Returns `X` when `names` is exactly `a/X b/X`, i.e. both sides of an
/// unquoted `diff --git` header name the same path.
///
/// Splitting at the midpoint instead of at the first ` b/` is what makes paths
/// such as `a b/c.txt` (a directory whose name ends in ` b`) come out intact.
fn symmetric_header_path(names: &str) -> Option<&str> {
    const SEPARATOR: &str = " b/";
    let rest = names.strip_prefix("a/")?;
    // `rest` must be `X` + SEPARATOR + `X`, so what remains has even length.
    let doubled = rest.len().checked_sub(SEPARATOR.len())?;
    if doubled % 2 != 0 {
        return None;
    }
    let half = doubled / 2;
    // `get` rejects a midpoint that falls inside a multi-byte character.
    let left = rest.get(..half)?;
    let right = rest.get(half..)?.strip_prefix(SEPARATOR)?;
    (left == right).then_some(left)
}
/// Decodes a path that git wrote in its C-style quoted form.
///
/// Input that does not start with `"` is returned unchanged. Otherwise the
/// text up to the closing quote (or the end of the input) is decoded:
/// `\a \b \t \n \v \f \r` map to their control bytes, `\` followed by one to
/// three octal digits yields that byte value, and `\` followed by any other
/// byte yields that byte. A lone trailing backslash is kept literally. The
/// decoded bytes are converted to a `String` lossily, so invalid UTF-8 becomes
/// U+FFFD.
fn unquote_git_path(s: &str) -> String {
    let quoted = match s.strip_prefix('"') {
        Some(rest) => rest,
        None => return s.to_string(),
    };

    let mut decoded: Vec<u8> = Vec::with_capacity(s.len());
    let mut input = quoted.bytes().peekable();

    while let Some(byte) = input.next() {
        match byte {
            b'"' => break,
            b'\\' => {
                let escaped = match input.next() {
                    Some(next) => next,
                    None => {
                        // Backslash was the last byte: keep it as-is.
                        decoded.push(b'\\');
                        break;
                    }
                };
                let value = match escaped {
                    b'a' => 0x07,
                    b'b' => 0x08,
                    b't' => b'\t',
                    b'n' => b'\n',
                    b'v' => 0x0b,
                    b'f' => 0x0c,
                    b'r' => b'\r',
                    first @ b'0'..=b'7' => {
                        // Up to two further octal digits belong to the same escape.
                        let mut octal = u32::from(first - b'0');
                        for _ in 0..2 {
                            match input.peek() {
                                Some(&digit) if (b'0'..=b'7').contains(&digit) => {
                                    octal = octal * 8 + u32::from(digit - b'0');
                                    input.next();
                                }
                                _ => break,
                            }
                        }
                        // Values above 0o377 wrap to their low eight bits.
                        octal as u8
                    }
                    // `\"`, `\\` and unrecognised escapes stand for the byte itself.
                    other => other,
                };
                decoded.push(value);
            }
            plain => decoded.push(plain),
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}
#[cfg(test)]
mod tests {
    use super::*;

    // One section per change kind; the rename section has no hunks at all.
    #[test]
    fn diff_covers_add_modify_delete_rename() {
        let full = concat!(
            "diff --git a/new b/new\n",
            "new file mode 100644\n--- /dev/null\n+++ b/new\n@@ -0,0 +1 @@\n+n\n",
            "diff --git a/mod b/mod\n",
            "--- a/mod\n+++ b/mod\n@@ -1 +1 @@\n-a\n+b\n",
            "diff --git a/gone b/gone\n",
            "deleted file mode 100644\n--- a/gone\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n",
            "diff --git a/old/f.txt b/new/f.txt\n",
            "similarity index 100%\nrename from old/f.txt\nrename to new/f.txt\n",
        );
        let files = parse_diff(full);

        let kinds: Vec<_> = files.iter().map(|f| (f.path.as_str(), f.change)).collect();
        assert_eq!(
            kinds,
            vec![
                ("new", ChangeKind::Added),
                ("mod", ChangeKind::Modified),
                ("gone", ChangeKind::Deleted),
                ("new/f.txt", ChangeKind::Renamed),
            ]
        );

        let rename = files
            .iter()
            .find(|f| f.change == ChangeKind::Renamed)
            .unwrap();
        assert_eq!(rename.old_path.as_deref(), Some("old/f.txt"));
    }

    // The path contains " b/"; the `+++` line (with its trailing tab) supplies it.
    #[test]
    fn diff_handles_space_paths() {
        let full = "diff --git a/a b/c.txt b/a b/c.txt\n--- a/a b/c.txt\t\n+++ b/a b/c.txt\t\n@@ -1 +1 @@\n-x\n+y\n";
        let files = parse_diff(full);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "a b/c.txt");
    }

    // Octal-escaped UTF-8 in quoted `---`/`+++` paths.
    #[test]
    fn diff_unquotes_non_ascii_modify() {
        let full = concat!(
            "diff --git \"a/caf\\303\\251.txt\" \"b/caf\\303\\251.txt\"\n",
            "index 45b983b..b023018 100644\n",
            "--- \"a/caf\\303\\251.txt\"\n",
            "+++ \"b/caf\\303\\251.txt\"\n",
            "@@ -1 +1 @@\n-hi\n+bye\n",
        );
        let files = parse_diff(full);
        assert_eq!(files.len(), 1, "the non-ASCII file must not be dropped");
        assert_eq!(files[0].path, "café.txt");
        assert_eq!(files[0].change, ChangeKind::Modified);
    }

    // Quoted `rename from` / `rename to` operands.
    #[test]
    fn diff_unquotes_non_ascii_rename() {
        let full = concat!(
            "diff --git \"a/caf\\303\\251.txt\" \"b/r\\303\\251sum\\303\\251.txt\"\n",
            "similarity index 100%\n",
            "rename from \"caf\\303\\251.txt\"\n",
            "rename to \"r\\303\\251sum\\303\\251.txt\"\n",
        );
        let files = parse_diff(full);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "résumé.txt");
        assert_eq!(files[0].change, ChangeKind::Renamed);
        assert_eq!(files[0].old_path.as_deref(), Some("café.txt"));
    }

    // No `---`/`+++` lines: the path has to come from the quoted header.
    #[test]
    fn diff_unquotes_quoted_header_fallback() {
        let full = concat!(
            "diff --git \"a/caf\\303\\251.bin\" \"b/caf\\303\\251.bin\"\n",
            "index 0000000..1111111 100644\n",
            "Binary files \"a/caf\\303\\251.bin\" and \"b/caf\\303\\251.bin\" differ\n",
        );
        let files = parse_diff(full);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "café.bin");
    }

    #[test]
    fn diff_unquotes_escaped_tab_path() {
        let full = "diff --git \"a/a\\tb.txt\" \"b/a\\tb.txt\"\n--- \"a/a\\tb.txt\"\n+++ \"b/a\\tb.txt\"\n@@ -1 +1 @@\n-x\n+y\n";
        let files = parse_diff(full);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "a\tb.txt");
    }

    #[test]
    fn unquote_git_path_decodes_escapes_and_passes_through_plain() {
        assert_eq!(unquote_git_path("b/plain.txt"), "b/plain.txt");
        assert_eq!(unquote_git_path("\"b/caf\\303\\251.txt\""), "b/café.txt");
        assert_eq!(unquote_git_path("\"a\\tb\""), "a\tb");
        assert_eq!(unquote_git_path("\"a\\\\b\""), "a\\b");
        assert_eq!(unquote_git_path("\"a\\\"b\""), "a\"b");
    }

    #[test]
    fn diff_drops_sections_with_no_resolvable_path() {
        // Empty b-side path and nothing else to go on: the section is dropped.
        let bad = "diff --git a/x b/\nbinary files differ\n";
        assert!(parse_diff(bad).is_empty());

        // Empty `+++` path: the header line supplies the path instead.
        let recover = "diff --git a/real.txt b/real.txt\n+++ b/\nbinary files differ\n";
        let files = parse_diff(recover);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "real.txt");

        // Mode-only change: only the header line names the file.
        let mode_only = "diff --git a/f.sh b/f.sh\nold mode 100644\nnew mode 100755\n";
        let files = parse_diff(mode_only);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "f.sh");
    }

    #[test]
    fn diff_parses_hunk_ranges_and_body() {
        let full = "diff --git a/f b/f\n--- a/f\n+++ b/f\n@@ -1,2 +1,3 @@ fn main()\n ctx\n-old\n+new\n+added\n";
        let files = parse_diff(full);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].raw, full);

        let hunk = &files[0].hunks[0];
        assert_eq!(
            (
                hunk.old_start,
                hunk.old_lines,
                hunk.new_start,
                hunk.new_lines
            ),
            (1, 2, 1, 3)
        );
        assert_eq!(hunk.section, "fn main()");
        assert_eq!(
            hunk.lines,
            vec![
                DiffLine::Context("ctx".into()),
                DiffLine::Removed("old".into()),
                DiffLine::Added("new".into()),
                DiffLine::Added("added".into()),
            ]
        );
    }

    #[test]
    fn diff_omitted_count_defaults_to_one() {
        let full = "diff --git a/f b/f\n--- a/f\n+++ b/f\n@@ -3 +3 @@\n-a\n+b\n";
        let hunk = &parse_diff(full)[0].hunks[0];
        assert_eq!((hunk.old_start, hunk.old_lines), (3, 1));
        assert_eq!((hunk.new_start, hunk.new_lines), (3, 1));
    }
}
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    // One line of diff-like text: either a fixed structural line or a short
    // random body line with an optional marker character.
    fn diff_line() -> impl Strategy<Value = String> {
        prop_oneof![
            Just("diff --git a/f b/f\n".to_string()),
            Just("--- a/f\n".to_string()),
            Just("+++ b/f\n".to_string()),
            Just("@@ -1,2 +3,4 @@ ctx\n".to_string()),
            Just("@@ -1 +1 @@\n".to_string()),
            Just("new file mode 100644\n".to_string()),
            Just("deleted file mode 100644\n".to_string()),
            Just("rename from {old => new}.rs\n".to_string()),
            Just("rename to é/r.rs\n".to_string()),
            "[-+ ]?[a-zé\t]{0,12}\n",
        ]
    }

    // Up to 39 such lines concatenated in random order.
    fn diff_doc() -> impl Strategy<Value = String> {
        prop::collection::vec(diff_line(), 0..40).prop_map(|lines| lines.concat())
    }

    proptest! {
        #[test]
        fn parse_diff_never_panics_on_arbitrary_text(s in any::<String>()) {
            let _ = parse_diff(&s);
        }

        #[test]
        fn parse_diff_never_panics_on_structured_text(s in diff_doc()) {
            let _ = parse_diff(&s);
        }

        // Every reported section must begin at a `diff --git` header.
        #[test]
        fn parse_diff_sections_are_well_formed(s in diff_doc()) {
            for file in parse_diff(&s) {
                prop_assert!(file.raw.starts_with("diff --git"));
            }
        }
    }
}
#[cfg(all(test, feature = "serde"))]
mod serde_tests {
    use super::*;

    // Pins the JSON shape: the struct serializes as an object keyed by field
    // name, and a unit enum variant serializes as its name.
    #[test]
    fn diff_stat_and_change_kind_serialize() {
        let stat = serde_json::to_value(DiffStat::new(3, 12, 4)).unwrap();
        assert_eq!(
            stat,
            serde_json::json!({"files_changed": 3, "insertions": 12, "deletions": 4})
        );

        let kind = serde_json::to_value(ChangeKind::Renamed).unwrap();
        assert_eq!(kind, serde_json::json!("Renamed"));
    }
}