use crate::looks_binary;
use serde::{Deserialize, Serialize};
use similar::{ChangeTag, TextDiff};
/// Why a blob was reported as binary instead of being diffed as text.
///
/// No `rename_all` attribute is applied to this enum, so serde uses the variant
/// names exactly as written here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryReason {
/// Reported by `diff_blobs` when `looks_binary` flags either side.
NullByte,
/// Reported by `diff_blobs` when a side is not valid UTF-8.
NotUtf8,
}
/// How a single diff line relates to the two sides being compared.
///
/// Serialized in `snake_case` (see the `rename_all` attribute).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DiffLineKind {
/// Produced for `ChangeTag::Delete`; counted against the `a` side.
Delete,
/// Produced for `ChangeTag::Insert`; counted against the `b` side.
Insert,
/// Produced for `ChangeTag::Equal`; counted against both sides.
Equal,
}
/// One line of a hunk, with its position on each side.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffLine {
/// Whether the line was deleted, inserted, or is unchanged.
pub kind: DiffLineKind,
/// 1-based line number on the `a` side, derived from the change's old index.
/// `None` when the change carries no old index; omitted from serialized output in that case.
#[serde(skip_serializing_if = "Option::is_none")]
pub a_line: Option<u32>,
/// 1-based line number on the `b` side, derived from the change's new index.
/// `None` when the change carries no new index; omitted from serialized output in that case.
#[serde(skip_serializing_if = "Option::is_none")]
pub b_line: Option<u32>,
/// Line text with one trailing `"\r\n"` or `"\n"` removed.
pub content: String,
}
/// A group of neighbouring changes together with their surrounding context lines.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffHunk {
/// First 1-based `a`-side line number seen in the hunk, or `0` if no line has one.
pub a_start: u32,
/// Number of `Delete` plus `Equal` lines in the hunk.
pub a_count: u32,
/// First 1-based `b`-side line number seen in the hunk, or `0` if no line has one.
pub b_start: u32,
/// Number of `Insert` plus `Equal` lines in the hunk.
pub b_count: u32,
/// The hunk's lines, in the order the diff produced them.
pub lines: Vec<DiffLine>,
}
/// Overall classification of a file comparison.
///
/// Variant names are serialized in `snake_case` (see the `rename_all` attribute).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileDiffStatus {
/// Both sides are absent, or both decode to the same text.
Unchanged,
/// Only the `b` side is present.
Added,
/// Only the `a` side is present.
Deleted,
/// Both sides are present and their text differs.
Modified,
/// At least one side could not be treated as text; no hunks are produced.
Binary {
/// Which check rejected the content.
reason: BinaryReason,
},
}
/// Result of comparing two versions of a file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileDiff {
/// Path supplied by the caller for the `a` side, if any.
pub a_path: Option<String>,
/// Path supplied by the caller for the `b` side, if any.
pub b_path: Option<String>,
/// Overall classification of the comparison.
pub status: FileDiffStatus,
/// Hunks of the diff; left empty for `Unchanged` and `Binary` results.
pub hunks: Vec<DiffHunk>,
/// Total number of `Insert` lines across all hunks.
pub additions: u32,
/// Total number of `Delete` lines across all hunks.
pub deletions: u32,
}
/// Computes a line-based diff between two optional byte blobs.
///
/// `a` is the old side and `b` the new side; `None` means the file is absent on
/// that side. `a_path` / `b_path` are copied into the result unchanged, and
/// `context_lines` is forwarded to `TextDiff::grouped_ops` to control how hunks
/// are grouped.
///
/// The returned [`FileDiff`] has its status decided in this order:
///
/// 1. `Binary { reason: NullByte }` if `looks_binary` flags either present side.
///    NOTE(review): the reason is always `NullByte` here regardless of what
///    `looks_binary` actually detected — confirm that matches its heuristic.
/// 2. `Binary { reason: NotUtf8 }` if either present side is not valid UTF-8.
/// 3. `Unchanged` if both sides are absent or decode to identical text.
/// 4. Otherwise `Added`, `Deleted` or `Modified`, depending on which sides are
///    present, with `hunks`, `additions` and `deletions` filled in.
///
/// Binary and unchanged results carry no hunks and zero counters.
pub fn diff_blobs(
    a: Option<&[u8]>,
    b: Option<&[u8]>,
    a_path: Option<&str>,
    b_path: Option<&str>,
    context_lines: usize,
) -> FileDiff {
    // Provisional status based only on which sides exist; refined below.
    let status = match (a, b) {
        (None, None) => FileDiffStatus::Unchanged,
        (None, Some(_)) => FileDiffStatus::Added,
        (Some(_), None) => FileDiffStatus::Deleted,
        (Some(_), Some(_)) => FileDiffStatus::Modified,
    };
    let mut out = FileDiff {
        a_path: a_path.map(str::to_string),
        b_path: b_path.map(str::to_string),
        status,
        hunks: Vec::new(),
        additions: 0,
        deletions: 0,
    };

    if a.map_or(false, looks_binary) || b.map_or(false, looks_binary) {
        out.status = FileDiffStatus::Binary {
            reason: BinaryReason::NullByte,
        };
        return out;
    }

    // Borrow both sides as text; the inputs outlive the diff, so no copy is needed.
    let decoded = (
        a.map(std::str::from_utf8).transpose(),
        b.map(std::str::from_utf8).transpose(),
    );
    let (a_text, b_text) = match decoded {
        (Ok(a_text), Ok(b_text)) => (a_text, b_text),
        _ => {
            out.status = FileDiffStatus::Binary {
                reason: BinaryReason::NotUtf8,
            };
            return out;
        }
    };

    // Covers both "neither side exists" and "same text on both sides".
    if a_text == b_text {
        out.status = FileDiffStatus::Unchanged;
        return out;
    }

    // An absent side is diffed as empty text.
    let diff = TextDiff::from_lines(a_text.unwrap_or(""), b_text.unwrap_or(""));
    for group in diff.grouped_ops(context_lines) {
        let mut lines: Vec<DiffLine> = Vec::new();
        let mut a_start: Option<u32> = None;
        let mut b_start: Option<u32> = None;
        let mut a_count: u32 = 0;
        let mut b_count: u32 = 0;

        for op in group {
            for change in diff.iter_changes(&op) {
                // Indices are 0-based; reported line numbers are 1-based.
                // The `as` cast assumes fewer than `u32::MAX` lines per side.
                let a_line = change.old_index().map(|i| (i as u32) + 1);
                let b_line = change.new_index().map(|i| (i as u32) + 1);

                // Keep the first line number seen on each side as the hunk start.
                a_start = a_start.or(a_line);
                b_start = b_start.or(b_line);

                let kind = match change.tag() {
                    ChangeTag::Delete => {
                        a_count += 1;
                        out.deletions += 1;
                        DiffLineKind::Delete
                    }
                    ChangeTag::Insert => {
                        b_count += 1;
                        out.additions += 1;
                        DiffLineKind::Insert
                    }
                    ChangeTag::Equal => {
                        a_count += 1;
                        b_count += 1;
                        DiffLineKind::Equal
                    }
                };
                lines.push(DiffLine {
                    kind,
                    a_line,
                    b_line,
                    content: strip_trailing_newline(change.value()),
                });
            }
        }

        out.hunks.push(DiffHunk {
            // A side with no lines in this hunk reports a start of 0.
            a_start: a_start.unwrap_or(0),
            a_count,
            b_start: b_start.unwrap_or(0),
            b_count,
            lines,
        });
    }
    out
}
/// Text convenience wrapper around `diff_blobs`.
///
/// Each present side is passed on as its UTF-8 bytes; the paths and
/// `context_lines` are forwarded unchanged, and the resulting [`FileDiff`] is
/// returned as-is.
pub fn file_diff(
    a: Option<&str>,
    b: Option<&str>,
    a_path: Option<&str>,
    b_path: Option<&str>,
    context_lines: usize,
) -> FileDiff {
    let old_bytes = a.map(|text| text.as_bytes());
    let new_bytes = b.map(|text| text.as_bytes());
    diff_blobs(old_bytes, new_bytes, a_path, b_path, context_lines)
}
/// Returns `s` as an owned string with a single trailing line terminator removed.
///
/// Exactly one `"\r\n"` or `"\n"` is stripped from the end. A `'\r'` that is not
/// followed by `'\n'` is kept, and input without a trailing `'\n'` is returned
/// unchanged.
fn strip_trailing_newline(s: &str) -> String {
    match s.strip_suffix('\n') {
        // Drop at most one '\r' sitting directly before the removed '\n'.
        Some(body) => body.strip_suffix('\r').unwrap_or(body).to_owned(),
        None => s.to_owned(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical text on both sides yields `Unchanged` with no hunks or counters.
    #[test]
    fn unchanged_files_produce_no_hunks() {
        let text = "a\nb\nc\n";
        let diff = file_diff(Some(text), Some(text), Some("x.txt"), Some("x.txt"), 3);

        assert_eq!(diff.status, FileDiffStatus::Unchanged);
        assert!(diff.hunks.is_empty());
        assert_eq!(diff.additions, 0);
        assert_eq!(diff.deletions, 0);
    }

    /// A file present only on the `b` side consists solely of inserted lines.
    #[test]
    fn added_file_yields_only_inserts() {
        let diff = file_diff(None, Some("hello\nworld\n"), None, Some("greet.txt"), 3);

        assert_eq!(diff.status, FileDiffStatus::Added);
        assert_eq!(diff.additions, 2);
        assert_eq!(diff.deletions, 0);
        for hunk in &diff.hunks {
            for line in &hunk.lines {
                assert!(matches!(line.kind, DiffLineKind::Insert));
            }
        }
    }

    /// A file present only on the `a` side counts every line as a deletion.
    #[test]
    fn deleted_file_yields_only_deletes() {
        let diff = file_diff(Some("a\nb\n"), None, Some("gone.txt"), None, 3);

        assert_eq!(diff.status, FileDiffStatus::Deleted);
        assert_eq!(diff.additions, 0);
        assert_eq!(diff.deletions, 2);
    }

    /// One changed line with one line of context forms a single four-line hunk.
    #[test]
    fn modified_file_groups_hunks_with_context() {
        let old = "alpha\nbeta\ngamma\ndelta\nepsilon\n";
        let new = "alpha\nBETA\ngamma\ndelta\nepsilon\n";
        let diff = file_diff(Some(old), Some(new), Some("greek.txt"), Some("greek.txt"), 1);

        assert_eq!(diff.status, FileDiffStatus::Modified);
        assert_eq!(diff.additions, 1);
        assert_eq!(diff.deletions, 1);
        assert_eq!(diff.hunks.len(), 1);

        let mut kinds = Vec::new();
        for line in &diff.hunks[0].lines {
            kinds.push(line.kind);
        }
        let expected = vec![
            DiffLineKind::Equal,
            DiffLineKind::Delete,
            DiffLineKind::Insert,
            DiffLineKind::Equal,
        ];
        assert_eq!(kinds, expected);
    }

    /// A blob containing a NUL byte is reported as binary with the `NullByte` reason.
    #[test]
    fn binary_blobs_surface_as_binary() {
        let plain = b"hello\n".to_vec();
        let mut with_nul = b"hello".to_vec();
        with_nul.push(0);

        let diff = diff_blobs(
            Some(plain.as_slice()),
            Some(with_nul.as_slice()),
            Some("x"),
            Some("x"),
            3,
        );

        assert_eq!(
            diff.status,
            FileDiffStatus::Binary {
                reason: BinaryReason::NullByte,
            }
        );
    }

    /// Serializing to JSON and back reproduces the same `FileDiff`.
    #[test]
    fn round_trips_through_json() {
        let original = file_diff(Some("a\n"), Some("b\n"), Some("p"), Some("p"), 3);

        let json = serde_json::to_string(&original).unwrap();
        let restored: FileDiff = serde_json::from_str(&json).unwrap();

        assert_eq!(original, restored);
    }
}