use crate::error::Result;
use crate::types::{DiffHunk, FileDiff, LineChange, DiffOptions};
use std::path::Path;
/// Computes a line-based diff between two byte buffers.
///
/// Both buffers are decoded with `String::from_utf8_lossy` and split with
/// `str::lines`. The resulting line lists are turned into a change script by
/// `compute_changes` and grouped into hunks by `create_hunks`, using
/// `options.context_lines` as the amount of surrounding context.
///
/// Returns an empty list when neither input contains any line. No failure
/// path is visible in this function itself; the `Result` is part of the
/// public signature.
pub fn compute_line_diff(
    old_content: &[u8],
    new_content: &[u8],
    options: &DiffOptions,
) -> Result<Vec<DiffHunk>> {
    let decoded_old = String::from_utf8_lossy(old_content);
    let decoded_new = String::from_utf8_lossy(new_content);

    let old_lines = decoded_old.lines().collect::<Vec<&str>>();
    let new_lines = decoded_new.lines().collect::<Vec<&str>>();

    // Nothing on either side: there is nothing to compare.
    if old_lines.is_empty() && new_lines.is_empty() {
        return Ok(Vec::new());
    }

    let script = compute_changes(&old_lines, &new_lines, options);
    Ok(create_hunks(
        script,
        &old_lines,
        &new_lines,
        options.context_lines,
    ))
}
/// One step of the edit script that transforms the old line list into the new
/// one. All indices are 0-based positions in the respective line list.
#[derive(Debug, Clone)]
enum ChangeOp {
    /// Line present on both sides: `(old index, new index)`.
    Keep(usize, usize),
    /// Line only present in the old list, at the given old index.
    Delete(usize),
    /// Line only present in the new list, at the given new index.
    Insert(usize),
}
/// Builds the edit script between `old_lines` and `new_lines`.
///
/// An empty old side yields one `Insert` per new line and an empty new side
/// yields one `Delete` per old line. Otherwise the script is derived from the
/// pairs returned by `compute_lcs`, compared according to
/// `options.ignore_whitespace`.
fn compute_changes(
    old_lines: &[&str],
    new_lines: &[&str],
    options: &DiffOptions,
) -> Vec<ChangeOp> {
    match (old_lines.is_empty(), new_lines.is_empty()) {
        // Old side empty (new side may be empty too): everything is an insertion.
        (true, _) => (0..new_lines.len()).map(ChangeOp::Insert).collect(),
        // Only the new side is empty: everything is a deletion.
        (false, true) => (0..old_lines.len()).map(ChangeOp::Delete).collect(),
        (false, false) => {
            let anchors = compute_lcs(old_lines, new_lines, options.ignore_whitespace);
            lcs_to_changes(&anchors, old_lines.len(), new_lines.len())
        }
    }
}
/// Finds a longest common subsequence of the two line lists.
///
/// Lines are compared with `lines_equal`, honouring `ignore_whitespace`.
/// The result lists the matched `(old index, new index)` pairs (0-based) in
/// increasing order.
///
/// Uses a full `(old + 1) x (new + 1)` table, so time and memory grow with
/// the product of the two lengths.
fn compute_lcs(
    old_lines: &[&str],
    new_lines: &[&str],
    ignore_whitespace: bool,
) -> Vec<(usize, usize)> {
    let rows = old_lines.len();
    let cols = new_lines.len();

    // table[r][c] holds the LCS length of old_lines[..r] and new_lines[..c].
    let mut table = vec![vec![0; cols + 1]; rows + 1];
    for (r, &old_line) in old_lines.iter().enumerate() {
        for (c, &new_line) in new_lines.iter().enumerate() {
            if lines_equal(old_line, new_line, ignore_whitespace) {
                table[r + 1][c + 1] = table[r][c] + 1;
            } else {
                table[r + 1][c + 1] = table[r][c + 1].max(table[r + 1][c]);
            }
        }
    }

    // Walk back from the bottom-right corner, collecting matches in reverse.
    let mut pairs = Vec::new();
    let (mut r, mut c) = (rows, cols);
    while r > 0 && c > 0 {
        if lines_equal(old_lines[r - 1], new_lines[c - 1], ignore_whitespace) {
            r -= 1;
            c -= 1;
            pairs.push((r, c));
        } else if table[r - 1][c] > table[r][c - 1] {
            r -= 1;
        } else {
            // Ties step left, i.e. consume a new-side line first.
            c -= 1;
        }
    }

    pairs.reverse();
    pairs
}
/// Compares two lines for equality.
///
/// With `ignore_whitespace` set, leading and trailing whitespace is stripped
/// from both lines before comparing; whitespace inside a line still counts.
fn lines_equal(a: &str, b: &str, ignore_whitespace: bool) -> bool {
    let (lhs, rhs) = if ignore_whitespace {
        (a.trim(), b.trim())
    } else {
        (a, b)
    };
    lhs == rhs
}
/// Expands a list of matched `(old index, new index)` pairs into a full edit
/// script covering `old_len` old lines and `new_len` new lines.
///
/// Before each matched pair, the unmatched old lines are emitted as `Delete`
/// and then the unmatched new lines as `Insert`; the pair itself becomes a
/// `Keep`. Once all pairs are consumed, the remaining old lines are deleted
/// and the remaining new lines inserted.
fn lcs_to_changes(lcs: &[(usize, usize)], old_len: usize, new_len: usize) -> Vec<ChangeOp> {
    let mut ops = Vec::new();
    let (mut old_pos, mut new_pos) = (0, 0);
    let mut next_anchor = 0;

    while old_pos < old_len || new_pos < new_len {
        match lcs.get(next_anchor) {
            Some(&(anchor_old, anchor_new)) => {
                // Everything strictly before the anchor is unmatched.
                ops.extend((old_pos..anchor_old).map(ChangeOp::Delete));
                old_pos = old_pos.max(anchor_old);
                ops.extend((new_pos..anchor_new).map(ChangeOp::Insert));
                new_pos = new_pos.max(anchor_new);

                if old_pos == anchor_old && new_pos == anchor_new {
                    ops.push(ChangeOp::Keep(old_pos, new_pos));
                    old_pos += 1;
                    new_pos += 1;
                    next_anchor += 1;
                }
            }
            None => {
                // No anchors left: flush both tails.
                ops.extend((old_pos..old_len).map(ChangeOp::Delete));
                old_pos = old_pos.max(old_len);
                ops.extend((new_pos..new_len).map(ChangeOp::Insert));
                new_pos = new_pos.max(new_len);
            }
        }
    }

    ops
}
fn create_hunks(
changes: Vec<ChangeOp>,
old_lines: &[&str],
new_lines: &[&str],
context_lines: usize,
) -> Vec<DiffHunk> {
if changes.is_empty() {
return vec![];
}
let mut hunks = Vec::new();
let mut current_hunk: Option<HunkBuilder> = None;
for (i, change) in changes.iter().enumerate() {
match change {
ChangeOp::Keep(old_idx, new_idx) => {
let near_change = is_near_change(&changes, i, context_lines);
if near_change {
if let Some(ref mut hunk) = current_hunk {
hunk.add_context(*old_idx, *new_idx, old_lines[*old_idx]);
} else {
let mut hunk = HunkBuilder::new();
let start = i.saturating_sub(context_lines);
for j in start..i {
if let ChangeOp::Keep(o, n) = &changes[j] {
hunk.add_context(*o, *n, old_lines[*o]);
}
}
hunk.add_context(*old_idx, *new_idx, old_lines[*old_idx]);
current_hunk = Some(hunk);
}
} else if let Some(hunk) = current_hunk.take() {
if let Some(built) = hunk.build() {
hunks.push(built);
}
}
}
ChangeOp::Delete(old_idx) => {
if current_hunk.is_none() {
let mut hunk = HunkBuilder::new();
let start = i.saturating_sub(context_lines);
for j in start..i {
if let ChangeOp::Keep(o, n) = &changes[j] {
hunk.add_context(*o, *n, old_lines[*o]);
}
}
current_hunk = Some(hunk);
}
if let Some(ref mut hunk) = current_hunk {
hunk.add_deletion(*old_idx, old_lines[*old_idx]);
}
}
ChangeOp::Insert(new_idx) => {
if current_hunk.is_none() {
let mut hunk = HunkBuilder::new();
let start = i.saturating_sub(context_lines);
for j in start..i {
if let ChangeOp::Keep(o, n) = &changes[j] {
hunk.add_context(*o, *n, old_lines[*o]);
}
}
current_hunk = Some(hunk);
}
if let Some(ref mut hunk) = current_hunk {
hunk.add_insertion(*new_idx, new_lines[*new_idx]);
}
}
}
}
if let Some(hunk) = current_hunk {
if let Some(built) = hunk.build() {
hunks.push(built);
}
}
hunks
}
/// Reports whether any `Delete` or `Insert` lies within `context_lines`
/// positions of `pos` in `changes` (the entry at `pos` itself is ignored).
///
/// The window is clamped to the bounds of `changes`. The upper bound is
/// computed with saturating arithmetic so that a very large `context_lines`
/// (for example `usize::MAX`) cannot overflow.
fn is_near_change(changes: &[ChangeOp], pos: usize, context_lines: usize) -> bool {
    let window_start = pos.saturating_sub(context_lines);
    let window_end = pos
        .saturating_add(context_lines)
        .saturating_add(1)
        .min(changes.len());

    (window_start..window_end)
        .filter(|&idx| idx != pos)
        .any(|idx| match changes[idx] {
            ChangeOp::Delete(_) | ChangeOp::Insert(_) => true,
            ChangeOp::Keep(_, _) => false,
        })
}
/// Accumulates the lines of a single hunk before it is turned into a
/// `DiffHunk`.
struct HunkBuilder {
    /// 1-based first line of the hunk on the old side, once known.
    from_start: Option<usize>,
    /// Number of old-side lines in the hunk (context + deletions).
    from_count: usize,
    /// 1-based first line of the hunk on the new side, once known.
    to_start: Option<usize>,
    /// Number of new-side lines in the hunk (context + insertions).
    to_count: usize,
    /// Lines of the hunk in the order they were added.
    changes: Vec<LineChange>,
}
impl HunkBuilder {
    /// Creates an empty builder with no known start lines.
    fn new() -> Self {
        Self {
            from_start: None,
            to_start: None,
            from_count: 0,
            to_count: 0,
            changes: Vec::new(),
        }
    }

    /// Appends an unchanged line. `old_idx`/`new_idx` are 0-based; the stored
    /// line number is the 1-based old-side number.
    fn add_context(&mut self, old_idx: usize, new_idx: usize, content: &str) {
        // Each side's start is recorded independently so that an earlier
        // deletion or insertion (which only knows one side) is not overwritten.
        if self.from_start.is_none() {
            self.from_start = Some(old_idx + 1);
        }
        if self.to_start.is_none() {
            self.to_start = Some(new_idx + 1);
        }
        self.from_count += 1;
        self.to_count += 1;
        self.changes
            .push(LineChange::Context(old_idx + 1, content.to_string()));
    }

    /// Appends a deleted line (`old_idx` is 0-based, stored 1-based).
    fn add_deletion(&mut self, old_idx: usize, content: &str) {
        // A deletion only tells us where the hunk starts on the old side.
        // The new-side start is left open so that a following insertion or
        // context line can supply the real value instead of a pinned `1`.
        if self.from_start.is_none() {
            self.from_start = Some(old_idx + 1);
        }
        self.from_count += 1;
        self.changes
            .push(LineChange::Deleted(old_idx + 1, content.to_string()));
    }

    /// Appends an inserted line (`new_idx` is 0-based, stored 1-based).
    fn add_insertion(&mut self, new_idx: usize, content: &str) {
        // Mirror of `add_deletion`: only the new-side start is known here.
        if self.to_start.is_none() {
            self.to_start = Some(new_idx + 1);
        }
        self.to_count += 1;
        self.changes
            .push(LineChange::Added(new_idx + 1, content.to_string()));
    }

    /// Finishes the hunk, or returns `None` if no line was ever added.
    ///
    /// A side that never received a line (for example the new side of a hunk
    /// consisting only of deletions) reports a start line of 1.
    fn build(self) -> Option<DiffHunk> {
        if self.changes.is_empty() {
            return None;
        }
        Some(DiffHunk {
            from_line: self.from_start.unwrap_or(1),
            from_count: self.from_count,
            to_line: self.to_start.unwrap_or(1),
            to_count: self.to_count,
            changes: self.changes,
        })
    }
}
/// Heuristically classifies `content` as binary.
///
/// Returns `true` when a NUL byte occurs within the first 8192 bytes; bytes
/// beyond that prefix are not inspected. Empty input is not binary.
pub fn is_binary_content(content: &[u8]) -> bool {
    content.iter().take(8192).any(|&byte| byte == 0)
}
/// Builds the `FileDiff` record for one file.
///
/// If either buffer is classified as binary by `is_binary_content`, no line
/// diff is computed: the hunk list is empty and both counters are zero.
/// Otherwise the hunks come from `compute_line_diff` and the counters are the
/// number of `Added` and `Deleted` lines across all hunks.
///
/// # Errors
///
/// Propagates any error returned by `compute_line_diff`.
pub fn create_file_diff(
    path: &Path,
    from_hash: &str,
    to_hash: &str,
    old_content: &[u8],
    new_content: &[u8],
    options: &DiffOptions,
) -> Result<FileDiff> {
    let is_binary = is_binary_content(old_content) || is_binary_content(new_content);

    let hunks = if is_binary {
        vec![]
    } else {
        compute_line_diff(old_content, new_content, options)?
    };

    // Tally added/deleted lines; an empty hunk list leaves both at zero.
    let (lines_added, lines_deleted) = hunks
        .iter()
        .flat_map(|hunk| hunk.changes.iter())
        .fold((0, 0), |(added, deleted), change| match change {
            LineChange::Added(_, _) => (added + 1, deleted),
            LineChange::Deleted(_, _) => (added, deleted + 1),
            LineChange::Context(_, _) => (added, deleted),
        });

    Ok(FileDiff {
        path: path.to_path_buf(),
        from_hash: from_hash.to_string(),
        to_hash: to_hash.to_string(),
        is_binary,
        hunks,
        lines_added,
        lines_deleted,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A modified line plus an appended line end up in a single hunk that
    /// contains both a deletion and an addition.
    #[test]
    fn test_simple_diff() {
        let before = b"line1\nline2\nline3";
        let after = b"line1\nline2 modified\nline3\nline4";
        let opts = DiffOptions::default();

        let result = compute_line_diff(before, after, &opts).unwrap();
        assert_eq!(result.len(), 1);

        let lines = &result[0].changes;
        assert!(lines.iter().any(|c| matches!(c, LineChange::Deleted(_, _))));
        assert!(lines.iter().any(|c| matches!(c, LineChange::Added(_, _))));
    }

    /// Empty inputs: no hunks for empty-vs-empty, pure additions or pure
    /// deletions when only one side is empty.
    #[test]
    fn test_empty_files() {
        let opts = DiffOptions::default();

        let both_empty = compute_line_diff(b"", b"", &opts).unwrap();
        assert_eq!(both_empty.len(), 0);

        let only_new = compute_line_diff(b"", b"new line", &opts).unwrap();
        assert_eq!(only_new.len(), 1);
        assert!(only_new[0]
            .changes
            .iter()
            .all(|c| matches!(c, LineChange::Added(_, _))));

        let only_old = compute_line_diff(b"old line", b"", &opts).unwrap();
        assert_eq!(only_old.len(), 1);
        assert!(only_old[0]
            .changes
            .iter()
            .all(|c| matches!(c, LineChange::Deleted(_, _))));
    }

    /// A NUL byte marks content as binary; plain text does not.
    #[test]
    fn test_binary_detection() {
        assert!(is_binary_content(b"hello\x00world"));
        assert!(!is_binary_content(b"hello world"));
    }

    /// With two lines of context, a single changed line in the middle of the
    /// file yields one hunk carrying at least four context lines in total.
    #[test]
    fn test_context_lines() {
        let before = b"1\n2\n3\n4\n5\n6\n7\n8\n9";
        let after = b"1\n2\n3\nMODIFIED\n5\n6\n7\n8\n9";
        let mut opts = DiffOptions::default();
        opts.context_lines = 2;

        let result = compute_line_diff(before, after, &opts).unwrap();
        assert_eq!(result.len(), 1);

        let context_total = result[0]
            .changes
            .iter()
            .filter(|c| matches!(c, LineChange::Context(_, _)))
            .count();
        assert!(context_total >= 4);
    }
}