#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DiffOp {
Equal,
Insert,
Delete,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DiffChange {
pub op: DiffOp,
pub text: String,
pub old_line: Option<usize>,
pub new_line: Option<usize>,
}
impl DiffChange {
pub fn new(op: DiffOp, text: impl Into<String>) -> Self {
Self {
op,
text: text.into(),
old_line: None,
new_line: None,
}
}
pub fn equal(text: impl Into<String>) -> Self {
Self::new(DiffOp::Equal, text)
}
pub fn insert(text: impl Into<String>) -> Self {
Self::new(DiffOp::Insert, text)
}
pub fn delete(text: impl Into<String>) -> Self {
Self::new(DiffOp::Delete, text)
}
pub fn with_lines(mut self, old: Option<usize>, new: Option<usize>) -> Self {
self.old_line = old;
self.new_line = new;
self
}
pub fn is_equal(&self) -> bool {
self.op == DiffOp::Equal
}
pub fn is_insert(&self) -> bool {
self.op == DiffOp::Insert
}
pub fn is_delete(&self) -> bool {
self.op == DiffOp::Delete
}
}
pub fn diff_lines(old: &str, new: &str) -> Vec<DiffChange> {
let old_lines: Vec<&str> = old.lines().collect();
let new_lines: Vec<&str> = new.lines().collect();
diff_sequences(&old_lines, &new_lines)
}
pub fn diff_chars(old: &str, new: &str) -> Vec<DiffChange> {
diff_chars_simple(old, new)
}
fn diff_chars_simple(old: &str, new: &str) -> Vec<DiffChange> {
let old_chars: Vec<char> = old.chars().collect();
let new_chars: Vec<char> = new.chars().collect();
let lcs = longest_common_subsequence(&old_chars, &new_chars);
let mut changes = Vec::new();
let mut old_idx = 0;
let mut new_idx = 0;
let mut lcs_idx = 0;
while old_idx < old_chars.len() || new_idx < new_chars.len() {
if lcs_idx < lcs.len() {
let (lcs_old, lcs_new) = lcs[lcs_idx];
while old_idx < lcs_old {
changes.push(DiffChange::delete(old_chars[old_idx].to_string()));
old_idx += 1;
}
while new_idx < lcs_new {
changes.push(DiffChange::insert(new_chars[new_idx].to_string()));
new_idx += 1;
}
changes.push(DiffChange::equal(old_chars[old_idx].to_string()));
old_idx += 1;
new_idx += 1;
lcs_idx += 1;
} else {
while old_idx < old_chars.len() {
changes.push(DiffChange::delete(old_chars[old_idx].to_string()));
old_idx += 1;
}
while new_idx < new_chars.len() {
changes.push(DiffChange::insert(new_chars[new_idx].to_string()));
new_idx += 1;
}
}
}
merge_changes(changes)
}
const MAX_DIFF_SIZE: usize = 10_000;
const MAX_DIFF_MEMORY_MB: usize = 50;
fn longest_common_subsequence<T: PartialEq>(a: &[T], b: &[T]) -> Vec<(usize, usize)> {
let m = a.len();
let n = b.len();
if m == 0 || n == 0 {
return vec![];
}
if m > MAX_DIFF_SIZE || n > MAX_DIFF_SIZE {
return simplified_diff(a, b);
}
let estimated_bytes = (m + 1) * (n + 1) * std::mem::size_of::<usize>();
let max_bytes = MAX_DIFF_MEMORY_MB * 1024 * 1024;
if estimated_bytes > max_bytes {
return simplified_diff(a, b);
}
let mut dp = vec![vec![0usize; n + 1]; m + 1];
for i in 1..=m {
for j in 1..=n {
if a[i - 1] == b[j - 1] {
dp[i][j] = dp[i - 1][j - 1] + 1;
} else {
dp[i][j] = dp[i - 1][j].max(dp[i][j - 1]);
}
}
}
let mut result = Vec::new();
let mut i = m;
let mut j = n;
while i > 0 && j > 0 {
if a[i - 1] == b[j - 1] {
result.push((i - 1, j - 1));
i -= 1;
j -= 1;
} else if dp[i - 1][j] > dp[i][j - 1] {
i -= 1;
} else {
j -= 1;
}
}
result.reverse();
result
}
fn simplified_diff<T: PartialEq>(a: &[T], b: &[T]) -> Vec<(usize, usize)> {
let mut result = Vec::new();
for (i, a_val) in a.iter().enumerate() {
for (j, b_val) in b.iter().enumerate() {
if a_val == b_val {
result.push((i, j));
break;
}
}
}
result
}
fn diff_sequences(old: &[&str], new: &[&str]) -> Vec<DiffChange> {
let lcs = longest_common_subsequence(old, new);
let mut changes = Vec::new();
let mut old_idx = 0;
let mut new_idx = 0;
let mut old_line = 1;
let mut new_line = 1;
for (lcs_old, lcs_new) in lcs {
while old_idx < lcs_old {
changes.push(DiffChange::delete(old[old_idx]).with_lines(Some(old_line), None));
old_idx += 1;
old_line += 1;
}
while new_idx < lcs_new {
changes.push(DiffChange::insert(new[new_idx]).with_lines(None, Some(new_line)));
new_idx += 1;
new_line += 1;
}
changes.push(DiffChange::equal(old[old_idx]).with_lines(Some(old_line), Some(new_line)));
old_idx += 1;
new_idx += 1;
old_line += 1;
new_line += 1;
}
while old_idx < old.len() {
changes.push(DiffChange::delete(old[old_idx]).with_lines(Some(old_line), None));
old_idx += 1;
old_line += 1;
}
while new_idx < new.len() {
changes.push(DiffChange::insert(new[new_idx]).with_lines(None, Some(new_line)));
new_idx += 1;
new_line += 1;
}
changes
}
fn merge_changes(changes: Vec<DiffChange>) -> Vec<DiffChange> {
if changes.is_empty() {
return changes;
}
let mut merged = Vec::new();
let mut current = changes[0].clone();
for change in changes.into_iter().skip(1) {
if change.op == current.op {
current.text.push_str(&change.text);
} else {
merged.push(current);
current = change;
}
}
merged.push(current);
merged
}
pub fn diff_words(old: &str, new: &str) -> Vec<DiffChange> {
let old_words: Vec<&str> = old.split_whitespace().collect();
let new_words: Vec<&str> = new.split_whitespace().collect();
diff_sequences(&old_words, &new_words)
}
pub fn format_unified_diff(old: &str, new: &str, old_name: &str, new_name: &str) -> String {
let changes = diff_lines(old, new);
let mut output = String::new();
output.push_str(&format!("--- {}\n", old_name));
output.push_str(&format!("+++ {}\n", new_name));
let mut i = 0;
while i < changes.len() {
if changes[i].is_equal() {
i += 1;
continue;
}
let hunk_start = i.saturating_sub(3); let mut hunk_end = i;
while hunk_end < changes.len() {
if changes[hunk_end].is_equal() {
let context_end = (hunk_end + 3).min(changes.len());
let has_more_changes = changes[hunk_end..context_end].iter().any(|c| !c.is_equal());
if !has_more_changes {
hunk_end = context_end.min(changes.len());
break;
}
}
hunk_end += 1;
}
let old_start = changes[hunk_start].old_line.unwrap_or(1);
let new_start = changes[hunk_start].new_line.unwrap_or(1);
let old_count = changes[hunk_start..hunk_end]
.iter()
.filter(|c| !c.is_insert())
.count();
let new_count = changes[hunk_start..hunk_end]
.iter()
.filter(|c| !c.is_delete())
.count();
output.push_str(&format!(
"@@ -{},{} +{},{} @@\n",
old_start, old_count, new_start, new_count
));
for change in &changes[hunk_start..hunk_end] {
let prefix = match change.op {
DiffOp::Equal => ' ',
DiffOp::Insert => '+',
DiffOp::Delete => '-',
};
output.push(prefix);
output.push_str(&change.text);
output.push('\n');
}
i = hunk_end;
}
output
}
#[derive(Clone, Debug, Default)]
pub struct DiffStats {
pub equal: usize,
pub insertions: usize,
pub deletions: usize,
}
impl DiffStats {
pub fn from_changes(changes: &[DiffChange]) -> Self {
let mut stats = Self::default();
for change in changes {
match change.op {
DiffOp::Equal => stats.equal += 1,
DiffOp::Insert => stats.insertions += 1,
DiffOp::Delete => stats.deletions += 1,
}
}
stats
}
pub fn total_changes(&self) -> usize {
self.insertions + self.deletions
}
pub fn similarity(&self) -> f64 {
let total = self.equal + self.insertions + self.deletions;
if total == 0 {
1.0
} else {
self.equal as f64 / total as f64
}
}
}