use serde::{Deserialize, Serialize};
use crate::error::DiffError;
/// A single step of a text diff, carrying the text it applies to.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DiffOp {
/// Text that appears unchanged in both the old and the new input.
Equal(String),
/// Text that appears only in the new input.
Insert(String),
/// Text that appears only in the old input.
Delete(String),
}
impl DiffOp {
pub fn kind(&self) -> &'static str {
match self {
DiffOp::Equal(_) => "=",
DiffOp::Insert(_) => "+",
DiffOp::Delete(_) => "-",
}
}
pub fn text(&self) -> &str {
match self {
DiffOp::Equal(s) | DiffOp::Insert(s) | DiffOp::Delete(s) => s,
}
}
}
/// Result of comparing two texts: an ordered list of diff operations plus a
/// similarity score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextDiff {
/// Diff operations in the order they apply, from the start of the text to the end.
pub ops: Vec<DiffOp>,
/// Similarity score. `TextDiff::compute` fills it with the ratio of shared distinct
/// words to all distinct words, which lies between `0.0` and `1.0`.
pub similarity: f64,
}
impl TextDiff {
    /// Builds a line-level diff of `old` against `new` together with a word-level
    /// similarity score.
    ///
    /// Byte-identical inputs take a fast path that yields a single [`DiffOp::Equal`]
    /// holding the entire text and a similarity of `1.0`. Otherwise both inputs are
    /// split with [`str::lines`] and diffed line by line, so every operation carries
    /// one line without its terminator.
    pub fn compute(old: &str, new: &str) -> Self {
        if old == new {
            return Self { ops: vec![DiffOp::Equal(old.to_string())], similarity: 1.0 };
        }
        let old_lines: Vec<&str> = old.lines().collect();
        let new_lines: Vec<&str> = new.lines().collect();
        let ops = line_diff(&old_lines, &new_lines);
        let similarity = compute_similarity(old, new);
        Self { ops, similarity }
    }

    /// Number of [`DiffOp::Insert`] operations in `ops`.
    pub fn insertions(&self) -> usize {
        self.ops.iter().filter(|op| matches!(op, DiffOp::Insert(_))).count()
    }

    /// Number of [`DiffOp::Delete`] operations in `ops`.
    pub fn deletions(&self) -> usize {
        self.ops.iter().filter(|op| matches!(op, DiffOp::Delete(_))).count()
    }

    /// Returns `true` when the diff contains no insertions and no deletions.
    ///
    /// The operations are inspected instead of `similarity` on purpose: the score
    /// compares *sets of words*, so it is also `1.0` for texts that merely reorder or
    /// repeat the same words (for example `"a b"` vs `"b a"`), which are not identical.
    ///
    /// Because lines are split with [`str::lines`], inputs that differ only in their
    /// line terminators still produce nothing but `Equal` operations and therefore
    /// report as identical.
    pub fn is_identical(&self) -> bool {
        self.ops.iter().all(|op| matches!(op, DiffOp::Equal(_)))
    }
}
/// Produces a line-level edit script that turns `old` into `new`.
///
/// A longest-common-subsequence table is filled over both inputs and then walked
/// backwards from its bottom-right corner. The operations are collected in reverse
/// and flipped before returning so that they read from the first line to the last.
fn line_diff(old: &[&str], new: &[&str]) -> Vec<DiffOp> {
    let (old_len, new_len) = (old.len(), new.len());

    // lcs[r][c] is the LCS length of the first `r` old lines and the first `c` new
    // lines; row 0 and column 0 stay zero (empty prefix).
    let mut lcs = vec![vec![0usize; new_len + 1]; old_len + 1];
    for (r, old_line) in old.iter().enumerate() {
        for (c, new_line) in new.iter().enumerate() {
            lcs[r + 1][c + 1] = if old_line == new_line {
                lcs[r][c] + 1
            } else {
                lcs[r][c + 1].max(lcs[r + 1][c])
            };
        }
    }

    let mut script = Vec::new();
    let mut r = old_len;
    let mut c = new_len;
    while (r, c) != (0, 0) {
        let same_line = r > 0 && c > 0 && old[r - 1] == new[c - 1];
        if same_line {
            script.push(DiffOp::Equal(old[r - 1].to_owned()));
            r -= 1;
            c -= 1;
            continue;
        }

        // Step back over a new line when no old lines remain, or when doing so keeps
        // at least as long a common subsequence as stepping back over an old line.
        let take_insert = c > 0 && (r == 0 || lcs[r][c - 1] >= lcs[r - 1][c]);
        if take_insert {
            script.push(DiffOp::Insert(new[c - 1].to_owned()));
            c -= 1;
        } else {
            script.push(DiffOp::Delete(old[r - 1].to_owned()));
            r -= 1;
        }
    }

    script.reverse();
    script
}
/// Scores how much vocabulary two texts share, as a ratio between `0.0` and `1.0`.
///
/// Each text is reduced to its set of distinct whitespace-separated words. The score
/// is the number of words found in both sets divided by the number of words found in
/// either. Two texts that contain no words at all score `1.0`.
fn compute_similarity(a: &str, b: &str) -> f64 {
    use std::collections::HashSet;

    let left: HashSet<&str> = a.split_whitespace().collect();
    let right: HashSet<&str> = b.split_whitespace().collect();

    let shared = left.iter().filter(|word| right.contains(*word)).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let total = left.len() + right.len() - shared;

    match total {
        0 => 1.0,
        _ => shared as f64 / total as f64,
    }
}
/// One difference reported by `json_diff` between two JSON documents.
///
/// Paths start at `$` for the document root and gain a `.key` segment for every
/// object member descended into.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum JsonDiffOp {
/// The values at `path` differ; `old` and `new` hold the value from each document.
ValueChanged { path: String, old: serde_json::Value, new: serde_json::Value },
/// The object member at `path` exists only in the new document; `value` is its value there.
KeyAdded { path: String, value: serde_json::Value },
/// The object member at `path` exists only in the old document; `value` is its value there.
KeyRemoved { path: String, value: serde_json::Value },
/// Marker emitted by `json_diff` as the sole entry when no differences were found.
Equal,
}
/// Parses two JSON documents and lists the differences between them.
///
/// The returned list is never empty: when the documents do not differ it contains a
/// single [`JsonDiffOp::Equal`].
///
/// # Errors
///
/// Returns the parse failure of either input, converted into [`DiffError`] by `?`
/// (the module's tests expect the `DiffError::Serialization` variant).
pub fn json_diff(old_json: &str, new_json: &str) -> Result<Vec<JsonDiffOp>, DiffError> {
    let before = serde_json::from_str::<serde_json::Value>(old_json)?;
    let after = serde_json::from_str::<serde_json::Value>(new_json)?;

    let mut changes = Vec::new();
    diff_values("$", &before, &after, &mut changes);

    // An empty change list is replaced by an explicit "no differences" marker.
    if changes.is_empty() {
        changes.push(JsonDiffOp::Equal);
    }
    Ok(changes)
}
/// Recursively appends the differences between `old` and `new` to `ops`.
///
/// `path` locates the pair being compared; object members extend it with `.key`.
/// Only a pair of objects is descended into. Any other pair of values (arrays
/// included) is compared as a whole and reported as one `ValueChanged` when unequal.
///
/// For objects, changed and removed members are reported first, in the old object's
/// iteration order, followed by the members that exist only in the new object.
fn diff_values(path: &str, old: &serde_json::Value, new: &serde_json::Value, ops: &mut Vec<JsonDiffOp>) {
    if let (serde_json::Value::Object(before), serde_json::Value::Object(after)) = (old, new) {
        for (key, old_member) in before {
            let member_path = format!("{path}.{key}");
            match after.get(key) {
                Some(new_member) => diff_values(&member_path, old_member, new_member, ops),
                None => ops.push(JsonDiffOp::KeyRemoved {
                    path: member_path,
                    value: old_member.clone(),
                }),
            }
        }

        ops.extend(
            after
                .iter()
                .filter(|(key, _)| !before.contains_key(key.as_str()))
                .map(|(key, added)| JsonDiffOp::KeyAdded {
                    path: format!("{path}.{key}"),
                    value: added.clone(),
                }),
        );
        return;
    }

    // At least one side is not an object: compare the two values as a whole.
    if old != new {
        ops.push(JsonDiffOp::ValueChanged {
            path: path.to_owned(),
            old: old.clone(),
            new: new.clone(),
        });
    }
}
// Unit tests for the text diff (`TextDiff`, `DiffOp`) and the JSON diff (`json_diff`).
#[cfg(test)]
mod tests {
use super::*;
// Identical inputs take the fast path: similarity is exactly 1.0.
#[test]
fn test_text_diff_identical_strings_similarity_one() {
let d = TextDiff::compute("hello", "hello");
assert!(d.is_identical());
assert_eq!(d.similarity, 1.0);
}
// Inputs that share no words must score below 1.0.
#[test]
fn test_text_diff_completely_different_similarity_less_than_one() {
let d = TextDiff::compute("aaa bbb ccc", "xxx yyy zzz");
assert!(d.similarity < 1.0);
}
// A line appended to the new text shows up as at least one insertion.
#[test]
fn test_text_diff_insertions_counted() {
let d = TextDiff::compute("line1", "line1\nline2");
assert!(d.insertions() > 0);
}
// A line dropped from the old text shows up as at least one deletion.
#[test]
fn test_text_diff_deletions_counted() {
let d = TextDiff::compute("line1\nline2", "line1");
assert!(d.deletions() > 0);
}
// The similarity score stays within the closed range 0.0 to 1.0.
#[test]
fn test_text_diff_similarity_in_range() {
let d = TextDiff::compute("the quick brown fox", "the slow blue dog");
assert!(d.similarity >= 0.0 && d.similarity <= 1.0);
}
// Each variant maps to its one-character marker.
#[test]
fn test_diff_op_kind_labels() {
assert_eq!(DiffOp::Equal("x".into()).kind(), "=");
assert_eq!(DiffOp::Insert("x".into()).kind(), "+");
assert_eq!(DiffOp::Delete("x".into()).kind(), "-");
}
// `text` hands back the string stored in the variant.
#[test]
fn test_diff_op_text_returns_content() {
assert_eq!(DiffOp::Insert("hello".into()).text(), "hello");
}
// Equal documents yield the explicit `Equal` marker as the first entry.
#[test]
fn test_json_diff_equal_returns_equal_op() {
let ops = json_diff(r#"{"a":1}"#, r#"{"a":1}"#).unwrap();
assert!(matches!(ops[0], JsonDiffOp::Equal));
}
// A member whose value differs is reported as `ValueChanged`.
#[test]
fn test_json_diff_value_changed_detected() {
let ops = json_diff(r#"{"a":1}"#, r#"{"a":2}"#).unwrap();
assert!(ops.iter().any(|op| matches!(op, JsonDiffOp::ValueChanged { .. })));
}
// A member present only in the new document is reported as `KeyAdded`.
#[test]
fn test_json_diff_key_added_detected() {
let ops = json_diff(r#"{"a":1}"#, r#"{"a":1,"b":2}"#).unwrap();
assert!(ops.iter().any(|op| matches!(op, JsonDiffOp::KeyAdded { .. })));
}
// A member present only in the old document is reported as `KeyRemoved`.
#[test]
fn test_json_diff_key_removed_detected() {
let ops = json_diff(r#"{"a":1,"b":2}"#, r#"{"a":1}"#).unwrap();
assert!(ops.iter().any(|op| matches!(op, JsonDiffOp::KeyRemoved { .. })));
}
// Unparseable input surfaces as `DiffError::Serialization` instead of panicking.
#[test]
fn test_json_diff_invalid_json_returns_serialization_error() {
let err = json_diff("not json", "{}").unwrap_err();
assert!(matches!(err, DiffError::Serialization(_)));
}
// Two empty strings are equal and therefore identical.
#[test]
fn test_text_diff_empty_strings_identical() {
let d = TextDiff::compute("", "");
assert!(d.is_identical());
}
}