Skip to main content

llm_diff/
diff.rs

1// SPDX-License-Identifier: MIT
2//! Diff primitives: line-level text diff and structural JSON diff.
3
4use serde::{Deserialize, Serialize};
5use crate::error::DiffError;
6
7/// A single edit operation in a text diff.
8#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
9pub enum DiffOp {
10    /// Content present in both old and new.
11    Equal(String),
12    /// Content only in the new text.
13    Insert(String),
14    /// Content only in the old text.
15    Delete(String),
16}
17
18impl DiffOp {
19    /// Returns a single-character label: `=`, `+`, or `-`.
20    pub fn kind(&self) -> &'static str {
21        match self {
22            DiffOp::Equal(_) => "=",
23            DiffOp::Insert(_) => "+",
24            DiffOp::Delete(_) => "-",
25        }
26    }
27
28    /// Returns the text content of the operation.
29    pub fn text(&self) -> &str {
30        match self {
31            DiffOp::Equal(s) | DiffOp::Insert(s) | DiffOp::Delete(s) => s,
32        }
33    }
34}
35
36/// The result of comparing two text outputs.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct TextDiff {
39    /// Sequence of edit operations transforming old into new.
40    pub ops: Vec<DiffOp>,
41    /// Jaccard similarity on word bags: 0.0 = no overlap, 1.0 = identical.
42    pub similarity: f64,
43}
44
45impl TextDiff {
46    /// Computes a line-level diff between `old` and `new`.
47    pub fn compute(old: &str, new: &str) -> Self {
48        if old == new {
49            return Self { ops: vec![DiffOp::Equal(old.to_string())], similarity: 1.0 };
50        }
51        let old_lines: Vec<&str> = old.lines().collect();
52        let new_lines: Vec<&str> = new.lines().collect();
53        let ops = line_diff(&old_lines, &new_lines);
54        let similarity = compute_similarity(old, new);
55        Self { ops, similarity }
56    }
57
58    /// Returns the number of inserted lines.
59    pub fn insertions(&self) -> usize {
60        self.ops.iter().filter(|op| matches!(op, DiffOp::Insert(_))).count()
61    }
62
63    /// Returns the number of deleted lines.
64    pub fn deletions(&self) -> usize {
65        self.ops.iter().filter(|op| matches!(op, DiffOp::Delete(_))).count()
66    }
67
68    /// Returns `true` if the two texts were identical.
69    pub fn is_identical(&self) -> bool {
70        (self.similarity - 1.0).abs() < f64::EPSILON
71    }
72}
73
74/// Computes a line-level diff using LCS backtracking.
75fn line_diff(old: &[&str], new: &[&str]) -> Vec<DiffOp> {
76    let m = old.len();
77    let n = new.len();
78    let mut dp = vec![vec![0usize; n + 1]; m + 1];
79    for i in 1..=m {
80        for j in 1..=n {
81            dp[i][j] = if old[i - 1] == new[j - 1] {
82                dp[i - 1][j - 1] + 1
83            } else {
84                dp[i - 1][j].max(dp[i][j - 1])
85            };
86        }
87    }
88    let mut ops = Vec::new();
89    let (mut i, mut j) = (m, n);
90    while i > 0 || j > 0 {
91        if i > 0 && j > 0 && old[i - 1] == new[j - 1] {
92            ops.push(DiffOp::Equal(old[i - 1].to_string()));
93            i -= 1;
94            j -= 1;
95        } else if j > 0 && (i == 0 || dp[i][j - 1] >= dp[i - 1][j]) {
96            ops.push(DiffOp::Insert(new[j - 1].to_string()));
97            j -= 1;
98        } else {
99            ops.push(DiffOp::Delete(old[i - 1].to_string()));
100            i -= 1;
101        }
102    }
103    ops.reverse();
104    ops
105}
106
107/// Computes Jaccard similarity on word bags.
108fn compute_similarity(a: &str, b: &str) -> f64 {
109    use std::collections::HashSet;
110    let words_a: HashSet<&str> = a.split_whitespace().collect();
111    let words_b: HashSet<&str> = b.split_whitespace().collect();
112    let intersection = words_a.intersection(&words_b).count();
113    let union = words_a.union(&words_b).count();
114    if union == 0 { 1.0 } else { intersection as f64 / union as f64 }
115}
116
117/// An operation in a structural JSON diff.
118#[derive(Debug, Clone, Serialize, Deserialize)]
119pub enum JsonDiffOp {
120    /// A scalar value changed at the given JSON path.
121    ValueChanged { path: String, old: serde_json::Value, new: serde_json::Value },
122    /// A key was added to an object.
123    KeyAdded { path: String, value: serde_json::Value },
124    /// A key was removed from an object.
125    KeyRemoved { path: String, value: serde_json::Value },
126    /// No differences found.
127    Equal,
128}
129
130/// Computes a structural diff between two JSON strings.
131///
132/// # Errors
133/// Returns [`DiffError::Serialization`] if either string is not valid JSON.
134pub fn json_diff(old_json: &str, new_json: &str) -> Result<Vec<JsonDiffOp>, DiffError> {
135    let old: serde_json::Value = serde_json::from_str(old_json)?;
136    let new: serde_json::Value = serde_json::from_str(new_json)?;
137    let mut ops = Vec::new();
138    diff_values("$", &old, &new, &mut ops);
139    if ops.is_empty() {
140        ops.push(JsonDiffOp::Equal);
141    }
142    Ok(ops)
143}
144
145fn diff_values(path: &str, old: &serde_json::Value, new: &serde_json::Value, ops: &mut Vec<JsonDiffOp>) {
146    match (old, new) {
147        (serde_json::Value::Object(o), serde_json::Value::Object(n)) => {
148            for (k, ov) in o {
149                let child_path = format!("{path}.{k}");
150                if let Some(nv) = n.get(k) {
151                    diff_values(&child_path, ov, nv, ops);
152                } else {
153                    ops.push(JsonDiffOp::KeyRemoved { path: child_path, value: ov.clone() });
154                }
155            }
156            for (k, nv) in n {
157                if !o.contains_key(k) {
158                    ops.push(JsonDiffOp::KeyAdded { path: format!("{path}.{k}"), value: nv.clone() });
159                }
160            }
161        }
162        (o, n) if o == n => {}
163        (o, n) => ops.push(JsonDiffOp::ValueChanged {
164            path: path.to_string(),
165            old: o.clone(),
166            new: n.clone(),
167        }),
168    }
169}
170
171#[cfg(test)]
172mod tests {
173    use super::*;
174
175    #[test]
176    fn test_text_diff_identical_strings_similarity_one() {
177        let d = TextDiff::compute("hello", "hello");
178        assert!(d.is_identical());
179        assert_eq!(d.similarity, 1.0);
180    }
181
182    #[test]
183    fn test_text_diff_completely_different_similarity_less_than_one() {
184        let d = TextDiff::compute("aaa bbb ccc", "xxx yyy zzz");
185        assert!(d.similarity < 1.0);
186    }
187
188    #[test]
189    fn test_text_diff_insertions_counted() {
190        let d = TextDiff::compute("line1", "line1\nline2");
191        assert!(d.insertions() > 0);
192    }
193
194    #[test]
195    fn test_text_diff_deletions_counted() {
196        let d = TextDiff::compute("line1\nline2", "line1");
197        assert!(d.deletions() > 0);
198    }
199
200    #[test]
201    fn test_text_diff_similarity_in_range() {
202        let d = TextDiff::compute("the quick brown fox", "the slow blue dog");
203        assert!(d.similarity >= 0.0 && d.similarity <= 1.0);
204    }
205
206    #[test]
207    fn test_diff_op_kind_labels() {
208        assert_eq!(DiffOp::Equal("x".into()).kind(), "=");
209        assert_eq!(DiffOp::Insert("x".into()).kind(), "+");
210        assert_eq!(DiffOp::Delete("x".into()).kind(), "-");
211    }
212
213    #[test]
214    fn test_diff_op_text_returns_content() {
215        assert_eq!(DiffOp::Insert("hello".into()).text(), "hello");
216    }
217
218    #[test]
219    fn test_json_diff_equal_returns_equal_op() {
220        let ops = json_diff(r#"{"a":1}"#, r#"{"a":1}"#).unwrap();
221        assert!(matches!(ops[0], JsonDiffOp::Equal));
222    }
223
224    #[test]
225    fn test_json_diff_value_changed_detected() {
226        let ops = json_diff(r#"{"a":1}"#, r#"{"a":2}"#).unwrap();
227        assert!(ops.iter().any(|op| matches!(op, JsonDiffOp::ValueChanged { .. })));
228    }
229
230    #[test]
231    fn test_json_diff_key_added_detected() {
232        let ops = json_diff(r#"{"a":1}"#, r#"{"a":1,"b":2}"#).unwrap();
233        assert!(ops.iter().any(|op| matches!(op, JsonDiffOp::KeyAdded { .. })));
234    }
235
236    #[test]
237    fn test_json_diff_key_removed_detected() {
238        let ops = json_diff(r#"{"a":1,"b":2}"#, r#"{"a":1}"#).unwrap();
239        assert!(ops.iter().any(|op| matches!(op, JsonDiffOp::KeyRemoved { .. })));
240    }
241
242    #[test]
243    fn test_json_diff_invalid_json_returns_serialization_error() {
244        let err = json_diff("not json", "{}").unwrap_err();
245        assert!(matches!(err, DiffError::Serialization(_)));
246    }
247
248    #[test]
249    fn test_text_diff_empty_strings_identical() {
250        let d = TextDiff::compute("", "");
251        assert!(d.is_identical());
252    }
253}