Skip to main content

hoist_diff/
semantic.rs

1//! Semantic JSON diff algorithm for Azure AI Search resources
2
3use serde::{Deserialize, Serialize};
4use serde_json::Value;
5use std::collections::{BTreeMap, HashSet};
6
7/// Result of comparing two JSON values
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct DiffResult {
10    /// Whether the values are identical
11    pub is_equal: bool,
12    /// List of changes
13    pub changes: Vec<Change>,
14}
15
16/// A single change between two JSON values
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct Change {
19    /// JSON path to the changed value
20    pub path: String,
21    /// Type of change
22    pub kind: ChangeKind,
23    /// Old value (for modifications and deletions)
24    #[serde(skip_serializing_if = "Option::is_none")]
25    pub old_value: Option<Value>,
26    /// New value (for modifications and additions)
27    #[serde(skip_serializing_if = "Option::is_none")]
28    pub new_value: Option<Value>,
29}
30
31/// Type of change
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
33#[serde(rename_all = "lowercase")]
34pub enum ChangeKind {
35    Added,
36    Removed,
37    Modified,
38}
39
40/// Compute semantic diff between two JSON values
41///
42/// This differs from a standard JSON diff by:
43/// - Using key-based matching for arrays (by identity_key, typically "name")
44/// - Producing human-readable paths
45/// - Ignoring order for objects
46pub fn diff(old: &Value, new: &Value, identity_key: &str) -> DiffResult {
47    let mut changes = Vec::new();
48    diff_values(old, new, "", identity_key, &mut changes);
49
50    DiffResult {
51        is_equal: changes.is_empty(),
52        changes,
53    }
54}
55
56fn diff_values(
57    old: &Value,
58    new: &Value,
59    path: &str,
60    identity_key: &str,
61    changes: &mut Vec<Change>,
62) {
63    match (old, new) {
64        (Value::Object(old_obj), Value::Object(new_obj)) => {
65            diff_objects(old_obj, new_obj, path, identity_key, changes);
66        }
67        (Value::Array(old_arr), Value::Array(new_arr)) => {
68            diff_arrays(old_arr, new_arr, path, identity_key, changes);
69        }
70        _ if old != new => {
71            changes.push(Change {
72                path: if path.is_empty() {
73                    ".".to_string()
74                } else {
75                    path.to_string()
76                },
77                kind: ChangeKind::Modified,
78                old_value: Some(old.clone()),
79                new_value: Some(new.clone()),
80            });
81        }
82        _ => {}
83    }
84}
85
86fn diff_objects(
87    old: &serde_json::Map<String, Value>,
88    new: &serde_json::Map<String, Value>,
89    path: &str,
90    identity_key: &str,
91    changes: &mut Vec<Change>,
92) {
93    let old_keys: HashSet<_> = old.keys().collect();
94    let new_keys: HashSet<_> = new.keys().collect();
95
96    // Removed keys
97    for key in old_keys.difference(&new_keys) {
98        let key_path = format_path(path, key);
99        changes.push(Change {
100            path: key_path,
101            kind: ChangeKind::Removed,
102            old_value: old.get(*key).cloned(),
103            new_value: None,
104        });
105    }
106
107    // Added keys
108    for key in new_keys.difference(&old_keys) {
109        let key_path = format_path(path, key);
110        changes.push(Change {
111            path: key_path,
112            kind: ChangeKind::Added,
113            old_value: None,
114            new_value: new.get(*key).cloned(),
115        });
116    }
117
118    // Modified keys
119    for key in old_keys.intersection(&new_keys) {
120        let old_val = old.get(*key).unwrap();
121        let new_val = new.get(*key).unwrap();
122        let key_path = format_path(path, key);
123        diff_values(old_val, new_val, &key_path, identity_key, changes);
124    }
125}
126
127fn diff_arrays(
128    old: &[Value],
129    new: &[Value],
130    path: &str,
131    identity_key: &str,
132    changes: &mut Vec<Change>,
133) {
134    // Try semantic matching by identity key
135    let old_has_keys = old.iter().all(|v| v.get(identity_key).is_some());
136    let new_has_keys = new.iter().all(|v| v.get(identity_key).is_some());
137
138    if old_has_keys && new_has_keys {
139        diff_arrays_by_key(old, new, path, identity_key, changes);
140    } else {
141        diff_arrays_positional(old, new, path, identity_key, changes);
142    }
143}
144
145fn diff_arrays_by_key(
146    old: &[Value],
147    new: &[Value],
148    path: &str,
149    identity_key: &str,
150    changes: &mut Vec<Change>,
151) {
152    let old_map: BTreeMap<&str, &Value> = old
153        .iter()
154        .filter_map(|v| v.get(identity_key).and_then(|k| k.as_str()).map(|k| (k, v)))
155        .collect();
156
157    let new_map: BTreeMap<&str, &Value> = new
158        .iter()
159        .filter_map(|v| v.get(identity_key).and_then(|k| k.as_str()).map(|k| (k, v)))
160        .collect();
161
162    let old_keys: HashSet<_> = old_map.keys().cloned().collect();
163    let new_keys: HashSet<_> = new_map.keys().cloned().collect();
164
165    // Removed items
166    for key in old_keys.difference(&new_keys) {
167        let item_path = format!("{}[{}]", path, key);
168        changes.push(Change {
169            path: item_path,
170            kind: ChangeKind::Removed,
171            old_value: old_map.get(key).cloned().cloned(),
172            new_value: None,
173        });
174    }
175
176    // Added items
177    for key in new_keys.difference(&old_keys) {
178        let item_path = format!("{}[{}]", path, key);
179        changes.push(Change {
180            path: item_path,
181            kind: ChangeKind::Added,
182            old_value: None,
183            new_value: new_map.get(key).cloned().cloned(),
184        });
185    }
186
187    // Modified items
188    for key in old_keys.intersection(&new_keys) {
189        let old_val = old_map.get(key).unwrap();
190        let new_val = new_map.get(key).unwrap();
191        let item_path = format!("{}[{}]", path, key);
192        diff_values(old_val, new_val, &item_path, identity_key, changes);
193    }
194}
195
196fn diff_arrays_positional(
197    old: &[Value],
198    new: &[Value],
199    path: &str,
200    identity_key: &str,
201    changes: &mut Vec<Change>,
202) {
203    let max_len = old.len().max(new.len());
204
205    for i in 0..max_len {
206        let item_path = format!("{}[{}]", path, i);
207
208        match (old.get(i), new.get(i)) {
209            (Some(old_val), Some(new_val)) => {
210                diff_values(old_val, new_val, &item_path, identity_key, changes);
211            }
212            (Some(old_val), None) => {
213                changes.push(Change {
214                    path: item_path,
215                    kind: ChangeKind::Removed,
216                    old_value: Some(old_val.clone()),
217                    new_value: None,
218                });
219            }
220            (None, Some(new_val)) => {
221                changes.push(Change {
222                    path: item_path,
223                    kind: ChangeKind::Added,
224                    old_value: None,
225                    new_value: Some(new_val.clone()),
226                });
227            }
228            (None, None) => unreachable!(),
229        }
230    }
231}
232
/// Appends `key` to the dotted path `base`; an empty `base` yields `key` alone.
fn format_path(base: &str, key: &str) -> String {
    match base {
        "" => key.to_owned(),
        parent => [parent, key].join("."),
    }
}
240
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Diffs with `"name"` as identity key and returns the one expected change,
    /// asserting that the documents differ by exactly that change.
    fn only_change(old: &Value, new: &Value) -> Change {
        let outcome = diff(old, new, "name");
        assert!(!outcome.is_equal);
        assert_eq!(outcome.changes.len(), 1);
        outcome.changes[0].clone()
    }

    /// Paths of all changes of the given kind.
    fn paths_of(changes: &[Change], kind: ChangeKind) -> Vec<&str> {
        changes
            .iter()
            .filter(|change| change.kind == kind)
            .map(|change| change.path.as_str())
            .collect()
    }

    #[test]
    fn test_equal_values() {
        let old = json!({"name": "test", "value": 42});
        let new = json!({"name": "test", "value": 42});

        let outcome = diff(&old, &new, "name");

        assert!(outcome.is_equal);
        assert!(outcome.changes.is_empty());
    }

    #[test]
    fn test_added_field() {
        let change = only_change(
            &json!({"name": "test"}),
            &json!({"name": "test", "value": 42}),
        );

        assert_eq!(change.kind, ChangeKind::Added);
        assert_eq!(change.path, "value");
    }

    #[test]
    fn test_removed_field() {
        let change = only_change(
            &json!({"name": "test", "value": 42}),
            &json!({"name": "test"}),
        );

        assert_eq!(change.kind, ChangeKind::Removed);
        assert_eq!(change.path, "value");
    }

    #[test]
    fn test_modified_field() {
        let change = only_change(
            &json!({"name": "test", "value": 42}),
            &json!({"name": "test", "value": 100}),
        );

        assert_eq!(change.kind, ChangeKind::Modified);
        assert_eq!(change.path, "value");
    }

    #[test]
    fn test_array_by_key() {
        let old = json!({
            "items": [
                {"name": "a", "value": 1},
                {"name": "b", "value": 2}
            ]
        });
        let new = json!({
            "items": [
                {"name": "b", "value": 2},
                {"name": "c", "value": 3}
            ]
        });

        let outcome = diff(&old, &new, "name");
        assert!(!outcome.is_equal);

        // Expected: "a" removed, "c" added, "b" untouched despite moving.
        let removed = paths_of(&outcome.changes, ChangeKind::Removed);
        let added = paths_of(&outcome.changes, ChangeKind::Added);

        assert_eq!(removed.len(), 1);
        assert!(removed[0].contains("[a]"));

        assert_eq!(added.len(), 1);
        assert!(added[0].contains("[c]"));
    }
}