json_archive/
diff.rs

1// json-archive is a tool for tracking JSON file changes over time
2// Copyright (C) 2025  Peoples Grocers LLC
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published
6// by the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16//
17// To purchase a license under different terms contact admin@peoplesgrocers.com
18// To request changes, report bugs, or give user feedback contact
19// marxism@peoplesgrocers.com
20//
21
22use serde_json::Value;
23use std::collections::{HashMap, HashSet};
24
25use crate::events::Event;
26
27pub fn diff(old: &Value, new: &Value, base_path: &str, observation_id: &str) -> Vec<Event> {
28    let mut result = Vec::<Event>::new();
29    diff_recursive(old, new, base_path, observation_id, &mut result);
30    result
31}
32
33fn diff_recursive(
34    old: &Value,
35    new: &Value,
36    path: &str,
37    observation_id: &str,
38    result: &mut Vec<Event>,
39) {
40    match (old, new) {
41        (Value::Object(old_obj), Value::Object(new_obj)) => {
42            diff_objects(old_obj, new_obj, path, observation_id, result);
43        }
44        (Value::Array(old_arr), Value::Array(new_arr)) => {
45            diff_arrays(old_arr, new_arr, path, observation_id, result);
46        }
47        _ => {
48            if old != new {
49                result.push(Event::Change {
50                    path: path.to_string(),
51                    new_value: new.clone(),
52                    observation_id: observation_id.to_string(),
53                });
54            }
55        }
56    }
57}
58
59fn diff_objects(
60    old: &serde_json::Map<String, Value>,
61    new: &serde_json::Map<String, Value>,
62    base_path: &str,
63    observation_id: &str,
64    result: &mut Vec<Event>,
65) {
66    let old_keys: HashSet<&String> = old.keys().collect();
67    let new_keys: HashSet<&String> = new.keys().collect();
68
69    // Removed keys
70    for key in old_keys.difference(&new_keys) {
71        let path = format_path(base_path, key);
72        result.push(Event::Remove {
73            path,
74            observation_id: observation_id.to_string(),
75        });
76    }
77
78    // Added keys
79    for key in new_keys.difference(&old_keys) {
80        let path = format_path(base_path, key);
81        result.push(Event::Add {
82            path,
83            value: new[*key].clone(),
84            observation_id: observation_id.to_string(),
85        });
86    }
87
88    // Changed keys
89    for key in old_keys.intersection(&new_keys) {
90        let path = format_path(base_path, key);
91        let old_value = &old[*key];
92        let new_value = &new[*key];
93
94        if old_value != new_value {
95            diff_recursive(old_value, new_value, &path, observation_id, result);
96        }
97    }
98}
99
100fn diff_arrays(
101    old: &[Value],
102    new: &[Value],
103    base_path: &str,
104    observation_id: &str,
105    result: &mut Vec<Event>,
106) {
107    // Simple implementation: we'll use a more sophisticated approach for move detection
108    let mut old_items: HashMap<String, (usize, &Value)> = HashMap::new();
109    let mut new_items: HashMap<String, (usize, &Value)> = HashMap::new();
110
111    // Create content-based indices for move detection
112    for (i, value) in old.iter().enumerate() {
113        let key = value_hash(value);
114        old_items.insert(key, (i, value));
115    }
116
117    for (i, value) in new.iter().enumerate() {
118        let key = value_hash(value);
119        new_items.insert(key, (i, value));
120    }
121
122    let old_keys: HashSet<&String> = old_items.keys().collect();
123    let new_keys: HashSet<&String> = new_items.keys().collect();
124    let common_keys: HashSet<&String> = old_keys.intersection(&new_keys).cloned().collect();
125
126    // Track which items have been processed
127    let mut processed_old: HashSet<usize> = HashSet::new();
128    let mut processed_new: HashSet<usize> = HashSet::new();
129
130    // Handle items that exist in both arrays (potential moves or unchanged)
131    let mut moves: Vec<(usize, usize)> = Vec::new();
132
133    for key in &common_keys {
134        let (old_idx, old_val) = old_items[*key];
135        let (new_idx, new_val) = new_items[*key];
136
137        processed_old.insert(old_idx);
138        processed_new.insert(new_idx);
139
140        if old_val != new_val {
141            // Value changed
142            let path = format!("{}/{}", base_path, new_idx);
143            result.push(Event::Change {
144                path,
145                new_value: new_val.clone(),
146                observation_id: observation_id.to_string(),
147            });
148        } else if old_idx != new_idx {
149            // Same value, different position - this is a move
150            moves.push((old_idx, new_idx));
151        }
152    }
153
154    // Generate move events if any
155    if !moves.is_empty() {
156        // Sort moves by original position to ensure consistent ordering
157        moves.sort_by_key(|(old_idx, _)| *old_idx);
158        result.push(Event::Move {
159            path: base_path.to_string(),
160            moves,
161            observation_id: observation_id.to_string(),
162        });
163    }
164
165    // Handle removed items (in old but not in new)
166    let mut removed_indices: Vec<usize> = (0..old.len())
167        .filter(|i| !processed_old.contains(i))
168        .collect();
169
170    // Remove from highest index to lowest to avoid index shifting issues
171    removed_indices.sort_by(|a, b| b.cmp(a));
172
173    for idx in removed_indices {
174        let path = format!("{}/{}", base_path, idx);
175        result.push(Event::Remove {
176            path,
177            observation_id: observation_id.to_string(),
178        });
179    }
180
181    // Handle added items (in new but not in old)
182    for i in 0..new.len() {
183        if !processed_new.contains(&i) {
184            let path = format!("{}/{}", base_path, i);
185            result.push(Event::Add {
186                path,
187                value: new[i].clone(),
188                observation_id: observation_id.to_string(),
189            });
190        }
191    }
192}
193
194fn format_path(base: &str, segment: &str) -> String {
195    let escaped_segment = segment.replace("~", "~0").replace("/", "~1");
196    if base.is_empty() {
197        format!("/{}", escaped_segment)
198    } else {
199        format!("{}/{}", base, escaped_segment)
200    }
201}
202
203fn value_hash(value: &Value) -> String {
204    // Simple content-based hash for move detection
205    // In a real implementation, you might want a more sophisticated hash
206    format!("{:?}", value)
207}
208
209#[cfg(test)]
210mod tests {
211    use super::*;
212    use serde_json::json;
213
214    #[test]
215    fn test_object_add() {
216        let old = json!({"a": 1});
217        let new = json!({"a": 1, "b": 2});
218        let result = diff(&old, &new, "", "obs-1");
219
220        assert_eq!(result.len(), 1);
221        match &result[0] {
222            Event::Add { path, value, .. } => {
223                assert_eq!(path, "/b");
224                assert_eq!(value, &json!(2));
225            }
226            _ => panic!("Expected Add event"),
227        }
228    }
229
230    #[test]
231    fn test_object_remove() {
232        let old = json!({"a": 1, "b": 2});
233        let new = json!({"a": 1});
234        let result = diff(&old, &new, "", "obs-1");
235
236        assert_eq!(result.len(), 1);
237        match &result[0] {
238            Event::Remove { path, .. } => {
239                assert_eq!(path, "/b");
240            }
241            _ => panic!("Expected Remove event"),
242        }
243    }
244
245    #[test]
246    fn test_object_change() {
247        let old = json!({"a": 1});
248        let new = json!({"a": 2});
249        let result = diff(&old, &new, "", "obs-1");
250
251        assert_eq!(result.len(), 1);
252        match &result[0] {
253            Event::Change {
254                path, new_value, ..
255            } => {
256                assert_eq!(path, "/a");
257                assert_eq!(new_value, &json!(2));
258            }
259            _ => panic!("Expected Change event"),
260        }
261    }
262
263    #[test]
264    fn test_nested_object() {
265        let old = json!({"user": {"name": "Alice", "age": 30}});
266        let new = json!({"user": {"name": "Alice", "age": 31}});
267        let result = diff(&old, &new, "", "obs-1");
268
269        assert_eq!(result.len(), 1);
270        match &result[0] {
271            Event::Change {
272                path, new_value, ..
273            } => {
274                assert_eq!(path, "/user/age");
275                assert_eq!(new_value, &json!(31));
276            }
277            _ => panic!("Expected Change event"),
278        }
279    }
280
281    #[test]
282    fn test_array_add() {
283        let old = json!(["a", "b"]);
284        let new = json!(["a", "b", "c"]);
285        let result = diff(&old, &new, "", "obs-1");
286
287        assert_eq!(result.len(), 1);
288        match &result[0] {
289            Event::Add { path, value, .. } => {
290                assert_eq!(path, "/2");
291                assert_eq!(value, &json!("c"));
292            }
293            _ => panic!("Expected Add event"),
294        }
295    }
296
297    #[test]
298    fn test_array_remove() {
299        let old = json!(["a", "b", "c"]);
300        let new = json!(["a", "b"]);
301        let result = diff(&old, &new, "", "obs-1");
302
303        assert_eq!(result.len(), 1);
304        match &result[0] {
305            Event::Remove { path, .. } => {
306                assert_eq!(path, "/2");
307            }
308            _ => panic!("Expected Remove event"),
309        }
310    }
311
312    #[test]
313    fn test_array_move() {
314        let old = json!(["a", "b", "c"]);
315        let new = json!(["c", "a", "b"]);
316        let result = diff(&old, &new, "", "obs-1");
317
318        // Should generate move events
319        assert!(!result.is_empty());
320
321        // Check if we have a move event
322        let has_move = result.iter().any(|e| matches!(e, Event::Move { .. }));
323        assert!(has_move, "Expected at least one Move event");
324    }
325
326    #[test]
327    fn test_escape_sequences_in_keys() {
328        let old = json!({});
329        let new = json!({"foo/bar": "value", "foo~bar": "value2"});
330        let result = diff(&old, &new, "", "obs-1");
331
332        assert_eq!(result.len(), 2);
333
334        let paths: Vec<&String> = result
335            .iter()
336            .filter_map(|e| match e {
337                Event::Add { path, .. } => Some(path),
338                _ => None,
339            })
340            .collect();
341
342        assert!(paths.contains(&&"/foo~1bar".to_string()));
343        assert!(paths.contains(&&"/foo~0bar".to_string()));
344    }
345}