json_diff_ng/
process.rs

1use std::collections::HashMap;
2use std::collections::HashSet;
3
4use diffs::{Diff, myers, Replace};
5use regex::Regex;
6use serde_json::Map;
7use serde_json::Value;
8
9use crate::DiffTreeNode;
10use crate::Mismatch;
11use crate::Result;
12use crate::sort::preprocess_array;
13
14/// Compares two string slices containing serialized json with each other, returns an error or a [`Mismatch`] structure holding all differences.
15/// Internally this calls into [`compare_serde_values`] after deserializing the string slices into [`serde_json::Value`].
16/// Arguments are the string slices, a bool to trigger deep sorting of arrays and ignored_keys as a list of regex to match keys against.
17/// Ignoring a regex from comparison will also ignore the key from having an impact on sorting arrays.
18pub fn compare_strs(
19    a: &str,
20    b: &str,
21    sort_arrays: bool,
22    ignore_keys: &[Regex],
23) -> Result<Mismatch> {
24    let value1 = serde_json::from_str(a)?;
25    let value2 = serde_json::from_str(b)?;
26    compare_serde_values(&value1, &value2, sort_arrays, ignore_keys)
27}
28
29/// Compares two [`serde_json::Value`] items with each other, returns an error or a [`Mismatch`] structure holding all differences.
30/// Arguments are the values, a bool to trigger deep sorting of arrays and ignored_keys as a list of regex to match keys against.
31/// Ignoring a regex from comparison will also ignore the key from having an impact on sorting arrays.
32pub fn compare_serde_values(
33    a: &Value,
34    b: &Value,
35    sort_arrays: bool,
36    ignore_keys: &[Regex],
37) -> Result<Mismatch> {
38    match_json(a, b, sort_arrays, ignore_keys)
39}
40
41fn values_to_node(vec: Vec<(usize, &Value)>) -> DiffTreeNode {
42    if vec.is_empty() {
43        DiffTreeNode::Null
44    } else {
45        DiffTreeNode::Array(
46            vec.into_iter()
47                .map(|(l, v)| (l, DiffTreeNode::Value(v.clone(), v.clone())))
48                .collect(),
49        )
50    }
51}
52
53struct ListDiffHandler<'a> {
54    replaced: &'a mut Vec<(usize, usize, usize, usize)>,
55    deletion: &'a mut Vec<(usize, usize)>,
56    insertion: &'a mut Vec<(usize, usize)>,
57}
58impl<'a> ListDiffHandler<'a> {
59    pub fn new(
60        replaced: &'a mut Vec<(usize, usize, usize, usize)>,
61        deletion: &'a mut Vec<(usize, usize)>,
62        insertion: &'a mut Vec<(usize, usize)>,
63    ) -> Self {
64        Self {
65            replaced,
66            deletion,
67            insertion,
68        }
69    }
70}
71impl<'a> Diff for ListDiffHandler<'a> {
72    type Error = ();
73    fn delete(&mut self, old: usize, len: usize, _new: usize) -> std::result::Result<(), ()> {
74        self.deletion.push((old, len));
75        Ok(())
76    }
77    fn insert(&mut self, _o: usize, new: usize, len: usize) -> std::result::Result<(), ()> {
78        self.insertion.push((new, len));
79        Ok(())
80    }
81    fn replace(
82        &mut self,
83        old: usize,
84        len: usize,
85        new: usize,
86        new_len: usize,
87    ) -> std::result::Result<(), ()> {
88        self.replaced.push((old, len, new, new_len));
89        Ok(())
90    }
91}
92
93fn match_json(
94    value1: &Value,
95    value2: &Value,
96    sort_arrays: bool,
97    ignore_keys: &[Regex],
98) -> Result<Mismatch> {
99    match (value1, value2) {
100        (Value::Object(a), Value::Object(b)) => process_objects(a, b, ignore_keys, sort_arrays),
101        (Value::Array(a), Value::Array(b)) => process_arrays(sort_arrays, a, ignore_keys, b),
102        (a, b) => process_values(a, b),
103    }
104}
105
106fn process_values(a: &Value, b: &Value) -> Result<Mismatch> {
107    if a == b {
108        Ok(Mismatch::empty())
109    } else {
110        Ok(Mismatch::new(
111            DiffTreeNode::Null,
112            DiffTreeNode::Null,
113            DiffTreeNode::Value(a.clone(), b.clone()),
114        ))
115    }
116}
117
118fn process_objects(
119    a: &Map<String, Value>,
120    b: &Map<String, Value>,
121    ignore_keys: &[Regex],
122    sort_arrays: bool,
123) -> Result<Mismatch> {
124    let diff = intersect_maps(a, b, ignore_keys);
125    let mut left_only_keys = get_map_of_keys(diff.left_only);
126    let mut right_only_keys = get_map_of_keys(diff.right_only);
127    let intersection_keys = diff.intersection;
128
129    let mut unequal_keys = DiffTreeNode::Null;
130
131    for key in intersection_keys {
132        let Mismatch {
133            left_only: l,
134            right_only: r,
135            unequal_values: u,
136        } = match_json(
137            a.get(&key).unwrap(),
138            b.get(&key).unwrap(),
139            sort_arrays,
140            ignore_keys,
141        )?;
142        left_only_keys = insert_child_key_map(left_only_keys, l, &key)?;
143        right_only_keys = insert_child_key_map(right_only_keys, r, &key)?;
144        unequal_keys = insert_child_key_map(unequal_keys, u, &key)?;
145    }
146
147    Ok(Mismatch::new(left_only_keys, right_only_keys, unequal_keys))
148}
149
150fn process_arrays(
151    sort_arrays: bool,
152    a: &Vec<Value>,
153    ignore_keys: &[Regex],
154    b: &Vec<Value>,
155) -> Result<Mismatch> {
156    let a = preprocess_array(sort_arrays, a, ignore_keys);
157    let b = preprocess_array(sort_arrays, b, ignore_keys);
158
159    let mut replaced = Vec::new();
160    let mut deleted = Vec::new();
161    let mut inserted = Vec::new();
162
163    let mut diff = Replace::new(ListDiffHandler::new(
164        &mut replaced,
165        &mut deleted,
166        &mut inserted,
167    ));
168    myers::diff(
169        &mut diff,
170        a.as_slice(),
171        0,
172        a.len(),
173        b.as_slice(),
174        0,
175        b.len(),
176    )
177    .unwrap();
178
179    fn extract_one_sided_values(v: Vec<(usize, usize)>, vals: &[Value]) -> Vec<(usize, &Value)> {
180        v.into_iter()
181            .flat_map(|(o, ol)| (o..o + ol).map(|i| (i, &vals[i])))
182            .collect::<Vec<(usize, &Value)>>()
183    }
184
185    let left_only_values: Vec<_> = extract_one_sided_values(deleted, a.as_slice());
186    let right_only_values: Vec<_> = extract_one_sided_values(inserted, b.as_slice());
187
188    let mut left_only_nodes = values_to_node(left_only_values);
189    let mut right_only_nodes = values_to_node(right_only_values);
190    let mut diff = DiffTreeNode::Null;
191
192    for (o, ol, n, nl) in replaced {
193        let max_length = ol.max(nl);
194        for i in 0..max_length {
195            let inner_a = a.get(o + i).unwrap_or(&Value::Null);
196            let inner_b = b.get(n + i).unwrap_or(&Value::Null);
197            let cdiff = match_json(inner_a, inner_b, sort_arrays, ignore_keys)?;
198            let position = o + i;
199            let Mismatch {
200                left_only: l,
201                right_only: r,
202                unequal_values: u,
203            } = cdiff;
204            left_only_nodes = insert_child_key_diff(left_only_nodes, l, position)?;
205            right_only_nodes = insert_child_key_diff(right_only_nodes, r, position)?;
206            diff = insert_child_key_diff(diff, u, position)?;
207        }
208    }
209
210    Ok(Mismatch::new(left_only_nodes, right_only_nodes, diff))
211}
212
213fn get_map_of_keys(set: HashSet<String>) -> DiffTreeNode {
214    if !set.is_empty() {
215        DiffTreeNode::Node(
216            set.iter()
217                .map(|key| (String::from(key), DiffTreeNode::Null))
218                .collect(),
219        )
220    } else {
221        DiffTreeNode::Null
222    }
223}
224
225fn insert_child_key_diff(
226    parent: DiffTreeNode,
227    child: DiffTreeNode,
228    line: usize,
229) -> Result<DiffTreeNode> {
230    if child == DiffTreeNode::Null {
231        return Ok(parent);
232    }
233    if let DiffTreeNode::Array(mut array) = parent {
234        array.push((line, child));
235        Ok(DiffTreeNode::Array(array))
236    } else if let DiffTreeNode::Null = parent {
237        Ok(DiffTreeNode::Array(vec![(line, child)]))
238    } else {
239        Err(format!("Tried to insert child: {child:?} into parent {parent:?} - structure incoherent, expected a parent array - somehow json structure seems broken").into())
240    }
241}
242
243fn insert_child_key_map(
244    parent: DiffTreeNode,
245    child: DiffTreeNode,
246    key: &String,
247) -> Result<DiffTreeNode> {
248    if child == DiffTreeNode::Null {
249        return Ok(parent);
250    }
251    if let DiffTreeNode::Node(mut map) = parent {
252        map.insert(String::from(key), child);
253        Ok(DiffTreeNode::Node(map))
254    } else if let DiffTreeNode::Null = parent {
255        let mut map = HashMap::new();
256        map.insert(String::from(key), child);
257        Ok(DiffTreeNode::Node(map))
258    } else {
259        Err(format!("Tried to insert child: {child:?} into parent {parent:?} - structure incoherent, expected a parent object - somehow json structure seems broken").into())
260    }
261}
262
263struct MapDifference {
264    left_only: HashSet<String>,
265    right_only: HashSet<String>,
266    intersection: HashSet<String>,
267}
268
269impl MapDifference {
270    pub fn new(
271        left_only: HashSet<String>,
272        right_only: HashSet<String>,
273        intersection: HashSet<String>,
274    ) -> Self {
275        Self {
276            right_only,
277            left_only,
278            intersection,
279        }
280    }
281}
282
283fn intersect_maps(
284    a: &Map<String, Value>,
285    b: &Map<String, Value>,
286    ignore_keys: &[Regex],
287) -> MapDifference {
288    let mut intersection = HashSet::new();
289    let mut left = HashSet::new();
290
291    let mut right = HashSet::new();
292    for a_key in a
293        .keys()
294        .filter(|k| ignore_keys.iter().all(|r| !r.is_match(k.as_str())))
295    {
296        if b.contains_key(a_key) {
297            intersection.insert(String::from(a_key));
298        } else {
299            left.insert(String::from(a_key));
300        }
301    }
302    for b_key in b
303        .keys()
304        .filter(|k| ignore_keys.iter().all(|r| !r.is_match(k.as_str())))
305    {
306        if !a.contains_key(b_key) {
307            right.insert(String::from(b_key));
308        }
309    }
310
311    MapDifference::new(left, right, intersection)
312}
313
314#[cfg(test)]
315mod tests {
316    use maplit::hashmap;
317    use serde_json::json;
318
319    use super::*;
320
321    #[test]
322    fn sorting_ignores_ignored_keys() {
323        let data1: Value =
324            serde_json::from_str(r#"[{"a": 1, "b":2 }, { "a": 2, "b" : 1 }]"#).unwrap();
325        let ignore = [Regex::new("a").unwrap()];
326        let sorted_ignores = preprocess_array(true, data1.as_array().unwrap(), &ignore);
327        let sorted_no_ignores = preprocess_array(true, data1.as_array().unwrap(), &[]);
328
329        assert_eq!(
330            sorted_ignores
331                .first()
332                .unwrap()
333                .as_object()
334                .unwrap()
335                .get("b")
336                .unwrap()
337                .as_i64()
338                .unwrap(),
339            1
340        );
341        assert_eq!(
342            sorted_no_ignores
343                .first()
344                .unwrap()
345                .as_object()
346                .unwrap()
347                .get("b")
348                .unwrap()
349                .as_i64()
350                .unwrap(),
351            2
352        );
353    }
354
355    #[test]
356    fn test_arrays_sorted_objects_ignored() {
357        let data1 = r#"[{"c": {"d": "e"} },"b","c"]"#;
358        let data2 = r#"["b","c",{"c": {"d": "f"} }]"#;
359        let ignore = Regex::new("d").unwrap();
360        let diff = compare_strs(data1, data2, true, &[ignore]).unwrap();
361        assert!(diff.is_empty());
362    }
363
364    #[test]
365    fn test_arrays_sorted_simple() {
366        let data1 = r#"["a","b","c"]"#;
367        let data2 = r#"["b","c","a"]"#;
368        let diff = compare_strs(data1, data2, true, &[]).unwrap();
369        assert!(diff.is_empty());
370    }
371
372    #[test]
373    fn test_arrays_sorted_objects() {
374        let data1 = r#"[{"c": {"d": "e"} },"b","c"]"#;
375        let data2 = r#"["b","c",{"c": {"d": "e"} }]"#;
376        let diff = compare_strs(data1, data2, true, &[]).unwrap();
377        assert!(diff.is_empty());
378    }
379
380    #[test]
381    fn test_arrays_deep_sorted_objects() {
382        let data1 = r#"[{"c": ["d","e"] },"b","c"]"#;
383        let data2 = r#"["b","c",{"c": ["e", "d"] }]"#;
384        let diff = compare_strs(data1, data2, true, &[]).unwrap();
385        assert!(diff.is_empty());
386    }
387
388    #[test]
389    fn test_arrays_deep_sorted_objects_with_arrays() {
390        let data1 = r#"[{"a": [{"b": ["3", "1"]}] }, {"a": [{"b": ["2", "3"]}] }]"#;
391        let data2 = r#"[{"a": [{"b": ["2", "3"]}] }, {"a": [{"b": ["1", "3"]}] }]"#;
392        let diff = compare_strs(data1, data2, true, &[]).unwrap();
393        assert!(diff.is_empty());
394    }
395
396    #[test]
397    fn test_arrays_deep_sorted_objects_with_outer_diff() {
398        let data1 = r#"[{"c": ["d","e"] },"b"]"#;
399        let data2 = r#"["b","c",{"c": ["e", "d"] }]"#;
400        let diff = compare_strs(data1, data2, true, &[]).unwrap();
401        assert!(!diff.is_empty());
402        let insertions = diff.right_only.get_diffs();
403        assert_eq!(insertions.len(), 1);
404        assert_eq!(insertions.first().unwrap().to_string(), r#".[2].("c")"#);
405    }
406
407    #[test]
408    fn test_arrays_deep_sorted_objects_with_inner_diff() {
409        let data1 = r#"["a",{"c": ["d","e", "f"] },"b"]"#;
410        let data2 = r#"["b",{"c": ["e","d"] },"a"]"#;
411        let diff = compare_strs(data1, data2, true, &[]).unwrap();
412        assert!(!diff.is_empty());
413        let deletions = diff.left_only.get_diffs();
414
415        assert_eq!(deletions.len(), 1);
416        assert_eq!(
417            deletions.first().unwrap().to_string(),
418            r#".[0].c.[2].("f")"#
419        );
420    }
421
422    #[test]
423    fn test_arrays_deep_sorted_objects_with_inner_diff_mutation() {
424        let data1 = r#"["a",{"c": ["d", "f"] },"b"]"#;
425        let data2 = r#"["b",{"c": ["e","d"] },"a"]"#;
426        let diffs = compare_strs(data1, data2, true, &[]).unwrap();
427        assert!(!diffs.is_empty());
428        let diffs = diffs.unequal_values.get_diffs();
429
430        assert_eq!(diffs.len(), 1);
431        assert_eq!(
432            diffs.first().unwrap().to_string(),
433            r#".[0].c.[1].("f" != "e")"#
434        );
435    }
436
437    #[test]
438    fn test_arrays_simple_diff() {
439        let data1 = r#"["a","b","c"]"#;
440        let data2 = r#"["a","b","d"]"#;
441        let diff = compare_strs(data1, data2, false, &[]).unwrap();
442        assert_eq!(diff.left_only, DiffTreeNode::Null);
443        assert_eq!(diff.right_only, DiffTreeNode::Null);
444        let diff = diff.unequal_values.get_diffs();
445        assert_eq!(diff.len(), 1);
446        assert_eq!(diff.first().unwrap().to_string(), r#".[2].("c" != "d")"#);
447    }
448
449    #[test]
450    fn test_arrays_more_complex_diff() {
451        let data1 = r#"["a","b","c"]"#;
452        let data2 = r#"["a","a","b","d"]"#;
453        let diff = compare_strs(data1, data2, false, &[]).unwrap();
454
455        let changes_diff = diff.unequal_values.get_diffs();
456        assert_eq!(diff.left_only, DiffTreeNode::Null);
457
458        assert_eq!(changes_diff.len(), 1);
459        assert_eq!(
460            changes_diff.first().unwrap().to_string(),
461            r#".[2].("c" != "d")"#
462        );
463        let insertions = diff.right_only.get_diffs();
464        assert_eq!(insertions.len(), 1);
465        assert_eq!(insertions.first().unwrap().to_string(), r#".[0].("a")"#);
466    }
467
468    #[test]
469    fn test_arrays_extra_left() {
470        let data1 = r#"["a","b","c"]"#;
471        let data2 = r#"["a","b"]"#;
472        let diff = compare_strs(data1, data2, false, &[]).unwrap();
473
474        let diffs = diff.left_only.get_diffs();
475        assert_eq!(diffs.len(), 1);
476        assert_eq!(diffs.first().unwrap().to_string(), r#".[2].("c")"#);
477        assert_eq!(diff.unequal_values, DiffTreeNode::Null);
478        assert_eq!(diff.right_only, DiffTreeNode::Null);
479    }
480
481    #[test]
482    fn test_arrays_extra_right() {
483        let data1 = r#"["a","b"]"#;
484        let data2 = r#"["a","b","c"]"#;
485        let diff = compare_strs(data1, data2, false, &[]).unwrap();
486
487        let diffs = diff.right_only.get_diffs();
488        assert_eq!(diffs.len(), 1);
489        assert_eq!(diffs.first().unwrap().to_string(), r#".[2].("c")"#);
490        assert_eq!(diff.unequal_values, DiffTreeNode::Null);
491        assert_eq!(diff.left_only, DiffTreeNode::Null);
492    }
493
494    #[test]
495    fn long_insertion_modification() {
496        let data1 = r#"["a","b","a"]"#;
497        let data2 = r#"["a","c","c","c","a"]"#;
498        let diff = compare_strs(data1, data2, false, &[]).unwrap();
499        let diffs = diff.unequal_values.get_diffs();
500
501        assert_eq!(diffs.len(), 3);
502        let diffs: Vec<_> = diffs.into_iter().map(|d| d.to_string()).collect();
503
504        assert!(diffs.contains(&r#".[3].(null != "c")"#.to_string()));
505        assert!(diffs.contains(&r#".[1].("b" != "c")"#.to_string()));
506        assert!(diffs.contains(&r#".[2].("a" != "c")"#.to_string()));
507        assert_eq!(diff.right_only, DiffTreeNode::Null);
508        assert_eq!(diff.left_only, DiffTreeNode::Null);
509    }
510
511    #[test]
512    fn test_arrays_object_extra() {
513        let data1 = r#"["a","b"]"#;
514        let data2 = r#"["a","b", {"c": {"d": "e"} }]"#;
515        let diff = compare_strs(data1, data2, false, &[]).unwrap();
516
517        let diffs = diff.right_only.get_diffs();
518        assert_eq!(diffs.len(), 1);
519        assert_eq!(
520            diffs.first().unwrap().to_string(),
521            r#".[2].({"c":{"d":"e"}})"#
522        );
523        assert_eq!(diff.unequal_values, DiffTreeNode::Null);
524        assert_eq!(diff.left_only, DiffTreeNode::Null);
525    }
526
527    #[test]
528    fn nested_diff() {
529        let data1 = r#"{
530            "a":"b",
531            "b":{
532                "c":{
533                    "d":true,
534                    "e":5,
535                    "f":9,
536                    "h":{
537                        "i":true,
538                        "j":false
539                    }
540                }
541            }
542        }"#;
543        let data2 = r#"{
544            "a":"b",
545            "b":{
546                "c":{
547                    "d":true,
548                    "e":6,
549                    "g":0,
550                    "h":{
551                        "i":false,
552                        "k":false
553                    }
554                }
555            }
556        }"#;
557
558        let expected_left = DiffTreeNode::Node(hashmap! {
559        "b".to_string() => DiffTreeNode::Node(hashmap! {
560                "c".to_string() => DiffTreeNode::Node(hashmap! {
561                        "f".to_string() => DiffTreeNode::Null,
562                        "h".to_string() => DiffTreeNode::Node( hashmap! {
563                                "j".to_string() => DiffTreeNode::Null,
564                            }
565                        ),
566                }
567                ),
568            }),
569        });
570        let expected_right = DiffTreeNode::Node(hashmap! {
571            "b".to_string() => DiffTreeNode::Node(hashmap! {
572                    "c".to_string() => DiffTreeNode::Node(hashmap! {
573                            "g".to_string() => DiffTreeNode::Null,
574                            "h".to_string() => DiffTreeNode::Node(hashmap! {
575                                    "k".to_string() => DiffTreeNode::Null,
576                                }
577                            )
578                        }
579                    )
580                }
581            )
582        });
583        let expected_uneq = DiffTreeNode::Node(hashmap! {
584            "b".to_string() => DiffTreeNode::Node(hashmap! {
585                    "c".to_string() => DiffTreeNode::Node(hashmap! {
586                            "e".to_string() => DiffTreeNode::Value(json!(5), json!(6)),
587                            "h".to_string() => DiffTreeNode::Node(hashmap! {
588                                    "i".to_string() => DiffTreeNode::Value(json!(true), json!(false)),
589                                }
590                            )
591                        }
592                    )
593                }
594            )
595        });
596        let expected = Mismatch::new(expected_left, expected_right, expected_uneq);
597
598        let mismatch = compare_strs(data1, data2, false, &[]).unwrap();
599        assert_eq!(mismatch, expected, "Diff was incorrect.");
600    }
601
602    #[test]
603    fn no_diff() {
604        let data1 = r#"{
605            "a":"b",
606            "b":{
607                "c":{
608                    "d":true,
609                    "e":5,
610                    "f":9,
611                    "h":{
612                        "i":true,
613                        "j":false
614                    }
615                }
616            }
617        }"#;
618        let data2 = r#"{
619            "a":"b",
620            "b":{
621                "c":{
622                    "d":true,
623                    "e":5,
624                    "f":9,
625                    "h":{
626                        "i":true,
627                        "j":false
628                    }
629                }
630            }
631        }"#;
632
633        assert_eq!(
634            compare_strs(data1, data2, false, &[]).unwrap(),
635            Mismatch::new(DiffTreeNode::Null, DiffTreeNode::Null, DiffTreeNode::Null)
636        );
637    }
638
639    #[test]
640    fn no_json() {
641        let data1 = r#"{}"#;
642        let data2 = r#"{}"#;
643
644        assert_eq!(
645            compare_strs(data1, data2, false, &[]).unwrap(),
646            Mismatch::empty()
647        );
648    }
649
650    #[test]
651    fn parse_err_source_one() {
652        let invalid_json1 = r#"{invalid: json}"#;
653        let valid_json2 = r#"{"a":"b"}"#;
654        compare_strs(invalid_json1, valid_json2, false, &[])
655            .expect_err("Parsing invalid JSON didn't throw an error");
656    }
657
658    #[test]
659    fn parse_err_source_two() {
660        let valid_json1 = r#"{"a":"b"}"#;
661        let invalid_json2 = r#"{invalid: json}"#;
662        compare_strs(valid_json1, invalid_json2, false, &[])
663            .expect_err("Parsing invalid JSON didn't throw an err");
664    }
665}