1use std::collections::HashMap;
2use std::collections::HashSet;
3
4use diffs::{Diff, myers, Replace};
5use regex::Regex;
6use serde_json::Map;
7use serde_json::Value;
8
9use crate::DiffTreeNode;
10use crate::Mismatch;
11use crate::Result;
12use crate::sort::preprocess_array;
13
14pub fn compare_strs(
19 a: &str,
20 b: &str,
21 sort_arrays: bool,
22 ignore_keys: &[Regex],
23) -> Result<Mismatch> {
24 let value1 = serde_json::from_str(a)?;
25 let value2 = serde_json::from_str(b)?;
26 compare_serde_values(&value1, &value2, sort_arrays, ignore_keys)
27}
28
29pub fn compare_serde_values(
33 a: &Value,
34 b: &Value,
35 sort_arrays: bool,
36 ignore_keys: &[Regex],
37) -> Result<Mismatch> {
38 match_json(a, b, sort_arrays, ignore_keys)
39}
40
41fn values_to_node(vec: Vec<(usize, &Value)>) -> DiffTreeNode {
42 if vec.is_empty() {
43 DiffTreeNode::Null
44 } else {
45 DiffTreeNode::Array(
46 vec.into_iter()
47 .map(|(l, v)| (l, DiffTreeNode::Value(v.clone(), v.clone())))
48 .collect(),
49 )
50 }
51}
52
53struct ListDiffHandler<'a> {
54 replaced: &'a mut Vec<(usize, usize, usize, usize)>,
55 deletion: &'a mut Vec<(usize, usize)>,
56 insertion: &'a mut Vec<(usize, usize)>,
57}
58impl<'a> ListDiffHandler<'a> {
59 pub fn new(
60 replaced: &'a mut Vec<(usize, usize, usize, usize)>,
61 deletion: &'a mut Vec<(usize, usize)>,
62 insertion: &'a mut Vec<(usize, usize)>,
63 ) -> Self {
64 Self {
65 replaced,
66 deletion,
67 insertion,
68 }
69 }
70}
71impl<'a> Diff for ListDiffHandler<'a> {
72 type Error = ();
73 fn delete(&mut self, old: usize, len: usize, _new: usize) -> std::result::Result<(), ()> {
74 self.deletion.push((old, len));
75 Ok(())
76 }
77 fn insert(&mut self, _o: usize, new: usize, len: usize) -> std::result::Result<(), ()> {
78 self.insertion.push((new, len));
79 Ok(())
80 }
81 fn replace(
82 &mut self,
83 old: usize,
84 len: usize,
85 new: usize,
86 new_len: usize,
87 ) -> std::result::Result<(), ()> {
88 self.replaced.push((old, len, new, new_len));
89 Ok(())
90 }
91}
92
93fn match_json(
94 value1: &Value,
95 value2: &Value,
96 sort_arrays: bool,
97 ignore_keys: &[Regex],
98) -> Result<Mismatch> {
99 match (value1, value2) {
100 (Value::Object(a), Value::Object(b)) => process_objects(a, b, ignore_keys, sort_arrays),
101 (Value::Array(a), Value::Array(b)) => process_arrays(sort_arrays, a, ignore_keys, b),
102 (a, b) => process_values(a, b),
103 }
104}
105
106fn process_values(a: &Value, b: &Value) -> Result<Mismatch> {
107 if a == b {
108 Ok(Mismatch::empty())
109 } else {
110 Ok(Mismatch::new(
111 DiffTreeNode::Null,
112 DiffTreeNode::Null,
113 DiffTreeNode::Value(a.clone(), b.clone()),
114 ))
115 }
116}
117
118fn process_objects(
119 a: &Map<String, Value>,
120 b: &Map<String, Value>,
121 ignore_keys: &[Regex],
122 sort_arrays: bool,
123) -> Result<Mismatch> {
124 let diff = intersect_maps(a, b, ignore_keys);
125 let mut left_only_keys = get_map_of_keys(diff.left_only);
126 let mut right_only_keys = get_map_of_keys(diff.right_only);
127 let intersection_keys = diff.intersection;
128
129 let mut unequal_keys = DiffTreeNode::Null;
130
131 for key in intersection_keys {
132 let Mismatch {
133 left_only: l,
134 right_only: r,
135 unequal_values: u,
136 } = match_json(
137 a.get(&key).unwrap(),
138 b.get(&key).unwrap(),
139 sort_arrays,
140 ignore_keys,
141 )?;
142 left_only_keys = insert_child_key_map(left_only_keys, l, &key)?;
143 right_only_keys = insert_child_key_map(right_only_keys, r, &key)?;
144 unequal_keys = insert_child_key_map(unequal_keys, u, &key)?;
145 }
146
147 Ok(Mismatch::new(left_only_keys, right_only_keys, unequal_keys))
148}
149
150fn process_arrays(
151 sort_arrays: bool,
152 a: &Vec<Value>,
153 ignore_keys: &[Regex],
154 b: &Vec<Value>,
155) -> Result<Mismatch> {
156 let a = preprocess_array(sort_arrays, a, ignore_keys);
157 let b = preprocess_array(sort_arrays, b, ignore_keys);
158
159 let mut replaced = Vec::new();
160 let mut deleted = Vec::new();
161 let mut inserted = Vec::new();
162
163 let mut diff = Replace::new(ListDiffHandler::new(
164 &mut replaced,
165 &mut deleted,
166 &mut inserted,
167 ));
168 myers::diff(
169 &mut diff,
170 a.as_slice(),
171 0,
172 a.len(),
173 b.as_slice(),
174 0,
175 b.len(),
176 )
177 .unwrap();
178
179 fn extract_one_sided_values(v: Vec<(usize, usize)>, vals: &[Value]) -> Vec<(usize, &Value)> {
180 v.into_iter()
181 .flat_map(|(o, ol)| (o..o + ol).map(|i| (i, &vals[i])))
182 .collect::<Vec<(usize, &Value)>>()
183 }
184
185 let left_only_values: Vec<_> = extract_one_sided_values(deleted, a.as_slice());
186 let right_only_values: Vec<_> = extract_one_sided_values(inserted, b.as_slice());
187
188 let mut left_only_nodes = values_to_node(left_only_values);
189 let mut right_only_nodes = values_to_node(right_only_values);
190 let mut diff = DiffTreeNode::Null;
191
192 for (o, ol, n, nl) in replaced {
193 let max_length = ol.max(nl);
194 for i in 0..max_length {
195 let inner_a = a.get(o + i).unwrap_or(&Value::Null);
196 let inner_b = b.get(n + i).unwrap_or(&Value::Null);
197 let cdiff = match_json(inner_a, inner_b, sort_arrays, ignore_keys)?;
198 let position = o + i;
199 let Mismatch {
200 left_only: l,
201 right_only: r,
202 unequal_values: u,
203 } = cdiff;
204 left_only_nodes = insert_child_key_diff(left_only_nodes, l, position)?;
205 right_only_nodes = insert_child_key_diff(right_only_nodes, r, position)?;
206 diff = insert_child_key_diff(diff, u, position)?;
207 }
208 }
209
210 Ok(Mismatch::new(left_only_nodes, right_only_nodes, diff))
211}
212
213fn get_map_of_keys(set: HashSet<String>) -> DiffTreeNode {
214 if !set.is_empty() {
215 DiffTreeNode::Node(
216 set.iter()
217 .map(|key| (String::from(key), DiffTreeNode::Null))
218 .collect(),
219 )
220 } else {
221 DiffTreeNode::Null
222 }
223}
224
225fn insert_child_key_diff(
226 parent: DiffTreeNode,
227 child: DiffTreeNode,
228 line: usize,
229) -> Result<DiffTreeNode> {
230 if child == DiffTreeNode::Null {
231 return Ok(parent);
232 }
233 if let DiffTreeNode::Array(mut array) = parent {
234 array.push((line, child));
235 Ok(DiffTreeNode::Array(array))
236 } else if let DiffTreeNode::Null = parent {
237 Ok(DiffTreeNode::Array(vec![(line, child)]))
238 } else {
239 Err(format!("Tried to insert child: {child:?} into parent {parent:?} - structure incoherent, expected a parent array - somehow json structure seems broken").into())
240 }
241}
242
243fn insert_child_key_map(
244 parent: DiffTreeNode,
245 child: DiffTreeNode,
246 key: &String,
247) -> Result<DiffTreeNode> {
248 if child == DiffTreeNode::Null {
249 return Ok(parent);
250 }
251 if let DiffTreeNode::Node(mut map) = parent {
252 map.insert(String::from(key), child);
253 Ok(DiffTreeNode::Node(map))
254 } else if let DiffTreeNode::Null = parent {
255 let mut map = HashMap::new();
256 map.insert(String::from(key), child);
257 Ok(DiffTreeNode::Node(map))
258 } else {
259 Err(format!("Tried to insert child: {child:?} into parent {parent:?} - structure incoherent, expected a parent object - somehow json structure seems broken").into())
260 }
261}
262
263struct MapDifference {
264 left_only: HashSet<String>,
265 right_only: HashSet<String>,
266 intersection: HashSet<String>,
267}
268
269impl MapDifference {
270 pub fn new(
271 left_only: HashSet<String>,
272 right_only: HashSet<String>,
273 intersection: HashSet<String>,
274 ) -> Self {
275 Self {
276 right_only,
277 left_only,
278 intersection,
279 }
280 }
281}
282
283fn intersect_maps(
284 a: &Map<String, Value>,
285 b: &Map<String, Value>,
286 ignore_keys: &[Regex],
287) -> MapDifference {
288 let mut intersection = HashSet::new();
289 let mut left = HashSet::new();
290
291 let mut right = HashSet::new();
292 for a_key in a
293 .keys()
294 .filter(|k| ignore_keys.iter().all(|r| !r.is_match(k.as_str())))
295 {
296 if b.contains_key(a_key) {
297 intersection.insert(String::from(a_key));
298 } else {
299 left.insert(String::from(a_key));
300 }
301 }
302 for b_key in b
303 .keys()
304 .filter(|k| ignore_keys.iter().all(|r| !r.is_match(k.as_str())))
305 {
306 if !a.contains_key(b_key) {
307 right.insert(String::from(b_key));
308 }
309 }
310
311 MapDifference::new(left, right, intersection)
312}
313
314#[cfg(test)]
315mod tests {
316 use maplit::hashmap;
317 use serde_json::json;
318
319 use super::*;
320
321 #[test]
322 fn sorting_ignores_ignored_keys() {
323 let data1: Value =
324 serde_json::from_str(r#"[{"a": 1, "b":2 }, { "a": 2, "b" : 1 }]"#).unwrap();
325 let ignore = [Regex::new("a").unwrap()];
326 let sorted_ignores = preprocess_array(true, data1.as_array().unwrap(), &ignore);
327 let sorted_no_ignores = preprocess_array(true, data1.as_array().unwrap(), &[]);
328
329 assert_eq!(
330 sorted_ignores
331 .first()
332 .unwrap()
333 .as_object()
334 .unwrap()
335 .get("b")
336 .unwrap()
337 .as_i64()
338 .unwrap(),
339 1
340 );
341 assert_eq!(
342 sorted_no_ignores
343 .first()
344 .unwrap()
345 .as_object()
346 .unwrap()
347 .get("b")
348 .unwrap()
349 .as_i64()
350 .unwrap(),
351 2
352 );
353 }
354
355 #[test]
356 fn test_arrays_sorted_objects_ignored() {
357 let data1 = r#"[{"c": {"d": "e"} },"b","c"]"#;
358 let data2 = r#"["b","c",{"c": {"d": "f"} }]"#;
359 let ignore = Regex::new("d").unwrap();
360 let diff = compare_strs(data1, data2, true, &[ignore]).unwrap();
361 assert!(diff.is_empty());
362 }
363
364 #[test]
365 fn test_arrays_sorted_simple() {
366 let data1 = r#"["a","b","c"]"#;
367 let data2 = r#"["b","c","a"]"#;
368 let diff = compare_strs(data1, data2, true, &[]).unwrap();
369 assert!(diff.is_empty());
370 }
371
372 #[test]
373 fn test_arrays_sorted_objects() {
374 let data1 = r#"[{"c": {"d": "e"} },"b","c"]"#;
375 let data2 = r#"["b","c",{"c": {"d": "e"} }]"#;
376 let diff = compare_strs(data1, data2, true, &[]).unwrap();
377 assert!(diff.is_empty());
378 }
379
380 #[test]
381 fn test_arrays_deep_sorted_objects() {
382 let data1 = r#"[{"c": ["d","e"] },"b","c"]"#;
383 let data2 = r#"["b","c",{"c": ["e", "d"] }]"#;
384 let diff = compare_strs(data1, data2, true, &[]).unwrap();
385 assert!(diff.is_empty());
386 }
387
388 #[test]
389 fn test_arrays_deep_sorted_objects_with_arrays() {
390 let data1 = r#"[{"a": [{"b": ["3", "1"]}] }, {"a": [{"b": ["2", "3"]}] }]"#;
391 let data2 = r#"[{"a": [{"b": ["2", "3"]}] }, {"a": [{"b": ["1", "3"]}] }]"#;
392 let diff = compare_strs(data1, data2, true, &[]).unwrap();
393 assert!(diff.is_empty());
394 }
395
396 #[test]
397 fn test_arrays_deep_sorted_objects_with_outer_diff() {
398 let data1 = r#"[{"c": ["d","e"] },"b"]"#;
399 let data2 = r#"["b","c",{"c": ["e", "d"] }]"#;
400 let diff = compare_strs(data1, data2, true, &[]).unwrap();
401 assert!(!diff.is_empty());
402 let insertions = diff.right_only.get_diffs();
403 assert_eq!(insertions.len(), 1);
404 assert_eq!(insertions.first().unwrap().to_string(), r#".[2].("c")"#);
405 }
406
407 #[test]
408 fn test_arrays_deep_sorted_objects_with_inner_diff() {
409 let data1 = r#"["a",{"c": ["d","e", "f"] },"b"]"#;
410 let data2 = r#"["b",{"c": ["e","d"] },"a"]"#;
411 let diff = compare_strs(data1, data2, true, &[]).unwrap();
412 assert!(!diff.is_empty());
413 let deletions = diff.left_only.get_diffs();
414
415 assert_eq!(deletions.len(), 1);
416 assert_eq!(
417 deletions.first().unwrap().to_string(),
418 r#".[0].c.[2].("f")"#
419 );
420 }
421
422 #[test]
423 fn test_arrays_deep_sorted_objects_with_inner_diff_mutation() {
424 let data1 = r#"["a",{"c": ["d", "f"] },"b"]"#;
425 let data2 = r#"["b",{"c": ["e","d"] },"a"]"#;
426 let diffs = compare_strs(data1, data2, true, &[]).unwrap();
427 assert!(!diffs.is_empty());
428 let diffs = diffs.unequal_values.get_diffs();
429
430 assert_eq!(diffs.len(), 1);
431 assert_eq!(
432 diffs.first().unwrap().to_string(),
433 r#".[0].c.[1].("f" != "e")"#
434 );
435 }
436
437 #[test]
438 fn test_arrays_simple_diff() {
439 let data1 = r#"["a","b","c"]"#;
440 let data2 = r#"["a","b","d"]"#;
441 let diff = compare_strs(data1, data2, false, &[]).unwrap();
442 assert_eq!(diff.left_only, DiffTreeNode::Null);
443 assert_eq!(diff.right_only, DiffTreeNode::Null);
444 let diff = diff.unequal_values.get_diffs();
445 assert_eq!(diff.len(), 1);
446 assert_eq!(diff.first().unwrap().to_string(), r#".[2].("c" != "d")"#);
447 }
448
449 #[test]
450 fn test_arrays_more_complex_diff() {
451 let data1 = r#"["a","b","c"]"#;
452 let data2 = r#"["a","a","b","d"]"#;
453 let diff = compare_strs(data1, data2, false, &[]).unwrap();
454
455 let changes_diff = diff.unequal_values.get_diffs();
456 assert_eq!(diff.left_only, DiffTreeNode::Null);
457
458 assert_eq!(changes_diff.len(), 1);
459 assert_eq!(
460 changes_diff.first().unwrap().to_string(),
461 r#".[2].("c" != "d")"#
462 );
463 let insertions = diff.right_only.get_diffs();
464 assert_eq!(insertions.len(), 1);
465 assert_eq!(insertions.first().unwrap().to_string(), r#".[0].("a")"#);
466 }
467
468 #[test]
469 fn test_arrays_extra_left() {
470 let data1 = r#"["a","b","c"]"#;
471 let data2 = r#"["a","b"]"#;
472 let diff = compare_strs(data1, data2, false, &[]).unwrap();
473
474 let diffs = diff.left_only.get_diffs();
475 assert_eq!(diffs.len(), 1);
476 assert_eq!(diffs.first().unwrap().to_string(), r#".[2].("c")"#);
477 assert_eq!(diff.unequal_values, DiffTreeNode::Null);
478 assert_eq!(diff.right_only, DiffTreeNode::Null);
479 }
480
481 #[test]
482 fn test_arrays_extra_right() {
483 let data1 = r#"["a","b"]"#;
484 let data2 = r#"["a","b","c"]"#;
485 let diff = compare_strs(data1, data2, false, &[]).unwrap();
486
487 let diffs = diff.right_only.get_diffs();
488 assert_eq!(diffs.len(), 1);
489 assert_eq!(diffs.first().unwrap().to_string(), r#".[2].("c")"#);
490 assert_eq!(diff.unequal_values, DiffTreeNode::Null);
491 assert_eq!(diff.left_only, DiffTreeNode::Null);
492 }
493
494 #[test]
495 fn long_insertion_modification() {
496 let data1 = r#"["a","b","a"]"#;
497 let data2 = r#"["a","c","c","c","a"]"#;
498 let diff = compare_strs(data1, data2, false, &[]).unwrap();
499 let diffs = diff.unequal_values.get_diffs();
500
501 assert_eq!(diffs.len(), 3);
502 let diffs: Vec<_> = diffs.into_iter().map(|d| d.to_string()).collect();
503
504 assert!(diffs.contains(&r#".[3].(null != "c")"#.to_string()));
505 assert!(diffs.contains(&r#".[1].("b" != "c")"#.to_string()));
506 assert!(diffs.contains(&r#".[2].("a" != "c")"#.to_string()));
507 assert_eq!(diff.right_only, DiffTreeNode::Null);
508 assert_eq!(diff.left_only, DiffTreeNode::Null);
509 }
510
511 #[test]
512 fn test_arrays_object_extra() {
513 let data1 = r#"["a","b"]"#;
514 let data2 = r#"["a","b", {"c": {"d": "e"} }]"#;
515 let diff = compare_strs(data1, data2, false, &[]).unwrap();
516
517 let diffs = diff.right_only.get_diffs();
518 assert_eq!(diffs.len(), 1);
519 assert_eq!(
520 diffs.first().unwrap().to_string(),
521 r#".[2].({"c":{"d":"e"}})"#
522 );
523 assert_eq!(diff.unequal_values, DiffTreeNode::Null);
524 assert_eq!(diff.left_only, DiffTreeNode::Null);
525 }
526
527 #[test]
528 fn nested_diff() {
529 let data1 = r#"{
530 "a":"b",
531 "b":{
532 "c":{
533 "d":true,
534 "e":5,
535 "f":9,
536 "h":{
537 "i":true,
538 "j":false
539 }
540 }
541 }
542 }"#;
543 let data2 = r#"{
544 "a":"b",
545 "b":{
546 "c":{
547 "d":true,
548 "e":6,
549 "g":0,
550 "h":{
551 "i":false,
552 "k":false
553 }
554 }
555 }
556 }"#;
557
558 let expected_left = DiffTreeNode::Node(hashmap! {
559 "b".to_string() => DiffTreeNode::Node(hashmap! {
560 "c".to_string() => DiffTreeNode::Node(hashmap! {
561 "f".to_string() => DiffTreeNode::Null,
562 "h".to_string() => DiffTreeNode::Node( hashmap! {
563 "j".to_string() => DiffTreeNode::Null,
564 }
565 ),
566 }
567 ),
568 }),
569 });
570 let expected_right = DiffTreeNode::Node(hashmap! {
571 "b".to_string() => DiffTreeNode::Node(hashmap! {
572 "c".to_string() => DiffTreeNode::Node(hashmap! {
573 "g".to_string() => DiffTreeNode::Null,
574 "h".to_string() => DiffTreeNode::Node(hashmap! {
575 "k".to_string() => DiffTreeNode::Null,
576 }
577 )
578 }
579 )
580 }
581 )
582 });
583 let expected_uneq = DiffTreeNode::Node(hashmap! {
584 "b".to_string() => DiffTreeNode::Node(hashmap! {
585 "c".to_string() => DiffTreeNode::Node(hashmap! {
586 "e".to_string() => DiffTreeNode::Value(json!(5), json!(6)),
587 "h".to_string() => DiffTreeNode::Node(hashmap! {
588 "i".to_string() => DiffTreeNode::Value(json!(true), json!(false)),
589 }
590 )
591 }
592 )
593 }
594 )
595 });
596 let expected = Mismatch::new(expected_left, expected_right, expected_uneq);
597
598 let mismatch = compare_strs(data1, data2, false, &[]).unwrap();
599 assert_eq!(mismatch, expected, "Diff was incorrect.");
600 }
601
602 #[test]
603 fn no_diff() {
604 let data1 = r#"{
605 "a":"b",
606 "b":{
607 "c":{
608 "d":true,
609 "e":5,
610 "f":9,
611 "h":{
612 "i":true,
613 "j":false
614 }
615 }
616 }
617 }"#;
618 let data2 = r#"{
619 "a":"b",
620 "b":{
621 "c":{
622 "d":true,
623 "e":5,
624 "f":9,
625 "h":{
626 "i":true,
627 "j":false
628 }
629 }
630 }
631 }"#;
632
633 assert_eq!(
634 compare_strs(data1, data2, false, &[]).unwrap(),
635 Mismatch::new(DiffTreeNode::Null, DiffTreeNode::Null, DiffTreeNode::Null)
636 );
637 }
638
639 #[test]
640 fn no_json() {
641 let data1 = r#"{}"#;
642 let data2 = r#"{}"#;
643
644 assert_eq!(
645 compare_strs(data1, data2, false, &[]).unwrap(),
646 Mismatch::empty()
647 );
648 }
649
650 #[test]
651 fn parse_err_source_one() {
652 let invalid_json1 = r#"{invalid: json}"#;
653 let valid_json2 = r#"{"a":"b"}"#;
654 compare_strs(invalid_json1, valid_json2, false, &[])
655 .expect_err("Parsing invalid JSON didn't throw an error");
656 }
657
658 #[test]
659 fn parse_err_source_two() {
660 let valid_json1 = r#"{"a":"b"}"#;
661 let invalid_json2 = r#"{invalid: json}"#;
662 compare_strs(valid_json1, invalid_json2, false, &[])
663 .expect_err("Parsing invalid JSON didn't throw an err");
664 }
665}