1use difflib::sequencematcher::SequenceMatcher;
2use serde_json::{Map, Value};
3
4use crate::colorize::colorize_to_array;
5
6#[allow(clippy::module_name_repetitions)]
9pub struct JsonDiff {
10 pub score: f64,
15 pub diff: Option<Value>,
19}
20
/// Best candidate found so far while looking for a fuzzy match of an item.
struct BestMatch {
    /// Similarity score between the item and the candidate.
    score: f64,
    /// Key under which the candidate is stored.
    key: String,
    /// Positional distance between the item and the candidate; breaks ties
    /// between candidates with the same score.
    index_distance: usize,
}

impl BestMatch {
    /// Bundles a candidate's score, key and positional distance.
    fn new(score: f64, key: String, index_distance: usize) -> Self {
        BestMatch {
            score,
            key,
            index_distance,
        }
    }
}
36
37impl JsonDiff {
38 #[must_use]
40 pub fn diff(json1: &Value, json2: &Value, keys_only: bool) -> Self {
41 Self::diff_with_score(json1, json2, keys_only)
42 }
43
44 #[must_use]
47 pub fn diff_string(json1: &Value, json2: &Value, keys_only: bool) -> Option<String> {
48 let Self { score: _, diff } = Self::diff(json1, json2, keys_only);
49 diff.map(|value| colorize_to_array(&value).join("\n") + "\n")
50 }
51
52 fn object_diff(obj1: &Map<String, Value>, obj2: &Map<String, Value>, keys_only: bool) -> Self {
53 let mut result = Map::new();
54 let mut score = 0.;
55
56 for (key, value1) in obj1 {
57 if !obj2.contains_key(key) {
58 let key_deleted = format!("{key}__deleted");
59 result.insert(key_deleted, value1.clone());
60 score -= 30.;
61 }
62 }
63
64 for (key, value2) in obj2 {
65 if !obj1.contains_key(key) {
66 let key_added = format!("{key}__added");
67 result.insert(key_added, value2.clone());
68 score -= 30.;
69 }
70 }
71
72 for (key, value1) in obj1 {
73 if let Some(value2) = obj2.get(key) {
74 score += 20.;
75 let Self {
76 score: subscore,
77 diff: change,
78 } = Self::diff_with_score(value1, value2, keys_only);
79 if let Some(change) = change {
80 result.insert(key.clone(), change);
81 }
82 score += (subscore / 5.).clamp(-10., 20.);
83 }
84 }
85
86 if result.is_empty() {
87 #[allow(clippy::cast_precision_loss)]
88 Self {
89 score: 100. * (obj1.len() as f64).max(0.5),
90 diff: None,
91 }
92 } else {
93 let output = json!(result);
94 Self {
95 score: score.max(0.),
96 diff: Some(output),
97 }
98 }
99 }
100
101 fn check_type(item1: &Value, item2: &Value) -> bool {
102 item1.is_null() == item2.is_null()
103 || item1.is_boolean() == item2.is_boolean()
104 || item1.is_number() == item2.is_number()
105 || item1.is_string() == item2.is_string()
106 || item1.is_array() == item2.is_array()
107 || item1.is_object() == item2.is_object()
108 }
109
110 fn find_matching_object(
111 item: &Value,
112 index: usize,
113 fuzzy_originals: &Map<String, Value>,
114 ) -> Option<BestMatch> {
115 let mut best_match: Option<BestMatch> = None;
116
117 for (match_index, (key, candidate)) in fuzzy_originals.into_iter().enumerate() {
118 if key != "__next" {
119 let index_distance = (match_index).wrapping_sub(index);
120 if Self::check_type(item, candidate) {
121 let Self { score, diff: _ } = Self::diff(item, candidate, false);
122 if best_match.as_ref().map_or(true, |v| score > v.score)
123 || (best_match
124 .as_ref()
125 .map_or(true, |v| (score - v.score).abs() < f64::EPSILON)
126 && best_match
127 .as_ref()
128 .map_or(true, |v| index_distance < v.index_distance))
129 {
130 best_match = Some(BestMatch::new(score, key.clone(), index_distance));
131 }
132 }
133 }
134 }
135
136 best_match
137 }
138
139 fn scalarize(
140 array: &[Value],
141 scalar_values: &mut Map<String, Value>,
142 originals: &mut Map<String, Value>,
143 fuzzy_originals: Option<&Map<String, Value>>,
144 ) -> Vec<String> {
145 let mut output_array: Vec<String> = Vec::new();
146 for (index, item) in array.iter().enumerate() {
147 let mut value = if let Value::Object(_) = item {
148 None
149 } else {
150 let key = item.to_string();
151 scalar_values.insert(key.clone(), item.clone());
152 Some(key)
153 };
154
155 if let Some(fuzzy_originals) = fuzzy_originals {
156 if let Some(best_match) = Self::find_matching_object(item, index, fuzzy_originals) {
157 if best_match.score > 40. && !originals.contains_key(&best_match.key) {
158 originals.insert(best_match.key.clone(), item.to_owned());
159 value = Some(best_match.key);
160 }
161 }
162 }
163
164 if value.is_none() {
165 let original = originals.get_mut("__next").unwrap();
166 let proxy = "__$!SCALAR".to_owned() + &(original).to_string();
167
168 *original = json!(original.as_u64().unwrap() + 1);
169 originals.insert(proxy.clone(), item.to_owned());
170 value = Some(proxy);
171 }
172
173 let final_value = value.unwrap();
174 output_array.push(final_value);
175 }
176 output_array
177 }
178
179 fn is_scalarized(key: &str, originals: &Map<String, Value>) -> bool {
180 originals.contains_key(key)
181 }
182
183 fn get_scalar(key: &str, scalar_values: &Map<String, Value>) -> Value {
184 scalar_values.get(key).unwrap().clone()
185 }
186
187 fn descalarize(
188 key: &str,
189 scalar_values: &Map<String, Value>,
190 originals: &Map<String, Value>,
191 ) -> Value {
192 if let Some(val) = originals.get(key) {
193 val.clone()
194 } else {
195 Self::get_scalar(key, scalar_values)
196 }
197 }
198
199 #[allow(clippy::too_many_lines)]
200 fn array_diff(array1: &[Value], array2: &[Value], keys_only: bool) -> Self {
201 let mut originals1 = Map::new();
202 let mut scalar_values1 = Map::new();
203 originals1.insert("__next".to_owned(), json!(1));
204 let seq1: Vec<String> = Self::scalarize(array1, &mut scalar_values1, &mut originals1, None);
205
206 let mut originals2 = Map::new();
207 let mut scalar_values2 = Map::new();
208 let originals1_value = originals1.get("__next").unwrap();
209 originals2.insert("__next".to_owned(), json!(originals1_value));
210 let seq2: Vec<String> = Self::scalarize(
211 array2,
212 &mut scalar_values2,
213 &mut originals2,
214 Some(&originals1),
215 );
216
217 let opcodes = SequenceMatcher::new(&seq1, &seq2).get_opcodes();
218
219 let mut result: Vec<Value> = Vec::new();
220 let mut score: f64 = 0.;
221 let mut all_equal = true;
222
223 for opcode in &opcodes {
224 if !(opcode.tag == "equal" || (keys_only && opcode.tag == "replace")) {
225 all_equal = false;
226 }
227
228 match opcode.tag.as_str() {
229 "equal" => {
230 for key in seq1.iter().take(opcode.first_end).skip(opcode.first_start) {
231 let is_scalarized1 = Self::is_scalarized(key, &originals1);
232 assert!(!is_scalarized1 || (Self::is_scalarized(key, &originals2)),
233 "Internal bug: the items associated to the key {key} are different in the two dictionaries"
234 );
235 if is_scalarized1 {
236 let item1 = Self::descalarize(key, &scalar_values1, &originals1);
237 let item2 = Self::descalarize(key, &scalar_values2, &originals2);
238 let Self {
239 score: _,
240 diff: change,
241 } = Self::diff(&item1, &item2, keys_only);
242 if let Some(change) = change {
243 result.push(json!([json!('~'), change]));
244 all_equal = false;
245 } else {
246 result.push(json!([json!(' ')]));
247 }
248 } else {
249 result
250 .push(json!([json!(' '), Self::get_scalar(key, &scalar_values1)]));
251 }
252 score += 10.;
253 }
254 }
255 "delete" => {
256 for key in seq1.iter().take(opcode.first_end).skip(opcode.first_start) {
257 result.push(json!([
258 json!('-'),
259 Self::descalarize(key, &scalar_values1, &originals1)
260 ]));
261 score -= 5.;
262 }
263 }
264 "insert" => {
265 for key in seq2
266 .iter()
267 .take(opcode.second_end)
268 .skip(opcode.second_start)
269 {
270 result.push(json!([
271 json!('+'),
272 Self::descalarize(key, &scalar_values2, &originals2)
273 ]));
274 score -= 5.;
275 }
276 }
277 "replace" => {
278 if keys_only {
279 for (key1, key2) in seq1
280 .iter()
281 .take(opcode.first_end)
282 .skip(opcode.first_start)
283 .zip(
284 seq2.iter()
285 .take(
286 opcode.first_end - opcode.first_start + opcode.second_start,
287 )
288 .skip(opcode.second_start),
289 )
290 {
291 let Self {
292 score: _,
293 diff: change,
294 } = Self::diff(
295 &Self::descalarize(key1, &scalar_values1, &originals1),
296 &Self::descalarize(key2, &scalar_values2, &originals2),
297 keys_only,
298 );
299 if let Some(change) = change {
300 result.push(json!([json!('~'), change]));
301 all_equal = false;
302 } else {
303 result.push(json!(' '));
304 }
305 }
306 } else {
307 for key in seq1.iter().take(opcode.first_end).skip(opcode.first_start) {
308 result.push(json!([
309 json!('-'),
310 Self::descalarize(key, &scalar_values1, &originals1)
311 ]));
312 score -= 5.;
313 }
314 for key in seq2
315 .iter()
316 .take(opcode.second_end)
317 .skip(opcode.second_start)
318 {
319 result.push(json!([
320 json!('+'),
321 Self::descalarize(key, &scalar_values2, &originals2)
322 ]));
323 score -= 5.;
324 }
325 }
326 }
327 _ => all_equal = true,
328 }
329 }
330
331 if all_equal || opcodes.is_empty() {
332 Self {
333 score: 100.,
334 diff: None,
335 }
336 } else {
337 Self {
338 score: score.max(0.),
339 diff: Some(json!(result)),
340 }
341 }
342 }
343
344 fn diff_with_score(json1: &Value, json2: &Value, keys_only: bool) -> Self {
345 if let (Value::Object(obj1), Value::Object(obj2)) = (json1, json2) {
346 return Self::object_diff(obj1, obj2, keys_only);
347 }
348 if let (Value::Array(array1), Value::Array(array2)) = (json1, json2) {
349 return Self::array_diff(array1, array2, keys_only);
350 }
351
352 if !keys_only && json1 != json2 {
353 Self {
354 score: 0.,
355 diff: Some(json!({ "__old": json1, "__new": json2 })),
356 }
357 } else {
358 Self {
359 score: 100.,
360 diff: None,
361 }
362 }
363 }
364}
365
366#[cfg(test)]
367mod tests {
368
369 use std::error::Error;
370 use std::fs::File;
371 use std::io::BufReader;
372 use std::path::Path;
373
374 use super::JsonDiff;
375
/// Scalars: equal values produce no diff, different values an old/new pair.
#[test]
fn test_scalar() {
    // (left, right, expected diff)
    let cases = [
        (json!(42), json!(42), None),
        (json!("foo"), json!("foo"), None),
        (
            json!(42),
            json!(10),
            Some(json!({"__old": 42, "__new": 10 })),
        ),
    ];

    for (left, right, expected) in cases {
        assert_eq!(
            JsonDiff::diff(&left, &right, false).diff,
            expected,
            "diff of {left} and {right}"
        );
    }
}
388
/// Objects: identical objects produce no diff; removed, added and changed
/// keys are reported, including inside nested objects.
#[test]
fn test_objects() {
    let identical = [
        json!({}),
        json!({"foo": 42, "bar": 10 }),
        json!({"foo": 42, "bar": {"bbbar": 10, "bbboz": 11 }}),
    ];
    for value in identical {
        assert_eq!(
            JsonDiff::diff(&value, &value, false).diff,
            None,
            "diff of {value} with itself"
        );
    }

    // (left, right, expected diff)
    let changed = [
        (
            json!({"foo": 42, "bar": 10 }),
            json!({"bar": 10 }),
            json!({"foo__deleted": 42 }),
        ),
        (
            json!({"bar": 10 }),
            json!({"foo": 42, "bar": 10 }),
            json!({"foo__added": 42 }),
        ),
        (
            json!({"foo": 42 }),
            json!({"foo": 10 }),
            json!({"foo": {"__old": 42, "__new": 10 } }),
        ),
        (
            json!({"foo": 42, "bar": {"bbbar": 10, "bbboz": 11 }}),
            json!({"foo": 42, "bar": {"bbbar": 12 }}),
            json!(
                {
                    "bar": {
                        "bbboz__deleted": 11,
                        "bbbar": {"__old": 10, "__new": 12 }
                    }
                }
            ),
        ),
    ];
    for (left, right, expected) in changed {
        assert_eq!(
            JsonDiff::diff(&left, &right, false).diff,
            Some(expected),
            "diff of {left} and {right}"
        );
    }
}
445
/// Arrays of scalars: removals and insertions are reported in place.
#[test]
fn test_array_of_scalars() {
    // (left, right, expected diff)
    let cases = [
        (json!([10, 20, 30]), json!([10, 20, 30]), None),
        (
            json!([10, 20, 30]),
            json!([10, 30]),
            Some(json!([[' ', 10], ['-', 20], [' ', 30]])),
        ),
        (
            json!([10, 30]),
            json!([10, 20, 30]),
            Some(json!([[' ', 10], ['+', 20], [' ', 30]])),
        ),
        (
            json!([10, 20]),
            json!([10, 20, 30]),
            Some(json!([[' ', 10], [' ', 20], ['+', 30]])),
        ),
    ];

    for (left, right, expected) in cases {
        assert_eq!(
            JsonDiff::diff(&left, &right, false).diff,
            expected,
            "diff of {left} and {right}"
        );
    }
}
468
/// Arrays of objects: identical arrays produce no diff; removed, added and
/// modified elements are reported with `-`, `+` and `~`.
#[test]
fn test_array_of_objects() {
    let identical = [
        json!([{"foo": 10 }, {"foo": 20 }, {"foo": 30 }]),
        json!([{}]),
        json!([[]]),
        json!([1, null, null]),
        json!([{"a": 1, "b": 2 }, {"a": 1, "b": 2 }]),
    ];
    for value in identical {
        assert_eq!(
            JsonDiff::diff(&value, &value, false).diff,
            None,
            "diff of {value} with itself"
        );
    }

    // (left, right, expected diff)
    let changed = [
        (
            json!([{"foo": 10 }, {"foo": 20 }, {"foo": 30 }]),
            json!([{"foo": 10 }, {"foo": 30 }]),
            json!([[' '], ['-', { "foo": 20 }], [' ']]),
        ),
        (
            json!([{"foo": 10 }, {"foo": 30 }]),
            json!([{"foo": 10 }, {"foo": 20 }, {"foo": 30 }]),
            json!([[' '], ['+', {"foo": 20 }], [' ']]),
        ),
        (
            json!(
                [
                    {"name": "Foo", "a": 3, "b": 1 },
                    { "foo": 10 }
                ]
            ),
            json!(
                [
                    {"name": "Foo", "a": 3, "b": 1 },
                    {"name": "Foo", "a": 3, "b": 1, "c": 1 },
                    {"foo": 10 }
                ]
            ),
            json!(
                [
                    [' '],
                    ['+', {"name": "Foo", "a": 3, "b": 1, "c": 1 }],
                    [' ']
                ]
            ),
        ),
        (
            json!(
                [
                    {"foo": 10, "bar": {"bbbar": 10, "bbboz": 11 } },
                    {"foo": 20, "bar": {"bbbar": 50, "bbboz": 25 } },
                    {"foo": 30, "bar": {"bbbar": 92, "bbboz": 34 } }
                ]
            ),
            json!(
                [
                    {"foo": 10, "bar": {"bbbar": 10, "bbboz": 11 } },
                    {"foo": 21, "bar": {"bbbar": 50, "bbboz": 25 } },
                    {"foo": 30, "bar": {"bbbar": 92, "bbboz": 34 } }
                ]
            ),
            json!(
                [
                    [' '],
                    ['~', {"foo": { "__old": 20, "__new": 21 } }],
                    [' ']
                ]
            ),
        ),
    ];
    for (left, right, expected) in changed {
        assert_eq!(
            JsonDiff::diff(&left, &right, false).diff,
            Some(expected),
            "diff of {left} and {right}"
        );
    }
}
575
/// Keys-only mode: scalars never produce a diff, even when they differ.
#[test]
fn test_scalar_keys() {
    let pairs = [
        (json!(42), json!(42)),
        (json!("foo"), json!("foo")),
        (json!(42), json!(10)),
    ];

    for (left, right) in pairs {
        assert_eq!(
            JsonDiff::diff(&left, &right, true).diff,
            None,
            "keys-only diff of {left} and {right}"
        );
    }
}
587
/// Keys-only mode on objects: value changes are ignored, while removed and
/// added keys are still reported.
#[test]
fn test_objects_keys() {
    // (left, right, expected diff)
    let cases = [
        (json!({}), json!({}), None),
        (
            json!({"foo": 42, "bar": 10 }),
            json!({"foo": 42, "bar": 10 }),
            None,
        ),
        (
            json!({"foo": 42, "bar": {"bbbar": 10, "bbboz": 11 } }),
            json!({"foo": 42, "bar": {"bbbar": 10, "bbboz": 11 } }),
            None,
        ),
        (
            json!({"foo": 42, "bar": 10 }),
            json!({"bar": 10 }),
            Some(json!({"foo__deleted": 42 })),
        ),
        (
            json!({"bar": 10 }),
            json!({"foo": 42, "bar": 10 }),
            Some(json!({"foo__added": 42 })),
        ),
        (json!({"foo": 42 }), json!({"foo": 10 }), None),
        (
            json!({"foo": 42, "bar": {"bbbar": 10 }}),
            json!({"foo": 42, "bar": {"bbbar": 12 }}),
            None,
        ),
        (
            json!({"foo": 42, "bar": {"bbbar": 10, "bbboz": 11 } }),
            json!({"foo": 42, "bar": {"bbbar": 12 } }),
            Some(json!({"bar": {"bbboz__deleted": 11 } })),
        ),
    ];

    for (left, right, expected) in cases {
        assert_eq!(
            JsonDiff::diff(&left, &right, true).diff,
            expected,
            "keys-only diff of {left} and {right}"
        );
    }
}
647
/// Keys-only mode on arrays of scalars: a replaced value is ignored, while
/// removals and insertions are still reported.
#[test]
fn test_array_of_scalars_keys() {
    // (left, right, expected diff)
    let cases = [
        (json!([10, 20, 30]), json!([10, 20, 30]), None),
        (json!([10, 20, 30]), json!([10, 42, 30]), None),
        (
            json!([10, 20, 30]),
            json!([10, 30]),
            Some(json!([[' ', 10], ['-', 20], [' ', 30]])),
        ),
        (
            json!([10, 30]),
            json!([10, 20, 30]),
            Some(json!([[' ', 10], ['+', 20], [' ', 30]])),
        ),
        (
            json!([10, 20]),
            json!([10, 20, 30]),
            Some(json!([[' ', 10], [' ', 20], ['+', 30]])),
        ),
    ];

    for (left, right, expected) in cases {
        assert_eq!(
            JsonDiff::diff(&left, &right, true).diff,
            expected,
            "keys-only diff of {left} and {right}"
        );
    }
}
675
/// Keys-only mode on arrays of objects: value changes inside elements are
/// ignored, while removed and added elements are still reported.
#[test]
fn test_array_of_objects_keys() {
    // The first fixture is an array of three objects, mirroring
    // `test_array_of_objects`; a single object repeating the key "foo"
    // three times would collapse to one entry and exercise nothing.
    let identical = [
        json!([{"foo": 10 }, {"foo": 20 }, {"foo": 30 }]),
        json!([{}]),
        json!([[]]),
        json!([{"a": 1, "b": 2 }, {"a": 1, "b": 2 }]),
    ];
    for value in identical {
        assert_eq!(
            JsonDiff::diff(&value, &value, true).diff,
            None,
            "keys-only diff of {value} with itself"
        );
    }

    // (left, right, expected diff)
    let cases = [
        (
            json!([{"foo": 10 }, {"foo": 20 }, {"foo": 30 }]),
            json!([{"foo": 10 }, {"foo": 30 }]),
            Some(json!([[' '], ['-', {"foo": 20 }], [' ']])),
        ),
        (
            json!([{"foo": 10 }, {"foo": 30 }]),
            json!([{"foo": 10 }, {"foo": 20 }, {"foo": 30 }]),
            Some(json!([[' '], ['+', {"foo": 20 }], [' ']])),
        ),
        (
            json!(
                [
                    {"foo": 10, "bar": {"bbbar": 10, "bbboz": 11 } },
                    {"foo": 20, "bar": {"bbbar": 50, "bbboz": 25 } },
                    {"foo": 30, "bar": {"bbbar": 92, "bbboz": 34 } }
                ]
            ),
            json!(
                [
                    {"foo": 10, "bar": {"bbbar": 10, "bbboz": 11 } },
                    {"foo": 21, "bar": {"bbbar": 50, "bbboz": 25 } },
                    {"foo": 30, "bar": {"bbbar": 92, "bbboz": 34 } }
                ]
            ),
            None,
        ),
    ];
    for (left, right, expected) in cases {
        assert_eq!(
            JsonDiff::diff(&left, &right, true).diff,
            expected,
            "keys-only diff of {left} and {right}"
        );
    }
}
744
/// Renders the diff of two fixture files and compares it with the stored
/// expected output; a file compared with itself yields no output.
#[test]
fn test_diff_string() {
    /// Parses the JSON document stored in `filename`.
    fn read_json_file(filename: &str) -> Result<serde_json::Value, Box<dyn Error>> {
        let reader = BufReader::new(File::open(Path::new(filename))?);
        Ok(serde_json::from_reader(reader)?)
    }

    let json1 = read_json_file("data/a.json").unwrap();
    let json2 = read_json_file("data/b.json").unwrap();

    // Normalize line endings so the comparison does not depend on how the
    // fixture was checked out.
    let expected = std::fs::read_to_string("data/result.jsdiff")
        .unwrap()
        .replace("\r\n", "\n");

    assert_eq!(
        JsonDiff::diff_string(&json1, &json2, false).unwrap(),
        expected
    );
    assert_eq!(JsonDiff::diff_string(&json1, &json1, false), None);
}
774}