1use regex::Regex;
2use serde::Serialize;
3use serde_json::Value;
4use std::collections::HashMap;
5use anyhow::{anyhow, Result};
7use csv::ReaderBuilder;
8use quick_xml::de::from_str;
9use std::fs::File;
10use std::io::{BufReader, Read};
11use std::path::Path;
/// A single difference found between two JSON values.
///
/// The first field of every variant is the path of the differing entry as built by the diff
/// routines in this file: object keys are joined with `.`, array elements are written as
/// `[index]` or `[id_key=id]`, and the root is the empty string.
#[derive(Debug, PartialEq, Serialize)]
pub enum DiffResult {
    /// The entry exists only in the second value; carries the new value.
    Added(String, Value),
    /// The entry exists only in the first value; carries the old value.
    Removed(String, Value),
    /// Both values have the same JSON type but differ; carries `(old, new)`.
    Modified(String, Value, Value),
    /// The values have different JSON types; carries `(old, new)`.
    TypeChanged(String, Value, Value),
}
21
22#[derive(Debug, PartialEq, Serialize)]
24pub enum LightweightDiffResult {
25 Added(String, String), Removed(String, String), Modified(String, String, String), TypeChanged(String, String, String), }
30
31impl From<&DiffResult> for LightweightDiffResult {
32 fn from(diff: &DiffResult) -> Self {
33 match diff {
34 DiffResult::Added(path, value) => {
35 LightweightDiffResult::Added(path.clone(), value.to_string())
36 }
37 DiffResult::Removed(path, value) => {
38 LightweightDiffResult::Removed(path.clone(), value.to_string())
39 }
40 DiffResult::Modified(path, old, new) => {
41 LightweightDiffResult::Modified(path.clone(), old.to_string(), new.to_string())
42 }
43 DiffResult::TypeChanged(path, old, new) => {
44 LightweightDiffResult::TypeChanged(path.clone(), old.to_string(), new.to_string())
45 }
46 }
47 }
48}
49
50#[derive(Debug, Clone)]
52pub struct DiffConfig {
53 pub ignore_keys_regex: Option<regex::Regex>,
54 pub epsilon: Option<f64>,
55 pub array_id_key: Option<String>,
56 pub use_memory_optimization: bool, pub batch_size: usize,
58 pub ignore_whitespace: bool,
59 pub ignore_case: bool,
60}
61
62impl Default for DiffConfig {
63 fn default() -> Self {
64 Self {
65 ignore_keys_regex: None,
66 epsilon: None,
67 array_id_key: None,
68 use_memory_optimization: false, batch_size: 1000,
70 ignore_whitespace: false,
71 ignore_case: false,
72 }
73 }
74}
75
76pub fn diff_standard(
80 v1: &Value,
81 v2: &Value,
82 ignore_keys_regex: Option<&Regex>,
83 epsilon: Option<f64>,
84 array_id_key: Option<&str>,
85) -> Vec<DiffResult> {
86 diff_standard_implementation(
87 v1,
88 v2,
89 ignore_keys_regex,
90 epsilon,
91 array_id_key,
92 false,
93 false,
94 )
95}
96
97pub fn diff_standard_with_config(v1: &Value, v2: &Value, config: &DiffConfig) -> Vec<DiffResult> {
99 diff_standard_implementation(
100 v1,
101 v2,
102 config.ignore_keys_regex.as_ref(),
103 config.epsilon,
104 config.array_id_key.as_deref(),
105 config.ignore_whitespace,
106 config.ignore_case,
107 )
108}
109
110fn diff_standard_implementation(
112 v1: &Value,
113 v2: &Value,
114 ignore_keys_regex: Option<&Regex>,
115 epsilon: Option<f64>,
116 array_id_key: Option<&str>,
117 ignore_whitespace: bool,
118 ignore_case: bool,
119) -> Vec<DiffResult> {
120 let mut results = Vec::new();
121
122 if !values_are_equal_with_config(v1, v2, epsilon, ignore_whitespace, ignore_case) {
124 let type_match = matches!(
125 (v1, v2),
126 (Value::Null, Value::Null)
127 | (Value::Bool(_), Value::Bool(_))
128 | (Value::Number(_), Value::Number(_))
129 | (Value::String(_), Value::String(_))
130 | (Value::Array(_), Value::Array(_))
131 | (Value::Object(_), Value::Object(_))
132 );
133
134 if !type_match {
135 results.push(DiffResult::TypeChanged(
136 "".to_string(),
137 v1.clone(),
138 v2.clone(),
139 ));
140 return results;
141 } else if v1.is_object() && v2.is_object() {
142 diff_objects(
143 "",
144 v1.as_object().unwrap(),
145 v2.as_object().unwrap(),
146 &mut results,
147 ignore_keys_regex,
148 epsilon,
149 array_id_key,
150 ignore_whitespace,
151 ignore_case,
152 );
153 } else if v1.is_array() && v2.is_array() {
154 diff_arrays(
155 "",
156 v1.as_array().unwrap(),
157 v2.as_array().unwrap(),
158 &mut results,
159 ignore_keys_regex,
160 epsilon,
161 array_id_key,
162 ignore_whitespace,
163 ignore_case,
164 );
165 } else {
166 results.push(DiffResult::Modified("".to_string(), v1.clone(), v2.clone()));
167 }
168 }
169
170 results
171}
172
173pub fn diff_optimized(
175 v1: &Value,
176 v2: &Value,
177 ignore_keys_regex: Option<&Regex>,
178 epsilon: Option<f64>,
179 array_id_key: Option<&str>,
180) -> Vec<DiffResult> {
181 let mut results = Vec::new();
182 memory_efficient_diff(
183 v1,
184 v2,
185 &mut results,
186 ignore_keys_regex,
187 epsilon,
188 array_id_key,
189 false,
190 false,
191 );
192 results
193}
194
195pub fn diff_optimized_with_config(v1: &Value, v2: &Value, config: &DiffConfig) -> Vec<DiffResult> {
197 let mut results = Vec::new();
198 memory_efficient_diff(
199 v1,
200 v2,
201 &mut results,
202 config.ignore_keys_regex.as_ref(),
203 config.epsilon,
204 config.array_id_key.as_deref(),
205 config.ignore_whitespace,
206 config.ignore_case,
207 );
208 results
209}
210
211pub fn diff_with_config(v1: &Value, v2: &Value, config: &DiffConfig) -> Vec<DiffResult> {
213 if config.use_memory_optimization {
215 diff_optimized_with_config(v1, v2, config)
216 } else {
217 diff_standard_with_config(v1, v2, config)
218 }
219}
220
221pub fn diff(
223 v1: &Value,
224 v2: &Value,
225 ignore_keys_regex: Option<&Regex>,
226 epsilon: Option<f64>,
227 array_id_key: Option<&str>,
228) -> Vec<DiffResult> {
229 diff_standard(v1, v2, ignore_keys_regex, epsilon, array_id_key)
231}
232
233#[allow(clippy::too_many_arguments)]
234fn diff_recursive(
235 path: &str,
236 v1: &Value,
237 v2: &Value,
238 results: &mut Vec<DiffResult>,
239 ignore_keys_regex: Option<&Regex>,
240 epsilon: Option<f64>,
241 array_id_key: Option<&str>,
242 ignore_whitespace: bool,
243 ignore_case: bool,
244) {
245 match (v1, v2) {
246 (Value::Object(map1), Value::Object(map2)) => {
247 diff_objects(
248 path,
249 map1,
250 map2,
251 results,
252 ignore_keys_regex,
253 epsilon,
254 array_id_key,
255 ignore_whitespace,
256 ignore_case,
257 );
258 }
259 (Value::Array(arr1), Value::Array(arr2)) => {
260 diff_arrays(
261 path,
262 arr1,
263 arr2,
264 results,
265 ignore_keys_regex,
266 epsilon,
267 array_id_key,
268 ignore_whitespace,
269 ignore_case,
270 );
271 }
272 _ => { }
273 }
274}
275
276#[allow(clippy::too_many_arguments)]
277fn diff_objects(
278 path: &str,
279 map1: &serde_json::Map<String, Value>,
280 map2: &serde_json::Map<String, Value>,
281 results: &mut Vec<DiffResult>,
282 ignore_keys_regex: Option<&Regex>,
283 epsilon: Option<f64>,
284 array_id_key: Option<&str>,
285 ignore_whitespace: bool,
286 ignore_case: bool,
287) {
288 for (key, value1) in map1 {
290 let current_path = if path.is_empty() {
291 key.clone()
292 } else {
293 format!("{path}.{key}")
294 };
295 if let Some(regex) = ignore_keys_regex {
296 if regex.is_match(key) {
297 continue;
298 }
299 }
300 match map2.get(key) {
301 Some(value2) => {
302 if value1.is_object() && value2.is_object()
304 || value1.is_array() && value2.is_array()
305 {
306 diff_recursive(
307 ¤t_path,
308 value1,
309 value2,
310 results,
311 ignore_keys_regex,
312 epsilon,
313 array_id_key,
314 ignore_whitespace,
315 ignore_case,
316 );
317 } else if !values_are_equal_with_config(
318 value1,
319 value2,
320 epsilon,
321 ignore_whitespace,
322 ignore_case,
323 ) {
324 let type_match = matches!(
325 (value1, value2),
326 (Value::Null, Value::Null)
327 | (Value::Bool(_), Value::Bool(_))
328 | (Value::Number(_), Value::Number(_))
329 | (Value::String(_), Value::String(_))
330 | (Value::Array(_), Value::Array(_))
331 | (Value::Object(_), Value::Object(_))
332 );
333
334 if !type_match {
335 results.push(DiffResult::TypeChanged(
336 current_path,
337 value1.clone(),
338 value2.clone(),
339 ));
340 } else {
341 results.push(DiffResult::Modified(
342 current_path,
343 value1.clone(),
344 value2.clone(),
345 ));
346 }
347 }
348 }
349 None => {
350 results.push(DiffResult::Removed(current_path, value1.clone()));
351 }
352 }
353 }
354
355 for (key, value2) in map2 {
357 if !map1.contains_key(key) {
358 let current_path = if path.is_empty() {
359 (*key).clone()
360 } else {
361 format!("{path}.{key}")
362 };
363 results.push(DiffResult::Added(current_path, value2.clone()));
364 }
365 }
366}
367
368#[allow(clippy::too_many_arguments)]
369fn diff_arrays(
370 path: &str,
371 arr1: &[Value],
372 arr2: &[Value],
373 results: &mut Vec<DiffResult>,
374 ignore_keys_regex: Option<&Regex>,
375 epsilon: Option<f64>,
376 array_id_key: Option<&str>,
377 ignore_whitespace: bool,
378 ignore_case: bool,
379) {
380 if let Some(id_key) = array_id_key {
381 let mut map1: HashMap<Value, &Value> = HashMap::new();
382 let mut no_id_elements1: Vec<(usize, &Value)> = Vec::new();
383 for (i, val) in arr1.iter().enumerate() {
384 if let Some(id_val) = val.get(id_key) {
385 map1.insert(id_val.clone(), val);
386 } else {
387 no_id_elements1.push((i, val));
388 }
389 }
390
391 let mut map2: HashMap<Value, &Value> = HashMap::new();
392 let mut no_id_elements2: Vec<(usize, &Value)> = Vec::new();
393 for (i, val) in arr2.iter().enumerate() {
394 if let Some(id_val) = val.get(id_key) {
395 map2.insert(id_val.clone(), val);
396 } else {
397 no_id_elements2.push((i, val));
398 }
399 }
400
401 for (id_val, val1) in &map1 {
403 let current_path = format!("{path}[{id_key}={id_val}]");
404 match map2.get(id_val) {
405 Some(val2) => {
406 if val1.is_object() && val2.is_object() || val1.is_array() && val2.is_array() {
408 diff_recursive(
409 ¤t_path,
410 val1,
411 val2,
412 results,
413 ignore_keys_regex,
414 epsilon,
415 array_id_key,
416 ignore_whitespace,
417 ignore_case,
418 );
419 } else if !values_are_equal_with_config(
420 val1,
421 val2,
422 epsilon,
423 ignore_whitespace,
424 ignore_case,
425 ) {
426 let type_match = matches!(
427 (val1, val2),
428 (Value::Null, Value::Null)
429 | (Value::Bool(_), Value::Bool(_))
430 | (Value::Number(_), Value::Number(_))
431 | (Value::String(_), Value::String(_))
432 | (Value::Array(_), Value::Array(_))
433 | (Value::Object(_), Value::Object(_))
434 );
435
436 if !type_match {
437 results.push(DiffResult::TypeChanged(
438 current_path,
439 (*val1).clone(),
440 (*val2).clone(),
441 ));
442 } else {
443 results.push(DiffResult::Modified(
444 current_path,
445 (*val1).clone(),
446 (*val2).clone(),
447 ));
448 }
449 }
450 }
451 None => {
452 results.push(DiffResult::Removed(current_path, (*val1).clone()));
453 }
454 }
455 }
456
457 for (id_val, val2) in map2 {
459 if !map1.contains_key(&id_val) {
460 let current_path = format!("{path}[{id_key}={id_val}]");
461 results.push(DiffResult::Added(current_path, val2.clone()));
462 }
463 }
464
465 let max_len = no_id_elements1.len().max(no_id_elements2.len());
467 for i in 0..max_len {
468 match (no_id_elements1.get(i), no_id_elements2.get(i)) {
469 (Some((idx1, val1)), Some((_idx2, val2))) => {
470 let current_path = format!("{path}[{idx1}]");
471 if val1.is_object() && val2.is_object() || val1.is_array() && val2.is_array() {
472 diff_recursive(
473 ¤t_path,
474 val1,
475 val2,
476 results,
477 ignore_keys_regex,
478 epsilon,
479 array_id_key,
480 ignore_whitespace,
481 ignore_case,
482 );
483 } else if !values_are_equal_with_config(
484 val1,
485 val2,
486 epsilon,
487 ignore_whitespace,
488 ignore_case,
489 ) {
490 let type_match = matches!(
491 (val1, val2),
492 (Value::Null, Value::Null)
493 | (Value::Bool(_), Value::Bool(_))
494 | (Value::Number(_), Value::Number(_))
495 | (Value::String(_), Value::String(_))
496 | (Value::Array(_), Value::Array(_))
497 | (Value::Object(_), Value::Object(_))
498 );
499
500 if !type_match {
501 results.push(DiffResult::TypeChanged(
502 current_path,
503 (*val1).clone(),
504 (*val2).clone(),
505 ));
506 } else {
507 results.push(DiffResult::Modified(
508 current_path,
509 (*val1).clone(),
510 (*val2).clone(),
511 ));
512 }
513 }
514 }
515 (Some((idx1, val1)), None) => {
516 let current_path = format!("{path}[{idx1}]");
517 results.push(DiffResult::Removed(current_path, (*val1).clone()));
518 }
519 (None, Some((idx2, val2))) => {
520 let current_path = format!("{path}[{idx2}]");
521 results.push(DiffResult::Added(current_path, (*val2).clone()));
522 }
523 (None, None) => break,
524 }
525 }
526 } else {
527 let max_len = arr1.len().max(arr2.len());
529 for i in 0..max_len {
530 let current_path = format!("{path}[{i}]");
531 match (arr1.get(i), arr2.get(i)) {
532 (Some(val1), Some(val2)) => {
533 if val1.is_object() && val2.is_object() || val1.is_array() && val2.is_array() {
535 diff_recursive(
536 ¤t_path,
537 val1,
538 val2,
539 results,
540 ignore_keys_regex,
541 epsilon,
542 array_id_key,
543 ignore_whitespace,
544 ignore_case,
545 );
546 } else if !values_are_equal_with_config(
547 val1,
548 val2,
549 epsilon,
550 ignore_whitespace,
551 ignore_case,
552 ) {
553 let type_match = matches!(
554 (val1, val2),
555 (Value::Null, Value::Null)
556 | (Value::Bool(_), Value::Bool(_))
557 | (Value::Number(_), Value::Number(_))
558 | (Value::String(_), Value::String(_))
559 | (Value::Array(_), Value::Array(_))
560 | (Value::Object(_), Value::Object(_))
561 );
562
563 if !type_match {
564 results.push(DiffResult::TypeChanged(
565 current_path,
566 val1.clone(),
567 val2.clone(),
568 ));
569 } else {
570 results.push(DiffResult::Modified(
571 current_path,
572 val1.clone(),
573 val2.clone(),
574 ));
575 }
576 }
577 }
578 (Some(val1), None) => {
579 results.push(DiffResult::Removed(current_path, val1.clone()));
580 }
581 (None, Some(val2)) => {
582 results.push(DiffResult::Added(current_path, val2.clone()));
583 }
584 (None, None) => { }
585 }
586 }
587 }
588}
589
590fn values_are_equal_with_config(
591 v1: &Value,
592 v2: &Value,
593 epsilon: Option<f64>,
594 ignore_whitespace: bool,
595 ignore_case: bool,
596) -> bool {
597 if let (Some(e), Value::Number(n1), Value::Number(n2)) = (epsilon, v1, v2) {
599 if let (Some(f1), Some(f2)) = (n1.as_f64(), n2.as_f64()) {
600 return (f1 - f2).abs() < e;
601 }
602 }
603
604 if let (Value::String(s1), Value::String(s2)) = (v1, v2) {
606 let mut str1 = s1.as_str();
607 let mut str2 = s2.as_str();
608
609 let owned_s1;
610 let owned_s2;
611
612 if ignore_whitespace {
614 owned_s1 = normalize_whitespace(str1);
615 owned_s2 = normalize_whitespace(str2);
616 str1 = &owned_s1;
617 str2 = &owned_s2;
618 }
619
620 if ignore_case {
622 return str1.to_lowercase() == str2.to_lowercase();
623 } else {
624 return str1 == str2;
625 }
626 }
627
628 v1 == v2
630}
631
/// Trims `s` and collapses every run of whitespace into a single ASCII space.
fn normalize_whitespace(s: &str) -> String {
    let mut normalized = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first word.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
636
637pub fn value_type_name(value: &Value) -> &str {
638 match value {
639 Value::Null => "Null",
640 Value::Bool(_) => "Boolean",
641 Value::Number(_) => "Number",
642 Value::String(_) => "String",
643 Value::Array(_) => "Array",
644 Value::Object(_) => "Object",
645 }
646}
647
648pub fn estimate_memory_usage(value: &Value) -> usize {
650 match value {
651 Value::Null => 0,
652 Value::Bool(_) => 1,
653 Value::Number(_) => 8, Value::String(s) => s.len(),
655 Value::Array(arr) => {
656 arr.iter().map(estimate_memory_usage).sum::<usize>() + (arr.len() * 8)
657 }
659 Value::Object(obj) => {
660 obj.iter()
661 .map(|(k, v)| k.len() + estimate_memory_usage(v))
662 .sum::<usize>()
663 + (obj.len() * 16) }
665 }
666}
667
668pub fn would_exceed_memory_limit(v1: &Value, v2: &Value) -> bool {
670 const MAX_MEMORY_USAGE: usize = 1024 * 1024 * 1024; let usage1 = estimate_memory_usage(v1);
673 let usage2 = estimate_memory_usage(v2);
674
675 (usage1 + usage2) * 3 > MAX_MEMORY_USAGE
677}
678
679pub fn parse_ini(content: &str) -> Result<Value> {
680 use configparser::ini::Ini;
681
682 let mut ini = Ini::new();
683 ini.read(content.to_string())
684 .map_err(|e| anyhow!("Failed to parse INI: {}", e))?;
685
686 let mut root_map = serde_json::Map::new();
687
688 for section_name in ini.sections() {
689 let mut section_map = serde_json::Map::new();
690
691 if let Some(section) = ini.get_map_ref().get(§ion_name) {
692 for (key, value) in section {
693 if let Some(v) = value {
694 section_map.insert(key.clone(), Value::String(v.clone()));
695 } else {
696 section_map.insert(key.clone(), Value::Null);
697 }
698 }
699 }
700
701 root_map.insert(section_name, Value::Object(section_map));
702 }
703
704 Ok(Value::Object(root_map))
705}
706
707pub fn parse_xml(content: &str) -> Result<Value> {
708 let value: Value = from_str(content)?;
709 Ok(value)
710}
711
712pub fn parse_csv(content: &str) -> Result<Value> {
713 let mut reader = ReaderBuilder::new().from_reader(content.as_bytes());
714 let mut records = Vec::new();
715
716 let headers = reader.headers()?.clone();
717 let has_headers = !headers.is_empty();
718
719 for result in reader.into_records() {
720 let record = result?;
721 if has_headers {
722 let mut obj = serde_json::Map::new();
723 for (i, header) in headers.iter().enumerate() {
724 if let Some(value) = record.get(i) {
725 obj.insert(header.to_string(), Value::String(value.to_string()));
726 }
727 }
728 records.push(Value::Object(obj));
729 } else {
730 let mut arr = Vec::new();
731 for field in record.iter() {
732 arr.push(Value::String(field.to_string()));
733 }
734 records.push(Value::Array(arr));
735 }
736 }
737 Ok(Value::Array(records))
738}
739
740pub fn parse_large_file<P: AsRef<Path>>(path: P) -> Result<Option<Value>> {
743 let file = File::open(&path)?;
744 let metadata = file.metadata()?;
745 let file_size = metadata.len();
746
747 const MAX_MEMORY_SIZE: u64 = 100 * 1024 * 1024;
749
750 if file_size > MAX_MEMORY_SIZE {
751 return Ok(None); }
753
754 let mut reader = BufReader::new(file);
755 let mut content = String::new();
756 reader.read_to_string(&mut content)?;
757
758 let path_str = path.as_ref().to_string_lossy();
760 if path_str.ends_with(".json") {
761 Ok(Some(serde_json::from_str(&content)?))
762 } else if path_str.ends_with(".yaml") || path_str.ends_with(".yml") {
763 Ok(Some(serde_yml::from_str(&content)?))
764 } else if path_str.ends_with(".toml") {
765 Ok(Some(toml::from_str(&content)?))
766 } else {
767 Err(anyhow!("Unsupported file format for large file parsing"))
768 }
769}
770
771pub fn diff_large_files<P: AsRef<Path>>(
773 path1: P,
774 path2: P,
775 ignore_keys_regex: Option<&Regex>,
776 epsilon: Option<f64>,
777 array_id_key: Option<&str>,
778) -> Result<Vec<DiffResult>> {
779 let v1_opt = parse_large_file(&path1)?;
781 let v2_opt = parse_large_file(&path2)?;
782
783 match (v1_opt, v2_opt) {
784 (Some(v1), Some(v2)) => {
785 Ok(diff(&v1, &v2, ignore_keys_regex, epsilon, array_id_key))
787 }
788 _ => {
789 streaming_diff(&path1, &path2, ignore_keys_regex, epsilon, array_id_key)
791 }
792 }
793}
794
795fn streaming_diff<P: AsRef<Path>>(
797 path1: P,
798 path2: P,
799 ignore_keys_regex: Option<&Regex>,
800 epsilon: Option<f64>,
801 array_id_key: Option<&str>,
802) -> Result<Vec<DiffResult>> {
803 let mut results = Vec::new();
806
807 let file1 = File::open(&path1)?;
809 let file2 = File::open(&path2)?;
810
811 let mut reader1 = BufReader::new(file1);
812 let mut reader2 = BufReader::new(file2);
813
814 let mut buffer1 = String::new();
815 let mut buffer2 = String::new();
816
817 reader1.read_to_string(&mut buffer1)?;
819 reader2.read_to_string(&mut buffer2)?;
820
821 let v1: Value = serde_json::from_str(&buffer1)
823 .or_else(|_| serde_yml::from_str(&buffer1))
824 .or_else(|_| toml::from_str(&buffer1))
825 .map_err(|e| anyhow!("Failed to parse file 1: {}", e))?;
826
827 let v2: Value = serde_json::from_str(&buffer2)
828 .or_else(|_| serde_yml::from_str(&buffer2))
829 .or_else(|_| toml::from_str(&buffer2))
830 .map_err(|e| anyhow!("Failed to parse file 2: {}", e))?;
831
832 drop(buffer1);
834 drop(buffer2);
835
836 memory_efficient_diff(
838 &v1,
839 &v2,
840 &mut results,
841 ignore_keys_regex,
842 epsilon,
843 array_id_key,
844 false, false, );
847
848 Ok(results)
849}
850
851#[allow(clippy::too_many_arguments)]
853fn memory_efficient_diff(
854 v1: &Value,
855 v2: &Value,
856 results: &mut Vec<DiffResult>,
857 ignore_keys_regex: Option<&Regex>,
858 epsilon: Option<f64>,
859 array_id_key: Option<&str>,
860 ignore_whitespace: bool,
861 ignore_case: bool,
862) {
863 if !values_are_equal_with_config(v1, v2, epsilon, ignore_whitespace, ignore_case) {
865 let type_match = matches!(
866 (v1, v2),
867 (Value::Null, Value::Null)
868 | (Value::Bool(_), Value::Bool(_))
869 | (Value::Number(_), Value::Number(_))
870 | (Value::Array(_), Value::Array(_))
871 | (Value::Object(_), Value::Object(_))
872 );
873
874 if !type_match {
875 results.push(DiffResult::TypeChanged(
876 "".to_string(),
877 v1.clone(),
878 v2.clone(),
879 ));
880 } else if v1.is_object() && v2.is_object() {
881 memory_efficient_diff_objects(
882 "",
883 v1.as_object().unwrap(),
884 v2.as_object().unwrap(),
885 results,
886 ignore_keys_regex,
887 epsilon,
888 array_id_key,
889 ignore_whitespace,
890 ignore_case,
891 );
892 } else if v1.is_array() && v2.is_array() {
893 memory_efficient_diff_arrays(
894 "",
895 v1.as_array().unwrap(),
896 v2.as_array().unwrap(),
897 results,
898 ignore_keys_regex,
899 epsilon,
900 array_id_key,
901 ignore_whitespace,
902 ignore_case,
903 );
904 } else {
905 results.push(DiffResult::Modified("".to_string(), v1.clone(), v2.clone()));
906 }
907 }
908}
909
910#[allow(clippy::too_many_arguments)]
912fn memory_efficient_diff_objects(
913 path: &str,
914 map1: &serde_json::Map<String, Value>,
915 map2: &serde_json::Map<String, Value>,
916 results: &mut Vec<DiffResult>,
917 ignore_keys_regex: Option<&Regex>,
918 epsilon: Option<f64>,
919 array_id_key: Option<&str>,
920 ignore_whitespace: bool,
921 ignore_case: bool,
922) {
923 const BATCH_SIZE: usize = 1000;
925
926 let keys1: Vec<_> = map1.keys().collect();
927 let keys2: Vec<_> = map2.keys().collect();
928
929 for chunk in keys1.chunks(BATCH_SIZE) {
931 for key in chunk {
932 if let Some(regex) = ignore_keys_regex {
933 if regex.is_match(key) {
934 continue;
935 }
936 }
937
938 let current_path = if path.is_empty() {
939 (*key).clone()
940 } else {
941 format!("{path}.{key}")
942 };
943
944 match (map1.get(*key), map2.get(*key)) {
945 (Some(value1), Some(value2)) => {
946 if value1.is_object() && value2.is_object() {
947 memory_efficient_diff_objects(
948 ¤t_path,
949 value1.as_object().unwrap(),
950 value2.as_object().unwrap(),
951 results,
952 ignore_keys_regex,
953 epsilon,
954 array_id_key,
955 ignore_whitespace,
956 ignore_case,
957 );
958 } else if value1.is_array() && value2.is_array() {
959 memory_efficient_diff_arrays(
960 ¤t_path,
961 value1.as_array().unwrap(),
962 value2.as_array().unwrap(),
963 results,
964 ignore_keys_regex,
965 epsilon,
966 array_id_key,
967 ignore_whitespace,
968 ignore_case,
969 );
970 } else if !values_are_equal_with_config(
971 value1,
972 value2,
973 epsilon,
974 ignore_whitespace,
975 ignore_case,
976 ) {
977 let type_match = matches!(
978 (value1, value2),
979 (Value::Null, Value::Null)
980 | (Value::Bool(_), Value::Bool(_))
981 | (Value::Number(_), Value::Number(_))
982 | (Value::String(_), Value::String(_))
983 | (Value::Array(_), Value::Array(_))
984 | (Value::Object(_), Value::Object(_))
985 );
986
987 if !type_match {
988 results.push(DiffResult::TypeChanged(
989 current_path,
990 value1.clone(),
991 value2.clone(),
992 ));
993 } else {
994 results.push(DiffResult::Modified(
995 current_path,
996 value1.clone(),
997 value2.clone(),
998 ));
999 }
1000 }
1001 }
1002 (Some(value1), None) => {
1003 results.push(DiffResult::Removed(current_path, value1.clone()));
1004 }
1005 (None, Some(_)) => {
1006 }
1008 (None, None) => {
1009 }
1011 }
1012 }
1013 }
1014
1015 for chunk in keys2.chunks(BATCH_SIZE) {
1017 for key in chunk {
1018 if !map1.contains_key(*key) {
1019 let current_path = if path.is_empty() {
1020 (*key).clone()
1021 } else {
1022 format!("{path}.{key}")
1023 };
1024 if let Some(value2) = map2.get(*key) {
1025 results.push(DiffResult::Added(current_path, value2.clone()));
1026 }
1027 }
1028 }
1029 }
1030}
1031
1032#[allow(clippy::too_many_arguments)]
1034fn memory_efficient_diff_arrays(
1035 path: &str,
1036 arr1: &[Value],
1037 arr2: &[Value],
1038 results: &mut Vec<DiffResult>,
1039 ignore_keys_regex: Option<&Regex>,
1040 epsilon: Option<f64>,
1041 array_id_key: Option<&str>,
1042 ignore_whitespace: bool,
1043 ignore_case: bool,
1044) {
1045 const BATCH_SIZE: usize = 10000;
1047
1048 if arr1.len() > BATCH_SIZE || arr2.len() > BATCH_SIZE {
1049 let max_len = arr1.len().max(arr2.len());
1051 for chunk_start in (0..max_len).step_by(BATCH_SIZE) {
1052 let chunk_end = (chunk_start + BATCH_SIZE).min(max_len);
1053 let chunk1 = arr1.get(chunk_start..chunk_end).unwrap_or(&[]);
1054 let chunk2 = arr2.get(chunk_start..chunk_end).unwrap_or(&[]);
1055
1056 diff_arrays(
1058 path,
1059 chunk1,
1060 chunk2,
1061 results,
1062 ignore_keys_regex,
1063 epsilon,
1064 array_id_key,
1065 ignore_whitespace,
1066 ignore_case,
1067 );
1068 }
1069 } else {
1070 diff_arrays(
1072 path,
1073 arr1,
1074 arr2,
1075 results,
1076 ignore_keys_regex,
1077 epsilon,
1078 array_id_key,
1079 ignore_whitespace,
1080 ignore_case,
1081 );
1082 }
1083}
1084
1085