diffx_core/
lib.rs

1use regex::Regex;
2use serde::Serialize;
3use serde_json::Value;
4use std::collections::HashMap;
5// use ini::Ini;
6use anyhow::{anyhow, Result};
7use csv::ReaderBuilder;
8use quick_xml::de::from_str;
9
/// A single difference found between two JSON values.
///
/// The first field of every variant is the path of the affected location.
/// Object keys are joined with `.` and array elements are addressed as
/// `[index]` or `[id_key=id]`; the root value uses the empty string.
#[derive(Debug, PartialEq, Serialize)]
pub enum DiffResult {
    /// The path exists only in the second value; carries the new value.
    Added(String, Value),
    /// The path exists only in the first value; carries the old value.
    Removed(String, Value),
    /// Both values have the same JSON type but differ; carries (old, new).
    Modified(String, Value, Value),
    /// The JSON type at the path differs between the two values; carries (old, new).
    TypeChanged(String, Value, Value),
}
17
18pub fn diff(
19    v1: &Value,
20    v2: &Value,
21    ignore_keys_regex: Option<&Regex>,
22    epsilon: Option<f64>,
23    array_id_key: Option<&str>,
24) -> Vec<DiffResult> {
25    let mut results = Vec::new();
26
27    // Handle root level type or value change first
28    if !values_are_equal(v1, v2, epsilon) {
29        let type_match = matches!(
30            (v1, v2),
31            (Value::Null, Value::Null)
32                | (Value::Bool(_), Value::Bool(_))
33                | (Value::Number(_), Value::Number(_))
34                | (Value::String(_), Value::String(_))
35                | (Value::Array(_), Value::Array(_))
36                | (Value::Object(_), Value::Object(_))
37        );
38
39        if !type_match {
40            results.push(DiffResult::TypeChanged(
41                "".to_string(),
42                v1.clone(),
43                v2.clone(),
44            ));
45            return results; // If root type changed, no further diffing needed
46        } else if v1.is_object() && v2.is_object() {
47            diff_objects(
48                "",
49                v1.as_object().unwrap(),
50                v2.as_object().unwrap(),
51                &mut results,
52                ignore_keys_regex,
53                epsilon,
54                array_id_key,
55            );
56        } else if v1.is_array() && v2.is_array() {
57            diff_arrays(
58                "",
59                v1.as_array().unwrap(),
60                v2.as_array().unwrap(),
61                &mut results,
62                ignore_keys_regex,
63                epsilon,
64                array_id_key,
65            );
66        } else {
67            // Simple value modification at root
68            results.push(DiffResult::Modified("".to_string(), v1.clone(), v2.clone()));
69            return results;
70        }
71    }
72
73    results
74}
75
76fn diff_recursive(
77    path: &str,
78    v1: &Value,
79    v2: &Value,
80    results: &mut Vec<DiffResult>,
81    ignore_keys_regex: Option<&Regex>,
82    epsilon: Option<f64>,
83    array_id_key: Option<&str>,
84) {
85    match (v1, v2) {
86        (Value::Object(map1), Value::Object(map2)) => {
87            diff_objects(
88                path,
89                map1,
90                map2,
91                results,
92                ignore_keys_regex,
93                epsilon,
94                array_id_key,
95            );
96        }
97        (Value::Array(arr1), Value::Array(arr2)) => {
98            diff_arrays(
99                path,
100                arr1,
101                arr2,
102                results,
103                ignore_keys_regex,
104                epsilon,
105                array_id_key,
106            );
107        }
108        _ => { /* Should not happen if called correctly from diff_objects/diff_arrays */ }
109    }
110}
111
112fn diff_objects(
113    path: &str,
114    map1: &serde_json::Map<String, Value>,
115    map2: &serde_json::Map<String, Value>,
116    results: &mut Vec<DiffResult>,
117    ignore_keys_regex: Option<&Regex>,
118    epsilon: Option<f64>,
119    array_id_key: Option<&str>,
120) {
121    // Check for modified or removed keys
122    for (key, value1) in map1 {
123        let current_path = if path.is_empty() {
124            key.clone()
125        } else {
126            format!("{path}.{key}")
127        };
128        if let Some(regex) = ignore_keys_regex {
129            if regex.is_match(key) {
130                continue;
131            }
132        }
133        match map2.get(key) {
134            Some(value2) => {
135                // Recurse for nested objects/arrays
136                if value1.is_object() && value2.is_object()
137                    || value1.is_array() && value2.is_array()
138                {
139                    diff_recursive(
140                        &current_path,
141                        value1,
142                        value2,
143                        results,
144                        ignore_keys_regex,
145                        epsilon,
146                        array_id_key,
147                    );
148                } else if !values_are_equal(value1, value2, epsilon) {
149                    let type_match = matches!(
150                        (value1, value2),
151                        (Value::Null, Value::Null)
152                            | (Value::Bool(_), Value::Bool(_))
153                            | (Value::Number(_), Value::Number(_))
154                            | (Value::String(_), Value::String(_))
155                            | (Value::Array(_), Value::Array(_))
156                            | (Value::Object(_), Value::Object(_))
157                    );
158
159                    if !type_match {
160                        results.push(DiffResult::TypeChanged(
161                            current_path,
162                            value1.clone(),
163                            value2.clone(),
164                        ));
165                    } else {
166                        results.push(DiffResult::Modified(
167                            current_path,
168                            value1.clone(),
169                            value2.clone(),
170                        ));
171                    }
172                }
173            }
174            None => {
175                results.push(DiffResult::Removed(current_path, value1.clone()));
176            }
177        }
178    }
179
180    // Check for added keys
181    for (key, value2) in map2 {
182        if !map1.contains_key(key) {
183            let current_path = if path.is_empty() {
184                key.clone()
185            } else {
186                format!("{path}.{key}")
187            };
188            results.push(DiffResult::Added(current_path, value2.clone()));
189        }
190    }
191}
192
193fn diff_arrays(
194    path: &str,
195    arr1: &[Value],
196    arr2: &[Value],
197    results: &mut Vec<DiffResult>,
198    ignore_keys_regex: Option<&Regex>,
199    epsilon: Option<f64>,
200    array_id_key: Option<&str>,
201) {
202    if let Some(id_key) = array_id_key {
203        let mut map1: HashMap<Value, &Value> = HashMap::new();
204        let mut no_id_elements1: Vec<(usize, &Value)> = Vec::new();
205        for (i, val) in arr1.iter().enumerate() {
206            if let Some(id_val) = val.get(id_key) {
207                map1.insert(id_val.clone(), val);
208            } else {
209                no_id_elements1.push((i, val));
210            }
211        }
212
213        let mut map2: HashMap<Value, &Value> = HashMap::new();
214        let mut no_id_elements2: Vec<(usize, &Value)> = Vec::new();
215        for (i, val) in arr2.iter().enumerate() {
216            if let Some(id_val) = val.get(id_key) {
217                map2.insert(id_val.clone(), val);
218            } else {
219                no_id_elements2.push((i, val));
220            }
221        }
222
223        // Check for modified or removed elements
224        for (id_val, val1) in &map1 {
225            let current_path = format!("{path}[{id_key}={id_val}]");
226            match map2.get(id_val) {
227                Some(val2) => {
228                    // Recurse for nested objects/arrays
229                    if val1.is_object() && val2.is_object() || val1.is_array() && val2.is_array() {
230                        diff_recursive(
231                            &current_path,
232                            val1,
233                            val2,
234                            results,
235                            ignore_keys_regex,
236                            epsilon,
237                            array_id_key,
238                        );
239                    } else if !values_are_equal(val1, val2, epsilon) {
240                        let type_match = matches!(
241                            (val1, val2),
242                            (Value::Null, Value::Null)
243                                | (Value::Bool(_), Value::Bool(_))
244                                | (Value::Number(_), Value::Number(_))
245                                | (Value::String(_), Value::String(_))
246                                | (Value::Array(_), Value::Array(_))
247                                | (Value::Object(_), Value::Object(_))
248                        );
249
250                        if !type_match {
251                            results.push(DiffResult::TypeChanged(
252                                current_path,
253                                (*val1).clone(),
254                                (*val2).clone(),
255                            ));
256                        } else {
257                            results.push(DiffResult::Modified(
258                                current_path,
259                                (*val1).clone(),
260                                (*val2).clone(),
261                            ));
262                        }
263                    }
264                }
265                None => {
266                    results.push(DiffResult::Removed(current_path, (*val1).clone()));
267                }
268            }
269        }
270
271        // Check for added elements with ID
272        for (id_val, val2) in map2 {
273            if !map1.contains_key(&id_val) {
274                let current_path = format!("{path}[{id_key}={id_val}]");
275                results.push(DiffResult::Added(current_path, val2.clone()));
276            }
277        }
278
279        // Handle elements without ID using index-based comparison
280        let max_len = no_id_elements1.len().max(no_id_elements2.len());
281        for i in 0..max_len {
282            match (no_id_elements1.get(i), no_id_elements2.get(i)) {
283                (Some((idx1, val1)), Some((_idx2, val2))) => {
284                    let current_path = format!("{path}[{idx1}]");
285                    if val1.is_object() && val2.is_object() || val1.is_array() && val2.is_array() {
286                        diff_recursive(
287                            &current_path,
288                            val1,
289                            val2,
290                            results,
291                            ignore_keys_regex,
292                            epsilon,
293                            array_id_key,
294                        );
295                    } else if !values_are_equal(val1, val2, epsilon) {
296                        let type_match = matches!(
297                            (val1, val2),
298                            (Value::Null, Value::Null)
299                                | (Value::Bool(_), Value::Bool(_))
300                                | (Value::Number(_), Value::Number(_))
301                                | (Value::String(_), Value::String(_))
302                                | (Value::Array(_), Value::Array(_))
303                                | (Value::Object(_), Value::Object(_))
304                        );
305
306                        if !type_match {
307                            results.push(DiffResult::TypeChanged(
308                                current_path,
309                                (*val1).clone(),
310                                (*val2).clone(),
311                            ));
312                        } else {
313                            results.push(DiffResult::Modified(
314                                current_path,
315                                (*val1).clone(),
316                                (*val2).clone(),
317                            ));
318                        }
319                    }
320                }
321                (Some((idx1, val1)), None) => {
322                    let current_path = format!("{path}[{idx1}]");
323                    results.push(DiffResult::Removed(current_path, (*val1).clone()));
324                }
325                (None, Some((idx2, val2))) => {
326                    let current_path = format!("{path}[{idx2}]");
327                    results.push(DiffResult::Added(current_path, (*val2).clone()));
328                }
329                (None, None) => break,
330            }
331        }
332    } else {
333        // Fallback to index-based comparison if no id_key is provided
334        let max_len = arr1.len().max(arr2.len());
335        for i in 0..max_len {
336            let current_path = format!("{path}[{i}]");
337            match (arr1.get(i), arr2.get(i)) {
338                (Some(val1), Some(val2)) => {
339                    // Recurse for nested objects/arrays within arrays
340                    if val1.is_object() && val2.is_object() || val1.is_array() && val2.is_array() {
341                        diff_recursive(
342                            &current_path,
343                            val1,
344                            val2,
345                            results,
346                            ignore_keys_regex,
347                            epsilon,
348                            array_id_key,
349                        );
350                    } else if !values_are_equal(val1, val2, epsilon) {
351                        let type_match = matches!(
352                            (val1, val2),
353                            (Value::Null, Value::Null)
354                                | (Value::Bool(_), Value::Bool(_))
355                                | (Value::Number(_), Value::Number(_))
356                                | (Value::String(_), Value::String(_))
357                                | (Value::Array(_), Value::Array(_))
358                                | (Value::Object(_), Value::Object(_))
359                        );
360
361                        if !type_match {
362                            results.push(DiffResult::TypeChanged(
363                                current_path,
364                                val1.clone(),
365                                val2.clone(),
366                            ));
367                        } else {
368                            results.push(DiffResult::Modified(
369                                current_path,
370                                val1.clone(),
371                                val2.clone(),
372                            ));
373                        }
374                    }
375                }
376                (Some(val1), None) => {
377                    results.push(DiffResult::Removed(current_path, val1.clone()));
378                }
379                (None, Some(val2)) => {
380                    results.push(DiffResult::Added(current_path, val2.clone()));
381                }
382                (None, None) => { /* Should not happen */ }
383            }
384        }
385    }
386}
387
388fn values_are_equal(v1: &Value, v2: &Value, epsilon: Option<f64>) -> bool {
389    if let (Some(e), Value::Number(n1), Value::Number(n2)) = (epsilon, v1, v2) {
390        if let (Some(f1), Some(f2)) = (n1.as_f64(), n2.as_f64()) {
391            return (f1 - f2).abs() < e;
392        }
393    }
394    v1 == v2
395}
396
397pub fn value_type_name(value: &Value) -> &str {
398    match value {
399        Value::Null => "Null",
400        Value::Bool(_) => "Boolean",
401        Value::Number(_) => "Number",
402        Value::String(_) => "String",
403        Value::Array(_) => "Array",
404        Value::Object(_) => "Object",
405    }
406}
407
408pub fn parse_ini(content: &str) -> Result<Value> {
409    use configparser::ini::Ini;
410
411    let mut ini = Ini::new();
412    ini.read(content.to_string())
413        .map_err(|e| anyhow!("Failed to parse INI: {}", e))?;
414
415    let mut root_map = serde_json::Map::new();
416
417    for section_name in ini.sections() {
418        let mut section_map = serde_json::Map::new();
419
420        if let Some(section) = ini.get_map_ref().get(&section_name) {
421            for (key, value) in section {
422                if let Some(v) = value {
423                    section_map.insert(key.clone(), Value::String(v.clone()));
424                } else {
425                    section_map.insert(key.clone(), Value::Null);
426                }
427            }
428        }
429
430        root_map.insert(section_name, Value::Object(section_map));
431    }
432
433    Ok(Value::Object(root_map))
434}
435
436pub fn parse_xml(content: &str) -> Result<Value> {
437    let value: Value = from_str(content)?;
438    Ok(value)
439}
440
441pub fn parse_csv(content: &str) -> Result<Value> {
442    let mut reader = ReaderBuilder::new().from_reader(content.as_bytes());
443    let mut records = Vec::new();
444
445    let headers = reader.headers()?.clone();
446    let has_headers = !headers.is_empty();
447
448    for result in reader.into_records() {
449        let record = result?;
450        if has_headers {
451            let mut obj = serde_json::Map::new();
452            for (i, header) in headers.iter().enumerate() {
453                if let Some(value) = record.get(i) {
454                    obj.insert(header.to_string(), Value::String(value.to_string()));
455                }
456            }
457            records.push(Value::Object(obj));
458        } else {
459            let mut arr = Vec::new();
460            for field in record.iter() {
461                arr.push(Value::String(field.to_string()));
462            }
463            records.push(Value::Array(arr));
464        }
465    }
466    Ok(Value::Array(records))
467}