diffx_core/
lib.rs

1use anyhow::{anyhow, Result};
2use csv::ReaderBuilder;
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use std::collections::HashMap;
7use std::fs;
8use std::path::Path;
9
10// ============================================================================
11// UNIFIED API - Core Types
12// ============================================================================
13
14#[derive(Debug, PartialEq, Serialize, Clone)]
15pub enum DiffResult {
16    Added(String, Value),
17    Removed(String, Value),
18    Modified(String, Value, Value),
19    TypeChanged(String, Value, Value),
20}
21
22impl std::fmt::Display for DiffResult {
23    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
24        match self {
25            DiffResult::Added(key, value) => {
26                write!(f, "  + {key}: {value}")
27            }
28            DiffResult::Removed(key, value) => {
29                write!(f, "  - {key}: {value}")
30            }
31            DiffResult::Modified(key, value1, value2) => {
32                write!(f, "  ~ {key}: {value1} -> {value2}")
33            }
34            DiffResult::TypeChanged(key, value1, value2) => {
35                write!(f, "  # {key}: {value1} -> {value2} (type changed)")
36            }
37        }
38    }
39}
40
41// Lightweight diff result for memory-constrained operations
42#[derive(Debug, PartialEq, Serialize)]
43pub enum LightweightDiffResult {
44    Added(String, String),
45    Removed(String, String),
46    Modified(String, String, String),
47    TypeChanged(String, String, String),
48}
49
50impl From<&DiffResult> for LightweightDiffResult {
51    fn from(result: &DiffResult) -> Self {
52        match result {
53            DiffResult::Added(path, value) => LightweightDiffResult::Added(
54                path.clone(),
55                serde_json::to_string(value).unwrap_or_default(),
56            ),
57            DiffResult::Removed(path, value) => LightweightDiffResult::Removed(
58                path.clone(),
59                serde_json::to_string(value).unwrap_or_default(),
60            ),
61            DiffResult::Modified(path, old, new) => LightweightDiffResult::Modified(
62                path.clone(),
63                serde_json::to_string(old).unwrap_or_default(),
64                serde_json::to_string(new).unwrap_or_default(),
65            ),
66            DiffResult::TypeChanged(path, old, new) => LightweightDiffResult::TypeChanged(
67                path.clone(),
68                serde_json::to_string(old).unwrap_or_default(),
69                serde_json::to_string(new).unwrap_or_default(),
70            ),
71        }
72    }
73}
74
75#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
76pub enum OutputFormat {
77    #[serde(rename = "diffx")]
78    #[default]
79    Diffx,
80    #[serde(rename = "json")]
81    Json,
82    #[serde(rename = "yaml")]
83    Yaml,
84}
85
86// Manual ValueEnum implementation since it's not available in core
87impl OutputFormat {
88    pub fn value_variants() -> &'static [Self] {
89        &[Self::Diffx, Self::Json, Self::Yaml]
90    }
91
92    pub fn parse_format(s: &str) -> Result<Self> {
93        match s.to_lowercase().as_str() {
94            "diffx" => Ok(Self::Diffx),
95            "json" => Ok(Self::Json),
96            "yaml" | "yml" => Ok(Self::Yaml),
97            _ => Err(anyhow!("Invalid output format: {}", s)),
98        }
99    }
100}
101
/// Options that only apply to diffx itself.
///
/// Every field is optional; `None` is treated like `false` wherever the
/// comparison code reads it.
#[derive(Clone, Debug, Default)]
pub struct DiffxSpecificOptions {
    /// When `Some(true)`, all whitespace characters are removed from both
    /// strings before they are compared.
    pub ignore_whitespace: Option<bool>,
    /// When `Some(true)`, both strings are lowercased before they are compared.
    pub ignore_case: Option<bool>,
    /// Brief-mode flag. Not read by the comparison code in this part of the
    /// file; presumably honored by the caller (TODO confirm).
    pub brief_mode: Option<bool>,
    /// Quiet-mode flag. Not read by the comparison code in this part of the
    /// file; presumably honored by the caller (TODO confirm).
    pub quiet_mode: Option<bool>,
}
109
110#[derive(Debug, Clone, Default)]
111pub struct DiffOptions {
112    // Core comparison options
113    pub epsilon: Option<f64>,
114    pub array_id_key: Option<String>,
115    pub ignore_keys_regex: Option<Regex>,
116    pub path_filter: Option<String>,
117
118    // Output control
119    pub output_format: Option<OutputFormat>,
120    pub show_unchanged: Option<bool>,
121    pub show_types: Option<bool>,
122
123    // Memory optimization
124    pub use_memory_optimization: Option<bool>,
125    pub batch_size: Option<usize>,
126
127    // diffx-specific options
128    pub diffx_options: Option<DiffxSpecificOptions>,
129}
130
131// ============================================================================
132// UNIFIED API - Main Function
133// ============================================================================
134
135/// Unified diff function for diffx (path-based entry point)
136///
137/// This is the main entry point that handles both files and directories automatically.
138/// - File vs File: Regular file comparison
139/// - Directory vs Directory: Recursive directory comparison  
140/// - File vs Directory: Returns error
141pub fn diff_paths(
142    old_path: &str,
143    new_path: &str,
144    options: Option<&DiffOptions>,
145) -> Result<Vec<DiffResult>> {
146    use std::path::Path;
147
148    let path1 = Path::new(old_path);
149    let path2 = Path::new(new_path);
150
151    match (path1.is_dir(), path2.is_dir()) {
152        (true, true) => diff_directories(path1, path2, options),
153        (false, false) => diff_files(path1, path2, options),
154        (true, false) => Err(anyhow!(
155            "Cannot compare directory '{}' with file '{}'",
156            old_path,
157            new_path
158        )),
159        (false, true) => Err(anyhow!(
160            "Cannot compare file '{}' with directory '{}'",
161            old_path,
162            new_path
163        )),
164    }
165}
166
167/// Unified diff function for diffx (Value-based)
168///
169/// This function operates on pre-parsed JSON values.
170/// For file/directory operations, use diff_paths() instead.
171pub fn diff(old: &Value, new: &Value, options: Option<&DiffOptions>) -> Result<Vec<DiffResult>> {
172    let default_options = DiffOptions::default();
173    let opts = options.unwrap_or(&default_options);
174
175    // Apply memory optimization if requested
176    if opts.use_memory_optimization.unwrap_or(false) {
177        diff_optimized_implementation(old, new, opts)
178    } else {
179        diff_standard_implementation(old, new, opts)
180    }
181}
182
183fn diff_standard_implementation(
184    old: &Value,
185    new: &Value,
186    options: &DiffOptions,
187) -> Result<Vec<DiffResult>> {
188    let mut results = Vec::new();
189    diff_recursive(old, new, "", &mut results, options);
190    Ok(results)
191}
192
193fn diff_optimized_implementation(
194    old: &Value,
195    new: &Value,
196    options: &DiffOptions,
197) -> Result<Vec<DiffResult>> {
198    // Check memory limits
199    if would_exceed_memory_limit(old, new) {
200        return Err(anyhow!("Input too large for memory optimization"));
201    }
202
203    diff_standard_implementation(old, new, options)
204}
205
206fn diff_files(
207    path1: &Path,
208    path2: &Path,
209    options: Option<&DiffOptions>,
210) -> Result<Vec<DiffResult>> {
211    // Read file contents
212    let content1 = fs::read_to_string(path1)?;
213    let content2 = fs::read_to_string(path2)?;
214
215    // Detect formats based on file extensions
216    let format1 = detect_format_from_path(path1);
217    let format2 = detect_format_from_path(path2);
218
219    // Parse content based on detected formats
220    let value1 = parse_content_by_format(&content1, format1)?;
221    let value2 = parse_content_by_format(&content2, format2)?;
222
223    // Use existing diff implementation
224    diff(&value1, &value2, options)
225}
226
227fn diff_directories(
228    dir1: &Path,
229    dir2: &Path,
230    options: Option<&DiffOptions>,
231) -> Result<Vec<DiffResult>> {
232    let mut results = Vec::new();
233
234    // Get all files in both directories recursively
235    let files1 = get_all_files_recursive(dir1)?;
236    let files2 = get_all_files_recursive(dir2)?;
237
238    // Create maps for easier lookup (relative path -> absolute path)
239    let files1_map: HashMap<String, &Path> = files1
240        .iter()
241        .filter_map(|path| {
242            path.strip_prefix(dir1)
243                .ok()
244                .map(|rel| (rel.to_string_lossy().to_string(), path.as_path()))
245        })
246        .collect();
247
248    let files2_map: HashMap<String, &Path> = files2
249        .iter()
250        .filter_map(|path| {
251            path.strip_prefix(dir2)
252                .ok()
253                .map(|rel| (rel.to_string_lossy().to_string(), path.as_path()))
254        })
255        .collect();
256
257    // Find files that exist in dir1 but not in dir2 (removed)
258    for (rel_path, abs_path1) in &files1_map {
259        if !files2_map.contains_key(rel_path) {
260            let content = fs::read_to_string(abs_path1).unwrap_or_default();
261            if let Ok(value) = parse_content_by_format(&content, detect_format_from_path(abs_path1))
262            {
263                results.push(DiffResult::Removed(rel_path.clone(), value));
264            }
265        }
266    }
267
268    // Find files that exist in dir2 but not in dir1 (added)
269    for (rel_path, abs_path2) in &files2_map {
270        if !files1_map.contains_key(rel_path) {
271            let content = fs::read_to_string(abs_path2).unwrap_or_default();
272            if let Ok(value) = parse_content_by_format(&content, detect_format_from_path(abs_path2))
273            {
274                results.push(DiffResult::Added(rel_path.clone(), value));
275            }
276        }
277    }
278
279    // Find files that exist in both directories (compare contents)
280    for (rel_path, abs_path1) in &files1_map {
281        if let Some(abs_path2) = files2_map.get(rel_path) {
282            match diff_files(abs_path1, abs_path2, options) {
283                Ok(mut file_results) => {
284                    // Prefix all paths with the relative path
285                    for result in &mut file_results {
286                        match result {
287                            DiffResult::Added(path, _) => *path = format!("{rel_path}/{path}"),
288                            DiffResult::Removed(path, _) => *path = format!("{rel_path}/{path}"),
289                            DiffResult::Modified(path, _, _) => {
290                                *path = format!("{rel_path}/{path}")
291                            }
292                            DiffResult::TypeChanged(path, _, _) => {
293                                *path = format!("{rel_path}/{path}")
294                            }
295                        }
296                    }
297                    results.extend(file_results);
298                }
299                Err(_) => {
300                    // If file comparison fails, skip this file
301                    continue;
302                }
303            }
304        }
305    }
306
307    Ok(results)
308}
309
310fn get_all_files_recursive(dir: &Path) -> Result<Vec<std::path::PathBuf>> {
311    let mut files = Vec::new();
312
313    if dir.is_dir() {
314        for entry in fs::read_dir(dir)? {
315            let entry = entry?;
316            let path = entry.path();
317
318            if path.is_dir() {
319                files.extend(get_all_files_recursive(&path)?);
320            } else if path.is_file() {
321                files.push(path);
322            }
323        }
324    }
325
326    Ok(files)
327}
328
/// Input formats that can be parsed into a JSON value.
#[derive(Clone, Copy, Debug)]
enum FileFormat {
    /// JSON documents.
    Json,
    /// YAML documents.
    Yaml,
    /// Comma-separated values with a header row.
    Csv,
    /// TOML documents.
    Toml,
    /// INI-style configuration files.
    Ini,
    /// XML documents.
    Xml,
}
338
339fn detect_format_from_path(path: &Path) -> FileFormat {
340    match path.extension().and_then(|ext| ext.to_str()) {
341        Some("json") => FileFormat::Json,
342        Some("yaml") | Some("yml") => FileFormat::Yaml,
343        Some("csv") => FileFormat::Csv,
344        Some("toml") => FileFormat::Toml,
345        Some("ini") | Some("cfg") => FileFormat::Ini,
346        Some("xml") => FileFormat::Xml,
347        _ => FileFormat::Json, // Default fallback
348    }
349}
350
351fn parse_content_by_format(content: &str, format: FileFormat) -> Result<Value> {
352    match format {
353        FileFormat::Json => parse_json(content),
354        FileFormat::Yaml => parse_yaml(content),
355        FileFormat::Csv => parse_csv(content),
356        FileFormat::Toml => parse_toml(content),
357        FileFormat::Ini => parse_ini(content),
358        FileFormat::Xml => parse_xml(content),
359    }
360}
361
362// Helper function to add result with path filtering
363fn add_diff_result(result: DiffResult, results: &mut Vec<DiffResult>, options: &DiffOptions) {
364    // Apply path filter if specified
365    if let Some(filter) = &options.path_filter {
366        let path = match &result {
367            DiffResult::Added(path, _) => path,
368            DiffResult::Removed(path, _) => path,
369            DiffResult::Modified(path, _, _) => path,
370            DiffResult::TypeChanged(path, _, _) => path,
371        };
372        if !path.contains(filter) {
373            return;
374        }
375    }
376    results.push(result);
377}
378
379fn diff_recursive(
380    old: &Value,
381    new: &Value,
382    path: &str,
383    results: &mut Vec<DiffResult>,
384    options: &DiffOptions,
385) {
386    match (old, new) {
387        (Value::Object(old_obj), Value::Object(new_obj)) => {
388            diff_objects(old_obj, new_obj, path, results, options);
389        }
390        (Value::Array(old_arr), Value::Array(new_arr)) => {
391            diff_arrays(old_arr, new_arr, path, results, options);
392        }
393        (Value::Number(old_num), Value::Number(new_num)) => {
394            if let Some(epsilon) = options.epsilon {
395                let old_f = old_num.as_f64().unwrap_or(0.0);
396                let new_f = new_num.as_f64().unwrap_or(0.0);
397                if (old_f - new_f).abs() > epsilon {
398                    add_diff_result(
399                        DiffResult::Modified(path.to_string(), old.clone(), new.clone()),
400                        results,
401                        options,
402                    );
403                }
404            } else if old != new {
405                add_diff_result(
406                    DiffResult::Modified(path.to_string(), old.clone(), new.clone()),
407                    results,
408                    options,
409                );
410            }
411        }
412        (Value::String(old_str), Value::String(new_str)) => {
413            let mut old_processed = old_str.clone();
414            let mut new_processed = new_str.clone();
415
416            // Apply string transformations based on options
417            if let Some(diffx_opts) = &options.diffx_options {
418                if diffx_opts.ignore_whitespace.unwrap_or(false) {
419                    old_processed = old_processed
420                        .chars()
421                        .filter(|c| !c.is_whitespace())
422                        .collect();
423                    new_processed = new_processed
424                        .chars()
425                        .filter(|c| !c.is_whitespace())
426                        .collect();
427                }
428                if diffx_opts.ignore_case.unwrap_or(false) {
429                    old_processed = old_processed.to_lowercase();
430                    new_processed = new_processed.to_lowercase();
431                }
432            }
433
434            if old_processed != new_processed {
435                add_diff_result(
436                    DiffResult::Modified(path.to_string(), old.clone(), new.clone()),
437                    results,
438                    options,
439                );
440            }
441        }
442        _ => {
443            if old != new {
444                if old.type_name() != new.type_name() {
445                    add_diff_result(
446                        DiffResult::TypeChanged(path.to_string(), old.clone(), new.clone()),
447                        results,
448                        options,
449                    );
450                } else {
451                    // For other types, just do regular comparison
452                    add_diff_result(
453                        DiffResult::Modified(path.to_string(), old.clone(), new.clone()),
454                        results,
455                        options,
456                    );
457                }
458            }
459        }
460    }
461}
462
463fn diff_objects(
464    old_obj: &serde_json::Map<String, Value>,
465    new_obj: &serde_json::Map<String, Value>,
466    path: &str,
467    results: &mut Vec<DiffResult>,
468    options: &DiffOptions,
469) {
470    // Handle ignore_keys_regex
471    let should_ignore_key = |key: &str| -> bool {
472        if let Some(regex) = &options.ignore_keys_regex {
473            regex.is_match(key)
474        } else {
475            false
476        }
477    };
478
479    // Check for removed keys
480    for (key, old_value) in old_obj {
481        if should_ignore_key(key) {
482            continue;
483        }
484
485        let new_path = if path.is_empty() {
486            key.clone()
487        } else {
488            format!("{path}.{key}")
489        };
490
491        if !new_obj.contains_key(key) {
492            add_diff_result(
493                DiffResult::Removed(new_path, old_value.clone()),
494                results,
495                options,
496            );
497        }
498    }
499
500    // Check for added and modified keys
501    for (key, new_value) in new_obj {
502        if should_ignore_key(key) {
503            continue;
504        }
505
506        let new_path = if path.is_empty() {
507            key.clone()
508        } else {
509            format!("{path}.{key}")
510        };
511
512        match old_obj.get(key) {
513            None => {
514                add_diff_result(
515                    DiffResult::Added(new_path, new_value.clone()),
516                    results,
517                    options,
518                );
519            }
520            Some(old_value) => {
521                diff_recursive(old_value, new_value, &new_path, results, options);
522            }
523        }
524    }
525}
526
527fn diff_arrays(
528    old_arr: &[Value],
529    new_arr: &[Value],
530    path: &str,
531    results: &mut Vec<DiffResult>,
532    options: &DiffOptions,
533) {
534    if let Some(id_key) = &options.array_id_key {
535        diff_arrays_with_id(old_arr, new_arr, path, results, options, id_key);
536    } else {
537        diff_arrays_by_index(old_arr, new_arr, path, results, options);
538    }
539}
540
541fn diff_arrays_with_id(
542    old_arr: &[Value],
543    new_arr: &[Value],
544    path: &str,
545    results: &mut Vec<DiffResult>,
546    options: &DiffOptions,
547    id_key: &str,
548) {
549    let mut old_by_id: HashMap<String, (usize, &Value)> = HashMap::new();
550    let mut new_by_id: HashMap<String, (usize, &Value)> = HashMap::new();
551    let mut old_without_id: Vec<(usize, &Value)> = Vec::new();
552    let mut new_without_id: Vec<(usize, &Value)> = Vec::new();
553
554    // Separate items with IDs from those without
555    for (index, item) in old_arr.iter().enumerate() {
556        if let Some(id_value) = item.get(id_key) {
557            let id_str = match id_value {
558                Value::String(s) => format!("\"{s}\""), // Add quotes for strings
559                Value::Number(n) => n.to_string(),
560                Value::Bool(b) => b.to_string(),
561                _ => format!("{id_value:?}"),
562            };
563            old_by_id.insert(id_str, (index, item));
564        } else {
565            old_without_id.push((index, item));
566        }
567    }
568
569    for (index, item) in new_arr.iter().enumerate() {
570        if let Some(id_value) = item.get(id_key) {
571            let id_str = match id_value {
572                Value::String(s) => format!("\"{s}\""), // Add quotes for strings
573                Value::Number(n) => n.to_string(),
574                Value::Bool(b) => b.to_string(),
575                _ => format!("{id_value:?}"),
576            };
577            new_by_id.insert(id_str, (index, item));
578        } else {
579            new_without_id.push((index, item));
580        }
581    }
582
583    // Handle items with IDs
584    // Find removed items
585    for (id, (_, old_item)) in &old_by_id {
586        if !new_by_id.contains_key(id) {
587            let item_path = if path.is_empty() {
588                format!("[{id_key}={id}]")
589            } else {
590                format!("{path}[{id_key}={id}]")
591            };
592            results.push(DiffResult::Removed(item_path, (*old_item).clone()));
593        }
594    }
595
596    // Find added and modified items with IDs
597    for (id, (_, new_item)) in &new_by_id {
598        let item_path = if path.is_empty() {
599            format!("[{id_key}={id}]")
600        } else {
601            format!("{path}[{id_key}={id}]")
602        };
603
604        match old_by_id.get(id) {
605            None => {
606                results.push(DiffResult::Added(item_path, (*new_item).clone()));
607            }
608            Some((_, old_item)) => {
609                diff_recursive(old_item, new_item, &item_path, results, options);
610            }
611        }
612    }
613
614    // Handle items without IDs by index
615    let max_len = old_without_id.len().max(new_without_id.len());
616    for i in 0..max_len {
617        match (old_without_id.get(i), new_without_id.get(i)) {
618            (Some((old_index, old_item)), Some((_, new_item))) => {
619                let item_path = if path.is_empty() {
620                    format!("[{old_index}]")
621                } else {
622                    format!("{path}[{old_index}]")
623                };
624                diff_recursive(old_item, new_item, &item_path, results, options);
625            }
626            (Some((old_index, old_item)), None) => {
627                let item_path = if path.is_empty() {
628                    format!("[{old_index}]")
629                } else {
630                    format!("{path}[{old_index}]")
631                };
632                results.push(DiffResult::Removed(item_path, (*old_item).clone()));
633            }
634            (None, Some((new_index, new_item))) => {
635                let item_path = if path.is_empty() {
636                    format!("[{new_index}]")
637                } else {
638                    format!("{path}[{new_index}]")
639                };
640                results.push(DiffResult::Added(item_path, (*new_item).clone()));
641            }
642            (None, None) => unreachable!(),
643        }
644    }
645}
646
647fn diff_arrays_by_index(
648    old_arr: &[Value],
649    new_arr: &[Value],
650    path: &str,
651    results: &mut Vec<DiffResult>,
652    options: &DiffOptions,
653) {
654    let max_len = old_arr.len().max(new_arr.len());
655
656    for i in 0..max_len {
657        let item_path = format!("{path}[{i}]");
658
659        match (old_arr.get(i), new_arr.get(i)) {
660            (Some(old_item), Some(new_item)) => {
661                diff_recursive(old_item, new_item, &item_path, results, options);
662            }
663            (Some(old_item), None) => {
664                results.push(DiffResult::Removed(item_path, old_item.clone()));
665            }
666            (None, Some(new_item)) => {
667                results.push(DiffResult::Added(item_path, new_item.clone()));
668            }
669            (None, None) => unreachable!(),
670        }
671    }
672}
673
674// ============================================================================
675// BACKWARD COMPATIBILITY FUNCTIONS - REMOVED PER UNIFIED API SPECIFICATION
676// ============================================================================
677// All backward compatibility functions have been removed to comply with
678// the unified API design philosophy: only the single diff() function should be exposed.
679
680// ============================================================================
681// PARSER FUNCTIONS - FOR INTERNAL USE ONLY
682// ============================================================================
683// These functions are public only for CLI and language bindings.
684// External users should use the main diff() function with file reading.
685
686/// Parse JSON content - FOR INTERNAL USE ONLY
687/// External users should read files themselves and use diff() function
688pub fn parse_json(content: &str) -> Result<Value> {
689    serde_json::from_str(content).map_err(|e| anyhow!("JSON parse error: {}", e))
690}
691
692/// Parse CSV content - FOR INTERNAL USE ONLY
693pub fn parse_csv(content: &str) -> Result<Value> {
694    let mut reader = ReaderBuilder::new()
695        .has_headers(true)
696        .from_reader(content.as_bytes());
697
698    let headers = reader.headers()?.clone();
699    let mut records = Vec::new();
700
701    for result in reader.records() {
702        let record = result?;
703        let mut map = serde_json::Map::new();
704
705        for (i, field) in record.iter().enumerate() {
706            if let Some(header) = headers.get(i) {
707                map.insert(header.to_string(), Value::String(field.to_string()));
708            }
709        }
710
711        records.push(Value::Object(map));
712    }
713
714    Ok(Value::Array(records))
715}
716
717/// Parse YAML content - FOR INTERNAL USE ONLY
718pub fn parse_yaml(content: &str) -> Result<Value> {
719    serde_yaml::from_str(content).map_err(|e| anyhow!("YAML parse error: {}", e))
720}
721
722/// Parse TOML content - FOR INTERNAL USE ONLY
723pub fn parse_toml(content: &str) -> Result<Value> {
724    let toml_value: toml::Value = content.parse()?;
725    toml_to_json_value(toml_value)
726}
727
728fn toml_to_json_value(toml_val: toml::Value) -> Result<Value> {
729    match toml_val {
730        toml::Value::String(s) => Ok(Value::String(s)),
731        toml::Value::Integer(i) => Ok(Value::Number(i.into())),
732        toml::Value::Float(f) => Ok(Value::Number(
733            serde_json::Number::from_f64(f).ok_or_else(|| anyhow!("Invalid float"))?,
734        )),
735        toml::Value::Boolean(b) => Ok(Value::Bool(b)),
736        toml::Value::Array(arr) => {
737            let mut json_arr = Vec::new();
738            for item in arr {
739                json_arr.push(toml_to_json_value(item)?);
740            }
741            Ok(Value::Array(json_arr))
742        }
743        toml::Value::Table(table) => {
744            let mut json_obj = serde_json::Map::new();
745            for (key, value) in table {
746                json_obj.insert(key, toml_to_json_value(value)?);
747            }
748            Ok(Value::Object(json_obj))
749        }
750        toml::Value::Datetime(dt) => Ok(Value::String(dt.to_string())),
751    }
752}
753
754/// Parse INI content - FOR INTERNAL USE ONLY
755pub fn parse_ini(content: &str) -> Result<Value> {
756    let mut result = serde_json::Map::new();
757    let mut current_section = String::new();
758    let mut global_section = serde_json::Map::new();
759
760    for line in content.lines() {
761        let line = line.trim();
762
763        if line.is_empty() || line.starts_with(';') || line.starts_with('#') {
764            continue;
765        }
766
767        if line.starts_with('[') && line.ends_with(']') {
768            current_section = line[1..line.len() - 1].to_string();
769            result.insert(
770                current_section.clone(),
771                Value::Object(serde_json::Map::new()),
772            );
773        } else if let Some(eq_pos) = line.find('=') {
774            let key = line[..eq_pos].trim().to_string();
775            let value = line[eq_pos + 1..].trim().to_string();
776
777            if current_section.is_empty() {
778                global_section.insert(key, Value::String(value));
779            } else if let Some(Value::Object(section)) = result.get_mut(&current_section) {
780                section.insert(key, Value::String(value));
781            }
782        }
783    }
784
785    // Add global section if it exists
786    if !global_section.is_empty() {
787        result.insert("default".to_string(), Value::Object(global_section));
788    }
789
790    Ok(Value::Object(result))
791}
792
793/// Parse XML content - FOR INTERNAL USE ONLY
794pub fn parse_xml(content: &str) -> Result<Value> {
795    use quick_xml::events::Event;
796    use quick_xml::Reader;
797
798    let mut reader = Reader::from_str(content);
799    reader.trim_text(true);
800
801    // Stack-based parsing for nested structures
802    let mut stack: Vec<(String, serde_json::Map<String, Value>)> = Vec::new();
803    let mut root: Option<(String, serde_json::Map<String, Value>)> = None;
804    let mut current_text = String::new();
805
806    loop {
807        match reader.read_event() {
808            Ok(Event::Start(ref e)) => {
809                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
810                let mut element = serde_json::Map::new();
811
812                // Parse attributes
813                for attr in e.attributes().flatten() {
814                    let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
815                    let value = String::from_utf8_lossy(&attr.value).to_string();
816                    element.insert(key, Value::String(value));
817                }
818
819                // If we have text content to add to parent
820                if !current_text.trim().is_empty() && !stack.is_empty() {
821                    let (_, parent) = stack.last_mut().unwrap();
822                    parent.insert(
823                        "text".to_string(),
824                        Value::String(current_text.trim().to_string()),
825                    );
826                }
827                current_text.clear();
828
829                // Push new element to stack
830                stack.push((tag_name, element));
831            }
832            Ok(Event::Text(e)) => {
833                let text = e.unescape().unwrap_or_default().to_string();
834                if !text.trim().is_empty() {
835                    current_text.push_str(&text);
836                }
837            }
838            Ok(Event::CData(e)) => {
839                // Handle CDATA sections
840                let cdata_text = String::from_utf8_lossy(&e).to_string();
841                current_text.push_str(&cdata_text);
842            }
843            Ok(Event::End(ref e)) => {
844                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
845
846                if let Some((name, mut element)) = stack.pop() {
847                    if name == tag_name {
848                        // Add any remaining text content
849                        if !current_text.trim().is_empty() {
850                            // If element only has text (no attributes or children), make it a simple string
851                            if element.is_empty() {
852                                let text_value = Value::String(current_text.trim().to_string());
853                                current_text.clear();
854
855                                if let Some((_, parent)) = stack.last_mut() {
856                                    // Add to parent
857                                    add_to_parent(parent, &name, text_value);
858                                } else {
859                                    // This is the root element
860                                    root = Some((
861                                        name.clone(),
862                                        serde_json::Map::from_iter(vec![(name, text_value)]),
863                                    ));
864                                }
865                                continue;
866                            } else {
867                                element.insert(
868                                    "text".to_string(),
869                                    Value::String(current_text.trim().to_string()),
870                                );
871                            }
872                        }
873                        current_text.clear();
874
875                        // Convert element to Value
876                        let element_value = if element.is_empty() {
877                            Value::Object(serde_json::Map::new())
878                        } else if element.len() == 1 && element.contains_key("text") {
879                            element.get("text").unwrap().clone()
880                        } else {
881                            Value::Object(element)
882                        };
883
884                        if let Some((_, parent)) = stack.last_mut() {
885                            // Add to parent
886                            add_to_parent(parent, &name, element_value);
887                        } else {
888                            // This is the root element
889                            let mut root_map = serde_json::Map::new();
890                            root_map.insert(name.clone(), element_value);
891                            root = Some((name.clone(), root_map));
892                        }
893                    }
894                }
895            }
896            Ok(Event::Empty(ref e)) => {
897                let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
898                let mut element = serde_json::Map::new();
899
900                // Parse attributes
901                for attr in e.attributes().flatten() {
902                    let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
903                    let value = String::from_utf8_lossy(&attr.value).to_string();
904                    element.insert(key, Value::String(value));
905                }
906
907                let element_value = Value::Object(element);
908
909                if let Some((_, parent)) = stack.last_mut() {
910                    // Add to parent
911                    add_to_parent(parent, &tag_name, element_value);
912                } else {
913                    // This is a root-level empty element
914                    let mut root_map = serde_json::Map::new();
915                    root_map.insert(tag_name.clone(), element_value);
916                    root = Some((tag_name.clone(), root_map));
917                }
918            }
919            Ok(Event::Eof) => break,
920            Err(e) => return Err(anyhow!("XML parsing error: {}", e)),
921            _ => {}
922        }
923    }
924
925    // Return the root element
926    if let Some((_, root_map)) = root {
927        Ok(Value::Object(root_map))
928    } else {
929        Ok(Value::Object(serde_json::Map::new()))
930    }
931}
932
933// Helper function to add a child element to a parent
934fn add_to_parent(parent: &mut serde_json::Map<String, Value>, key: &str, value: Value) {
935    if let Some(existing) = parent.get_mut(key) {
936        match existing {
937            Value::Array(arr) => {
938                arr.push(value);
939            }
940            other => {
941                let _ = std::mem::replace(other, Value::Array(vec![other.clone(), value]));
942            }
943        }
944    } else {
945        parent.insert(key.to_string(), value);
946    }
947}
948
949// ============================================================================
950// UTILITY FUNCTIONS - FOR INTERNAL USE ONLY
951// ============================================================================
952// These functions are public only for CLI and language bindings.
953// External users should use the main diff() function.
954
955/// Get type name of a JSON value - FOR INTERNAL USE ONLY
956pub fn value_type_name(value: &Value) -> &str {
957    match value {
958        Value::Null => "Null",
959        Value::Bool(_) => "Boolean",
960        Value::Number(_) => "Number",
961        Value::String(_) => "String",
962        Value::Array(_) => "Array",
963        Value::Object(_) => "Object",
964    }
965}
966
967/// Estimate memory usage of a JSON value - FOR INTERNAL USE ONLY
968pub fn estimate_memory_usage(value: &Value) -> usize {
969    match value {
970        Value::Null => 0,
971        Value::Bool(_) => 1,
972        Value::Number(_) => 8,
973        Value::String(s) => s.len(),
974        Value::Array(arr) => arr.iter().map(estimate_memory_usage).sum::<usize>() + 24,
975        Value::Object(obj) => {
976            obj.iter()
977                .map(|(k, v)| k.len() + estimate_memory_usage(v))
978                .sum::<usize>()
979                + 24
980        }
981    }
982}
983
984/// Check if values would exceed memory limit - FOR INTERNAL USE ONLY
985pub fn would_exceed_memory_limit(v1: &Value, v2: &Value) -> bool {
986    const MAX_MEMORY_MB: usize = 100;
987    const BYTES_PER_MB: usize = 1024 * 1024;
988
989    let total_size = estimate_memory_usage(v1) + estimate_memory_usage(v2);
990    total_size > MAX_MEMORY_MB * BYTES_PER_MB
991}
992
993/// Format output to string - FOR INTERNAL USE ONLY
994pub fn format_output<T: Serialize>(results: &[T], format: OutputFormat) -> Result<String> {
995    match format {
996        OutputFormat::Json => serde_json::to_string_pretty(results)
997            .map_err(|e| anyhow!("JSON serialization error: {}", e)),
998        OutputFormat::Yaml => {
999            serde_yaml::to_string(results).map_err(|e| anyhow!("YAML serialization error: {}", e))
1000        }
1001        OutputFormat::Diffx => {
1002            let mut output = String::new();
1003            for result in results {
1004                let json = serde_json::to_string(result)?;
1005                output.push_str(&json);
1006                output.push('\n');
1007            }
1008            Ok(output)
1009        }
1010    }
1011}
1012
1013/// Format DiffResult output using proper Display implementation
1014pub fn format_diff_output(
1015    results: &[DiffResult],
1016    format: OutputFormat,
1017    _options: Option<&DiffOptions>,
1018) -> Result<String> {
1019    match format {
1020        OutputFormat::Json => serde_json::to_string_pretty(results)
1021            .map_err(|e| anyhow!("JSON serialization error: {}", e)),
1022        OutputFormat::Yaml => {
1023            let mut output = String::new();
1024            for result in results {
1025                match result {
1026                    DiffResult::Added(path, value) => {
1027                        output.push_str("- Added:\n");
1028                        output.push_str(&format!("  - {path}\n"));
1029                        output.push_str(&format!(
1030                            "  - {}\n",
1031                            serde_yaml::to_string(value).unwrap_or_default().trim()
1032                        ));
1033                    }
1034                    DiffResult::Removed(path, value) => {
1035                        output.push_str("- Removed:\n");
1036                        output.push_str(&format!("  - {path}\n"));
1037                        output.push_str(&format!(
1038                            "  - {}\n",
1039                            serde_yaml::to_string(value).unwrap_or_default().trim()
1040                        ));
1041                    }
1042                    DiffResult::Modified(path, old_value, new_value) => {
1043                        output.push_str("- Modified:\n");
1044                        output.push_str(&format!("  - {path}\n"));
1045                        output.push_str(&format!(
1046                            "  - {}\n",
1047                            serde_yaml::to_string(old_value).unwrap_or_default().trim()
1048                        ));
1049                        output.push_str(&format!(
1050                            "  - {}\n",
1051                            serde_yaml::to_string(new_value).unwrap_or_default().trim()
1052                        ));
1053                    }
1054                    DiffResult::TypeChanged(path, old_value, new_value) => {
1055                        output.push_str("- TypeChanged:\n");
1056                        output.push_str(&format!("  - {path}\n"));
1057                        output.push_str(&format!(
1058                            "  - {}\n",
1059                            serde_yaml::to_string(old_value).unwrap_or_default().trim()
1060                        ));
1061                        output.push_str(&format!(
1062                            "  - {}\n",
1063                            serde_yaml::to_string(new_value).unwrap_or_default().trim()
1064                        ));
1065                    }
1066                }
1067            }
1068            Ok(output)
1069        }
1070        OutputFormat::Diffx => {
1071            let mut output = String::new();
1072            for result in results {
1073                output.push_str(&result.to_string());
1074                output.push('\n');
1075            }
1076            Ok(output)
1077        }
1078    }
1079}
1080
1081// ============================================================================
1082// TRAITS
1083// ============================================================================
1084
/// Extension trait that exposes a value's type name through method syntax.
1085trait ValueTypeExt {
    /// Returns the name of the implementor's type as a string slice.
1086    fn type_name(&self) -> &str;
1087}
1088
/// `ValueTypeExt` for JSON values: `type_name` delegates to `value_type_name`.
1089impl ValueTypeExt for Value {
    /// Returns the label produced by `value_type_name` for this value.
1090    fn type_name(&self) -> &str {
1091        value_type_name(self)
1092    }
1093}
1094
1095// ============================================================================
1096// DIRECTORY HANDLING TESTS
1097// ============================================================================
1098
#[cfg(test)]
mod directory_tests {
    use super::*;
    use std::fs;

    /// Builds a scratch path inside the system temp directory.
    ///
    /// The current process id is embedded in the name so that concurrent test
    /// runs (for example two checkouts tested at once) do not overwrite each
    /// other's fixtures. `name` keeps its extension at the end of the file name.
    fn scratch_path(name: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!("diffx_test_{}_{}", std::process::id(), name))
    }

    /// Two JSON files differing in a single field must yield exactly one diff.
    #[test]
    fn test_diff_paths_files() {
        // Test file vs file comparison
        let file1_path = scratch_path("1.json");
        let file2_path = scratch_path("2.json");

        fs::write(&file1_path, r#"{"name": "test", "value": 1}"#).unwrap();
        fs::write(&file2_path, r#"{"name": "test", "value": 2}"#).unwrap();

        let outcome = diff_paths(
            &file1_path.to_string_lossy(),
            &file2_path.to_string_lossy(),
            None,
        );

        // Cleanup before asserting so a failing test does not leave fixtures behind.
        let _ = fs::remove_file(&file1_path);
        let _ = fs::remove_file(&file2_path);

        let results = outcome.unwrap();
        assert_eq!(results.len(), 1);
    }

    /// Comparing a file against a directory must be rejected with a clear error.
    #[test]
    fn test_diff_paths_file_vs_directory_error() {
        let file_path = scratch_path("file.json");
        let dir_path = scratch_path("dir");

        fs::write(&file_path, r#"{"test": true}"#).unwrap();
        fs::create_dir_all(&dir_path).unwrap();

        let result = diff_paths(
            &file_path.to_string_lossy(),
            &dir_path.to_string_lossy(),
            None,
        );

        // Cleanup before asserting so a failing test does not leave fixtures behind.
        let _ = fs::remove_file(&file_path);
        let _ = fs::remove_dir_all(&dir_path);

        assert!(result.is_err());
        let message = result.unwrap_err().to_string();
        assert!(
            message.contains("Cannot compare file"),
            "unexpected error message: {message}"
        );
    }
}