Skip to main content

fionn_diff/
tape_merge.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2//! Tape-based merge operations
3//!
4//! This module provides merge operations for tape-parsed data, enabling
5//! efficient cross-format merging without intermediate serialization.
6//!
7//! # Overview
8//!
9//! Merging works by:
10//! 1. Converting source tape to a mutable Value
11//! 2. Walking the target tape and applying merge rules
12//! 3. Returning the merged Value
13//!
14//! This is more efficient than serialize→parse→merge→serialize because:
15//! - Tape parsing is 10-100x faster than DOM parsing
16//! - Direct tape walking avoids intermediate allocations
17//! - Cross-format merges don't require format conversion
18//!
19//! # Example
20//!
21//! ```ignore
22//! use fionn_diff::{merge_tapes, deep_merge_tapes};
23//! use fionn_simd::transform::UnifiedTape;
24//! use fionn_core::format::FormatKind;
25//!
26//! // Parse YAML and JSON
27//! let yaml_tape = UnifiedTape::parse(yaml.as_bytes(), FormatKind::Yaml)?;
28//! let json_tape = UnifiedTape::parse(json.as_bytes(), FormatKind::Json)?;
29//!
30//! // Merge JSON over YAML (JSON takes precedence)
31//! let result = merge_tapes(&yaml_tape, &json_tape)?;
32//! ```
33
34use fionn_core::Result;
35use fionn_core::tape_source::{TapeNodeKind, TapeSource, TapeValue};
36use serde_json::{Map, Value};
37
38use crate::tape_patch::tape_to_value;
39
40// ============================================================================
41// Tape Merge (RFC 7396 semantics)
42// ============================================================================
43
44/// Merge two tapes using RFC 7396 (JSON Merge Patch) semantics
45///
46/// The `overlay` tape's values take precedence:
47/// - `null` values in overlay delete keys from target
48/// - Objects are recursively merged
49/// - Non-objects replace existing values
50///
51/// # Example
52///
53/// ```ignore
54/// let base = parse_yaml("name: Alice\nage: 30");
55/// let overlay = parse_json(r#"{"name": "Bob", "city": "NYC"}"#);
56///
57/// let result = merge_tapes(&base, &overlay)?;
58/// // {"name": "Bob", "age": 30, "city": "NYC"}
59/// ```
60///
61/// # Errors
62///
63/// Returns an error if either tape cannot be converted to a value.
64pub fn merge_tapes<S: TapeSource, T: TapeSource>(base: &S, overlay: &T) -> Result<Value> {
65    let mut base_value = tape_to_value(base)?;
66    apply_tape_merge(&mut base_value, overlay)?;
67    Ok(base_value)
68}
69
70/// Apply merge from a tape onto a mutable Value
71fn apply_tape_merge<T: TapeSource>(target: &mut Value, overlay: &T) -> Result<()> {
72    if overlay.is_empty() {
73        return Ok(());
74    }
75
76    let overlay_value = tape_to_value(overlay)?;
77    merge_value_into(target, &overlay_value);
78    Ok(())
79}
80
81/// RFC 7396 merge implementation
82fn merge_value_into(target: &mut Value, patch: &Value) {
83    // If patch is not an object, it replaces target entirely
84    if !patch.is_object() {
85        *target = patch.clone();
86        return;
87    }
88
89    // Ensure target is an object
90    if !target.is_object() {
91        *target = Value::Object(Map::new());
92    }
93
94    let target_obj = target.as_object_mut().expect("target should be object");
95    let patch_obj = patch.as_object().expect("patch should be object");
96
97    for (key, patch_value) in patch_obj {
98        if patch_value.is_null() {
99            // Null means delete
100            target_obj.remove(key);
101        } else if patch_value.is_object() {
102            // Recursive merge for objects
103            let target_value = target_obj
104                .entry(key.clone())
105                .or_insert_with(|| Value::Object(Map::new()));
106            merge_value_into(target_value, patch_value);
107        } else {
108            // Replace/add for non-objects
109            target_obj.insert(key.clone(), patch_value.clone());
110        }
111    }
112}
113
114// ============================================================================
115// Deep Merge (null-preserving)
116// ============================================================================
117
118/// Deep merge two tapes (preserves null values)
119///
120/// Unlike `merge_tapes` (RFC 7396), this preserves null values
121/// instead of treating them as deletions.
122///
123/// # Example
124///
125/// ```ignore
126/// let base = parse_json(r#"{"a": 1, "b": 2}"#);
127/// let overlay = parse_json(r#"{"b": null, "c": 3}"#);
128///
129/// let result = deep_merge_tapes(&base, &overlay)?;
130/// // {"a": 1, "b": null, "c": 3}
131/// ```
132///
133/// # Errors
134///
135/// Returns an error if either tape cannot be converted to a value.
136pub fn deep_merge_tapes<S: TapeSource, T: TapeSource>(base: &S, overlay: &T) -> Result<Value> {
137    let base_value = tape_to_value(base)?;
138    let overlay_value = tape_to_value(overlay)?;
139    Ok(deep_merge_values(&base_value, &overlay_value))
140}
141
142/// Deep merge two values (null-preserving)
143fn deep_merge_values(base: &Value, overlay: &Value) -> Value {
144    match (base, overlay) {
145        (Value::Object(base_obj), Value::Object(overlay_obj)) => {
146            let mut result = base_obj.clone();
147
148            for (key, overlay_value) in overlay_obj {
149                let merged = result.get(key).map_or_else(
150                    || overlay_value.clone(),
151                    |base_value| deep_merge_values(base_value, overlay_value),
152                );
153                result.insert(key.clone(), merged);
154            }
155
156            Value::Object(result)
157        }
158        // Non-objects: overlay wins
159        (_, overlay) => overlay.clone(),
160    }
161}
162
163// ============================================================================
164// Merge Many
165// ============================================================================
166
167/// Merge multiple tapes left-to-right using RFC 7396 semantics
168///
169/// Each subsequent tape is merged onto the accumulated result.
170///
171/// # Example
172///
173/// ```ignore
174/// let tapes: Vec<&dyn TapeSource> = vec![&base, &overlay1, &overlay2];
175/// let result = merge_many_tapes(tapes)?;
176/// ```
177///
178/// # Errors
179///
180/// Returns an error if any tape cannot be converted to a value.
181pub fn merge_many_tapes<T: TapeSource>(tapes: &[&T]) -> Result<Value> {
182    if tapes.is_empty() {
183        return Ok(Value::Object(Map::new()));
184    }
185
186    let mut result = tape_to_value(tapes[0])?;
187
188    for tape in &tapes[1..] {
189        let overlay = tape_to_value(*tape)?;
190        merge_value_into(&mut result, &overlay);
191    }
192
193    Ok(result)
194}
195
196// ============================================================================
197// Cross-Format Merge Helpers
198// ============================================================================
199
200/// Merge a tape onto an existing Value
201///
202/// Useful when building up merged results from multiple tapes.
203///
204/// # Errors
205///
206/// Returns an error if the overlay tape cannot be converted to a value.
207pub fn merge_tape_into_value<T: TapeSource>(target: &mut Value, overlay: &T) -> Result<()> {
208    apply_tape_merge(target, overlay)
209}
210
211/// Deep merge a tape onto an existing Value
212///
213/// # Errors
214///
215/// Returns an error if the overlay tape cannot be converted to a value.
216pub fn deep_merge_tape_into_value<T: TapeSource>(target: &mut Value, overlay: &T) -> Result<Value> {
217    let overlay_value = tape_to_value(overlay)?;
218    Ok(deep_merge_values(target, &overlay_value))
219}
220
221// ============================================================================
222// Streaming Merge (Memory-Efficient)
223// ============================================================================
224
225/// Options for streaming merge operations
226#[derive(Debug, Clone, Default)]
227pub struct StreamingMergeOptions {
228    /// Maximum depth for recursive merge (0 = unlimited)
229    pub max_depth: usize,
230    /// Stop merging after this many keys (0 = unlimited)
231    pub max_keys: usize,
232}
233
234impl StreamingMergeOptions {
235    /// Create default options
236    #[must_use]
237    pub fn new() -> Self {
238        Self::default()
239    }
240
241    /// Set maximum depth
242    #[must_use]
243    pub const fn with_max_depth(mut self, depth: usize) -> Self {
244        self.max_depth = depth;
245        self
246    }
247
248    /// Set maximum keys
249    #[must_use]
250    pub const fn with_max_keys(mut self, keys: usize) -> Self {
251        self.max_keys = keys;
252        self
253    }
254}
255
256/// Streaming merge that can be interrupted/resumed
257///
258/// Useful for very large documents where you want to:
259/// - Limit memory usage
260/// - Get partial results quickly
261/// - Cancel long-running merges
262///
263/// # Errors
264///
265/// Returns an error if either tape cannot be converted to a value.
266pub fn streaming_merge<S: TapeSource, T: TapeSource>(
267    base: &S,
268    overlay: &T,
269    options: &StreamingMergeOptions,
270) -> Result<Value> {
271    let mut result = tape_to_value(base)?;
272    streaming_merge_at(&mut result, overlay, 0, options, &mut 0)?;
273    Ok(result)
274}
275
276fn streaming_merge_at<T: TapeSource>(
277    target: &mut Value,
278    overlay: &T,
279    idx: usize,
280    options: &StreamingMergeOptions,
281    keys_processed: &mut usize,
282) -> Result<usize> {
283    if options.max_keys > 0 && *keys_processed >= options.max_keys {
284        // Skip remaining merge
285        return overlay.skip_value(idx);
286    }
287
288    let node = overlay.node_at(idx);
289
290    let Some(n) = node else {
291        return Ok(idx + 1);
292    };
293
294    if let TapeNodeKind::ObjectStart { count } = n.kind {
295        // Check depth limit
296        if options.max_depth > 0 {
297            // Would need depth tracking - skip for now
298        }
299
300        // Ensure target is object
301        if !target.is_object() {
302            *target = Value::Object(Map::new());
303        }
304
305        let target_obj = target.as_object_mut().unwrap();
306        let mut current_idx = idx + 1;
307
308        for _ in 0..count {
309            if options.max_keys > 0 && *keys_processed >= options.max_keys {
310                break;
311            }
312
313            // Get key
314            let key = overlay
315                .key_at(current_idx)
316                .map(std::borrow::Cow::into_owned)
317                .unwrap_or_default();
318            current_idx += 1;
319
320            *keys_processed += 1;
321
322            // Check for null (deletion)
323            if overlay.value_at(current_idx) == Some(TapeValue::Null) {
324                target_obj.remove(&key);
325                current_idx += 1;
326            } else if let Some(inner_node) = overlay.node_at(current_idx) {
327                if matches!(inner_node.kind, TapeNodeKind::ObjectStart { .. }) {
328                    // Recursive merge
329                    let entry = target_obj
330                        .entry(key)
331                        .or_insert_with(|| Value::Object(Map::new()));
332                    current_idx =
333                        streaming_merge_at(entry, overlay, current_idx, options, keys_processed)?;
334                } else {
335                    // Convert and replace
336                    let (value, next) = convert_tape_value(overlay, current_idx)?;
337                    target_obj.insert(key, value);
338                    current_idx = next;
339                }
340            } else {
341                current_idx = overlay.skip_value(current_idx)?;
342            }
343        }
344
345        Ok(current_idx)
346    } else {
347        // Non-object overlay replaces target
348        let (value, next) = convert_tape_value(overlay, idx)?;
349        *target = value;
350        Ok(next)
351    }
352}
353
354/// Convert a tape node to a Value
355fn convert_tape_value<T: TapeSource>(tape: &T, idx: usize) -> Result<(Value, usize)> {
356    let node = tape.node_at(idx);
357
358    match node {
359        Some(n) => match n.kind {
360            TapeNodeKind::ObjectStart { count } => {
361                let mut map = Map::with_capacity(count);
362                let mut current_idx = idx + 1;
363
364                for _ in 0..count {
365                    let key = tape
366                        .key_at(current_idx)
367                        .map(std::borrow::Cow::into_owned)
368                        .unwrap_or_default();
369                    current_idx += 1;
370
371                    let (value, next_idx) = convert_tape_value(tape, current_idx)?;
372                    map.insert(key, value);
373                    current_idx = next_idx;
374                }
375
376                Ok((Value::Object(map), current_idx))
377            }
378
379            TapeNodeKind::ArrayStart { count } => {
380                let mut arr = Vec::with_capacity(count);
381                let mut current_idx = idx + 1;
382
383                for _ in 0..count {
384                    let (value, next_idx) = convert_tape_value(tape, current_idx)?;
385                    arr.push(value);
386                    current_idx = next_idx;
387                }
388
389                Ok((Value::Array(arr), current_idx))
390            }
391
392            TapeNodeKind::Value | TapeNodeKind::Key => {
393                let value = n.value.map_or(Value::Null, tape_value_to_json);
394                Ok((value, idx + 1))
395            }
396
397            TapeNodeKind::ObjectEnd | TapeNodeKind::ArrayEnd => Ok((Value::Null, idx + 1)),
398        },
399        None => Ok((Value::Null, idx + 1)),
400    }
401}
402
403fn tape_value_to_json(val: TapeValue<'_>) -> Value {
404    match val {
405        TapeValue::Null => Value::Null,
406        TapeValue::Bool(b) => Value::Bool(b),
407        TapeValue::Int(n) => Value::Number(n.into()),
408        TapeValue::Float(f) => serde_json::Number::from_f64(f).map_or(Value::Null, Value::Number),
409        TapeValue::String(s) => Value::String(s.into_owned()),
410        TapeValue::RawNumber(s) => {
411            // Try parsing as i64 first, then f64, falling back to string
412            #[allow(clippy::option_if_let_else)]
413            // Chained if-let-else is clearer for fallback parsing
414            if let Ok(n) = s.parse::<i64>() {
415                Value::Number(n.into())
416            } else if let Ok(f) = s.parse::<f64>() {
417                serde_json::Number::from_f64(f)
418                    .map_or_else(|| Value::String(s.into_owned()), Value::Number)
419            } else {
420                Value::String(s.into_owned())
421            }
422        }
423    }
424}
425
426// ============================================================================
427// Tests
428// ============================================================================
429
430#[cfg(test)]
431mod tests {
432    use super::*;
433    use fionn_tape::DsonTape;
434
435    fn parse_json(s: &str) -> DsonTape {
436        DsonTape::parse(s).expect("valid JSON")
437    }
438
439    #[test]
440    fn test_merge_tapes_add_field() {
441        let base = parse_json(r#"{"a": 1}"#);
442        let overlay = parse_json(r#"{"b": 2}"#);
443
444        let result = merge_tapes(&base, &overlay).unwrap();
445
446        assert_eq!(result["a"], 1);
447        assert_eq!(result["b"], 2);
448    }
449
450    #[test]
451    fn test_merge_tapes_replace_field() {
452        let base = parse_json(r#"{"a": 1}"#);
453        let overlay = parse_json(r#"{"a": 2}"#);
454
455        let result = merge_tapes(&base, &overlay).unwrap();
456
457        assert_eq!(result["a"], 2);
458    }
459
460    #[test]
461    fn test_merge_tapes_delete_field() {
462        let base = parse_json(r#"{"a": 1, "b": 2}"#);
463        let overlay = parse_json(r#"{"a": null}"#);
464
465        let result = merge_tapes(&base, &overlay).unwrap();
466
467        assert!(result.get("a").is_none());
468        assert_eq!(result["b"], 2);
469    }
470
471    #[test]
472    fn test_merge_tapes_nested() {
473        let base = parse_json(r#"{"user": {"name": "Alice", "age": 30}}"#);
474        let overlay = parse_json(r#"{"user": {"age": null, "city": "NYC"}}"#);
475
476        let result = merge_tapes(&base, &overlay).unwrap();
477
478        assert_eq!(result["user"]["name"], "Alice");
479        assert!(result["user"].get("age").is_none());
480        assert_eq!(result["user"]["city"], "NYC");
481    }
482
483    #[test]
484    fn test_deep_merge_tapes_preserves_null() {
485        let base = parse_json(r#"{"a": 1}"#);
486        let overlay = parse_json(r#"{"a": null, "b": 2}"#);
487
488        let result = deep_merge_tapes(&base, &overlay).unwrap();
489
490        assert!(result["a"].is_null());
491        assert_eq!(result["b"], 2);
492    }
493
494    #[test]
495    fn test_deep_merge_tapes_nested() {
496        let base = parse_json(r#"{"outer": {"x": 1, "y": 2}}"#);
497        let overlay = parse_json(r#"{"outer": {"y": 20, "z": 3}}"#);
498
499        let result = deep_merge_tapes(&base, &overlay).unwrap();
500
501        assert_eq!(result["outer"]["x"], 1);
502        assert_eq!(result["outer"]["y"], 20);
503        assert_eq!(result["outer"]["z"], 3);
504    }
505
506    #[test]
507    fn test_merge_tapes_array_replacement() {
508        let base = parse_json(r#"{"arr": [1, 2, 3]}"#);
509        let overlay = parse_json(r#"{"arr": [4, 5]}"#);
510
511        let result = merge_tapes(&base, &overlay).unwrap();
512
513        let arr = result["arr"].as_array().unwrap();
514        assert_eq!(arr.len(), 2);
515        assert_eq!(arr[0], 4);
516        assert_eq!(arr[1], 5);
517    }
518
519    #[test]
520    fn test_merge_tape_into_value() {
521        let mut value = serde_json::json!({"a": 1});
522        let overlay = parse_json(r#"{"b": 2, "c": 3}"#);
523
524        merge_tape_into_value(&mut value, &overlay).unwrap();
525
526        assert_eq!(value["a"], 1);
527        assert_eq!(value["b"], 2);
528        assert_eq!(value["c"], 3);
529    }
530
531    #[test]
532    fn test_streaming_merge_basic() {
533        let base = parse_json(r#"{"a": 1}"#);
534        let overlay = parse_json(r#"{"b": 2}"#);
535        let options = StreamingMergeOptions::new();
536
537        let result = streaming_merge(&base, &overlay, &options).unwrap();
538
539        assert_eq!(result["a"], 1);
540        assert_eq!(result["b"], 2);
541    }
542
543    #[test]
544    fn test_streaming_merge_with_max_keys() {
545        let base = parse_json(r#"{"a": 1}"#);
546        let overlay = parse_json(r#"{"b": 2, "c": 3, "d": 4}"#);
547        let options = StreamingMergeOptions::new().with_max_keys(2);
548
549        let result = streaming_merge(&base, &overlay, &options).unwrap();
550
551        // Should have base + limited overlay keys
552        assert_eq!(result["a"], 1);
553        // Only some overlay keys should be merged due to limit
554    }
555
556    #[test]
557    fn test_merge_empty_tapes() {
558        let base = parse_json(r"{}");
559        let overlay = parse_json(r"{}");
560
561        let result = merge_tapes(&base, &overlay).unwrap();
562
563        assert!(result.is_object());
564        assert!(result.as_object().unwrap().is_empty());
565    }
566
567    #[test]
568    fn test_merge_into_empty_base() {
569        let base = parse_json(r"{}");
570        let overlay = parse_json(r#"{"a": 1, "b": 2}"#);
571
572        let result = merge_tapes(&base, &overlay).unwrap();
573
574        assert_eq!(result["a"], 1);
575        assert_eq!(result["b"], 2);
576    }
577
578    #[test]
579    fn test_merge_complex_nested() {
580        let base = parse_json(
581            r#"{"config": {"server": {"host": "localhost", "port": 8080}, "database": {"url": "db://local"}}}"#,
582        );
583        let overlay =
584            parse_json(r#"{"config": {"server": {"port": 9000}, "logging": {"level": "debug"}}}"#);
585
586        let result = merge_tapes(&base, &overlay).unwrap();
587
588        assert_eq!(result["config"]["server"]["host"], "localhost");
589        assert_eq!(result["config"]["server"]["port"], 9000);
590        assert_eq!(result["config"]["database"]["url"], "db://local");
591        assert_eq!(result["config"]["logging"]["level"], "debug");
592    }
593}