Skip to main content

mnem_core/prolly/
diff.rs

1//! Structural diff of two Prolly trees.
2//!
3//! The diff walks both trees simultaneously. Any pair of child CIDs that
4//! compare equal is pruned immediately - the subtrees are byte-identical by
5//! construction, so they cannot contain any differences. This is the
6//! Prolly payoff: a 10,000-entry tree with 10 edits is diffed by touching
7//! only the O(d × log n) chunks on the edited paths, not the whole tree.
8//!
9//! Output is a `Vec<DiffEntry>` sorted ascending by key. Entries describe
10//! insertions, removals, and value changes from `a` to `b`.
11
12use std::cmp::Ordering;
13
14use crate::error::Error;
15use crate::id::Cid;
16use crate::prolly::constants::ProllyKey;
17use crate::prolly::tree::{Internal, TreeChunk, load_tree_chunk};
18use crate::store::Blockstore;
19
20/// A single difference between two Prolly trees.
21#[derive(Clone, Debug, PartialEq, Eq)]
22pub enum DiffEntry {
23    /// Key is present in `b` but not `a`.
24    Added {
25        /// The new key.
26        key: ProllyKey,
27        /// Its value CID in `b`.
28        value: Cid,
29    },
30    /// Key is present in `a` but not `b`.
31    Removed {
32        /// The removed key.
33        key: ProllyKey,
34        /// Its value CID in `a`.
35        value: Cid,
36    },
37    /// Key is present in both trees but with different value CIDs.
38    Changed {
39        /// The key with a changed value.
40        key: ProllyKey,
41        /// Old value CID (`a`-side).
42        before: Cid,
43        /// New value CID (`b`-side).
44        after: Cid,
45    },
46}
47
48impl DiffEntry {
49    /// The key affected by this diff entry.
50    #[must_use]
51    pub const fn key(&self) -> &ProllyKey {
52        match self {
53            Self::Added { key, .. } | Self::Removed { key, .. } | Self::Changed { key, .. } => key,
54        }
55    }
56}
57
58/// Compute the difference between the two Prolly trees `a` and `b`.
59///
60/// Returns every added / removed / changed entry, in ascending key order.
61///
62/// # Errors
63///
64/// Propagates store and codec errors.
65pub fn diff<B: Blockstore + ?Sized>(
66    store: &B,
67    root_a: &Cid,
68    root_b: &Cid,
69) -> Result<Vec<DiffEntry>, Error> {
70    let mut out = Vec::new();
71    diff_cids(store, root_a, root_b, &mut out)?;
72    Ok(out)
73}
74
75// ---------------- Recursive core ----------------
76
77fn diff_cids<B: Blockstore + ?Sized>(
78    store: &B,
79    a: &Cid,
80    b: &Cid,
81    out: &mut Vec<DiffEntry>,
82) -> Result<(), Error> {
83    if a == b {
84        return Ok(());
85    }
86    let chunk_a = load_tree_chunk(store, a)?;
87    let chunk_b = load_tree_chunk(store, b)?;
88    match (chunk_a, chunk_b) {
89        (TreeChunk::Leaf(la), TreeChunk::Leaf(lb)) => {
90            diff_sorted_entries(&la.entries, &lb.entries, out);
91            Ok(())
92        }
93        (TreeChunk::Internal(ia), TreeChunk::Internal(ib)) => diff_internals(store, &ia, &ib, out),
94        (TreeChunk::Leaf(la), TreeChunk::Internal(ib)) => {
95            let mut flat = Vec::new();
96            collect_entries_internal(store, &ib, &mut flat)?;
97            diff_sorted_entries(&la.entries, &flat, out);
98            Ok(())
99        }
100        (TreeChunk::Internal(ia), TreeChunk::Leaf(lb)) => {
101            let mut flat = Vec::new();
102            collect_entries_internal(store, &ia, &mut flat)?;
103            diff_sorted_entries(&flat, &lb.entries, out);
104            Ok(())
105        }
106    }
107}
108
109fn diff_internals<B: Blockstore + ?Sized>(
110    store: &B,
111    ia: &Internal,
112    ib: &Internal,
113    out: &mut Vec<DiffEntry>,
114) -> Result<(), Error> {
115    // Merge the two boundary sets into a common set of key-ranges.
116    let mut all: Vec<ProllyKey> = Vec::with_capacity(ia.boundaries.len() + ib.boundaries.len());
117    all.extend_from_slice(&ia.boundaries);
118    all.extend_from_slice(&ib.boundaries);
119    all.sort();
120    all.dedup();
121
122    let n_ranges = all.len() + 1;
123    let mut last_pair: Option<(usize, usize)> = None;
124
125    for range_idx in 0..n_ranges {
126        // A range is identified by its lower boundary: range 0 has no lower
127        // bound (neg-infinity), range i>0 has lower bound `all[i - 1]`.
128        // For each side, find the child whose range overlaps this one by
129        // `partition_point(|b| b <= lower_bound)`.
130        let lower = if range_idx == 0 {
131            None
132        } else {
133            Some(all[range_idx - 1])
134        };
135        let a_idx = child_index_for(&ia.boundaries, lower.as_ref());
136        let b_idx = child_index_for(&ib.boundaries, lower.as_ref());
137
138        // Consecutive ranges that map to the same child pair were already
139        // handled by the previous iteration - skip to avoid duplicate
140        // recursion.
141        if Some((a_idx, b_idx)) == last_pair {
142            continue;
143        }
144        last_pair = Some((a_idx, b_idx));
145
146        diff_cids(store, &ia.children[a_idx], &ib.children[b_idx], out)?;
147    }
148    Ok(())
149}
150
151fn child_index_for(boundaries: &[ProllyKey], lower: Option<&ProllyKey>) -> usize {
152    match lower {
153        None => 0,
154        Some(k) => boundaries.partition_point(|b| b <= k),
155    }
156}
157
158fn collect_entries_internal<B: Blockstore + ?Sized>(
159    store: &B,
160    internal: &Internal,
161    out: &mut Vec<(ProllyKey, Cid)>,
162) -> Result<(), Error> {
163    for child_cid in &internal.children {
164        match load_tree_chunk(store, child_cid)? {
165            TreeChunk::Leaf(l) => out.extend(l.entries),
166            TreeChunk::Internal(i) => collect_entries_internal(store, &i, out)?,
167        }
168    }
169    Ok(())
170}
171
172/// Merge-diff two sorted `(key, value)` slices. Pushes [`DiffEntry`]
173/// variants to `out` in ascending key order.
174fn diff_sorted_entries(a: &[(ProllyKey, Cid)], b: &[(ProllyKey, Cid)], out: &mut Vec<DiffEntry>) {
175    let mut i = 0;
176    let mut j = 0;
177    while i < a.len() && j < b.len() {
178        match a[i].0.cmp(&b[j].0) {
179            Ordering::Less => {
180                out.push(DiffEntry::Removed {
181                    key: a[i].0,
182                    value: a[i].1.clone(),
183                });
184                i += 1;
185            }
186            Ordering::Greater => {
187                out.push(DiffEntry::Added {
188                    key: b[j].0,
189                    value: b[j].1.clone(),
190                });
191                j += 1;
192            }
193            Ordering::Equal => {
194                if a[i].1 != b[j].1 {
195                    out.push(DiffEntry::Changed {
196                        key: a[i].0,
197                        before: a[i].1.clone(),
198                        after: b[j].1.clone(),
199                    });
200                }
201                i += 1;
202                j += 1;
203            }
204        }
205    }
206    while i < a.len() {
207        out.push(DiffEntry::Removed {
208            key: a[i].0,
209            value: a[i].1.clone(),
210        });
211        i += 1;
212    }
213    while j < b.len() {
214        out.push(DiffEntry::Added {
215            key: b[j].0,
216            value: b[j].1.clone(),
217        });
218        j += 1;
219    }
220}
221
222#[cfg(test)]
223mod tests {
224    use super::*;
225    use crate::id::{CODEC_RAW, Multihash};
226    use crate::prolly::tree::build_tree;
227    use crate::store::MemoryBlockstore;
228
229    fn keyed(i: u32) -> ProllyKey {
230        let mut k = [0u8; 16];
231        k[12..16].copy_from_slice(&i.to_be_bytes());
232        ProllyKey(k)
233    }
234
235    fn val(i: u32) -> Cid {
236        Cid::new(CODEC_RAW, Multihash::sha2_256(&i.to_be_bytes()))
237    }
238
239    #[test]
240    fn diff_of_identical_roots_is_empty() {
241        let entries: Vec<_> = (0..1_000u32).map(|i| (keyed(i), val(i))).collect();
242        let store = MemoryBlockstore::new();
243        let root = build_tree(&store, entries).unwrap();
244        let changes = diff(&store, &root, &root).unwrap();
245        assert!(changes.is_empty());
246    }
247
248    #[test]
249    fn diff_detects_added_key() {
250        let base: Vec<_> = (0..100u32).map(|i| (keyed(i), val(i))).collect();
251        let mut extended = base.clone();
252        extended.push((keyed(1_000), val(1_000)));
253        let store = MemoryBlockstore::new();
254        let root_a = build_tree(&store, base).unwrap();
255        let root_b = build_tree(&store, extended).unwrap();
256        let changes = diff(&store, &root_a, &root_b).unwrap();
257        assert_eq!(
258            changes,
259            vec![DiffEntry::Added {
260                key: keyed(1_000),
261                value: val(1_000),
262            }]
263        );
264    }
265
266    #[test]
267    fn diff_detects_removed_key() {
268        let extended: Vec<_> = (0..100u32).map(|i| (keyed(i), val(i))).collect();
269        let mut base = extended.clone();
270        base.retain(|(k, _)| *k != keyed(50));
271        let store = MemoryBlockstore::new();
272        let root_a = build_tree(&store, extended).unwrap();
273        let root_b = build_tree(&store, base).unwrap();
274        let changes = diff(&store, &root_a, &root_b).unwrap();
275        assert_eq!(
276            changes,
277            vec![DiffEntry::Removed {
278                key: keyed(50),
279                value: val(50),
280            }]
281        );
282    }
283
284    #[test]
285    fn diff_detects_changed_value() {
286        let a: Vec<_> = (0..100u32).map(|i| (keyed(i), val(i))).collect();
287        let mut b = a.clone();
288        b[50].1 = val(999);
289        let store = MemoryBlockstore::new();
290        let root_a = build_tree(&store, a).unwrap();
291        let root_b = build_tree(&store, b).unwrap();
292        let changes = diff(&store, &root_a, &root_b).unwrap();
293        assert_eq!(
294            changes,
295            vec![DiffEntry::Changed {
296                key: keyed(50),
297                before: val(50),
298                after: val(999),
299            }]
300        );
301    }
302
303    #[test]
304    fn diff_detects_many_changes_in_order() {
305        let a: Vec<_> = (0..1_000u32).map(|i| (keyed(i), val(i))).collect();
306        let mut b = a.clone();
307        for i in (0..1_000u32).step_by(100) {
308            b[i as usize].1 = val(10_000 + i);
309        }
310        let store = MemoryBlockstore::new();
311        let root_a = build_tree(&store, a).unwrap();
312        let root_b = build_tree(&store, b).unwrap();
313        let changes = diff(&store, &root_a, &root_b).unwrap();
314        assert_eq!(changes.len(), 10);
315        for w in changes.windows(2) {
316            assert!(w[0].key() < w[1].key());
317        }
318        for c in &changes {
319            match c {
320                DiffEntry::Changed { .. } => {}
321                e => panic!("expected Changed, got {e:?}"),
322            }
323        }
324    }
325
326    #[test]
327    fn diff_detects_add_remove_change_mixture() {
328        // a: {0..100}. b: {1..100} + {200} + changed value at 50.
329        let a: Vec<_> = (0..100u32).map(|i| (keyed(i), val(i))).collect();
330        let mut b: Vec<(ProllyKey, Cid)> = (1..100u32).map(|i| (keyed(i), val(i))).collect();
331        // Mutate value at key=50
332        let k50_idx = b.iter().position(|(k, _)| *k == keyed(50)).unwrap();
333        b[k50_idx].1 = val(9_999);
334        // Add key=200
335        b.push((keyed(200), val(200)));
336        let store = MemoryBlockstore::new();
337        let root_a = build_tree(&store, a).unwrap();
338        let root_b = build_tree(&store, b).unwrap();
339        let changes = diff(&store, &root_a, &root_b).unwrap();
340        // Expect: Removed(0), Changed(50, 50 -> 9999), Added(200)
341        assert_eq!(changes.len(), 3);
342        assert_eq!(
343            changes[0],
344            DiffEntry::Removed {
345                key: keyed(0),
346                value: val(0),
347            }
348        );
349        assert_eq!(
350            changes[1],
351            DiffEntry::Changed {
352                key: keyed(50),
353                before: val(50),
354                after: val(9_999),
355            }
356        );
357        assert_eq!(
358            changes[2],
359            DiffEntry::Added {
360                key: keyed(200),
361                value: val(200),
362            }
363        );
364    }
365}