merkle_search_tree/
diff.rs

1//! Tree difference calculation algorithm & associated types.
2
3mod diff_builder;
4mod page_range;
5mod page_range_snapshot;
6mod range_list;
7
8use std::{fmt::Debug, iter::Peekable};
9
10pub use page_range::*;
11pub use page_range_snapshot::*;
12
13use crate::diff::diff_builder::DiffListBuilder;
14
15/// An inclusive range of keys that differ between two serialised ordered sets
16/// of [`PageRange`].
17#[derive(Debug, PartialEq)]
18pub struct DiffRange<'a, K> {
19    /// The inclusive start & end key bounds of this range to sync.
20    start: &'a K,
21    end: &'a K,
22}
23
24impl<'a, K> Clone for DiffRange<'a, K> {
25    fn clone(&self) -> Self {
26        Self {
27            start: self.start,
28            end: self.end,
29        }
30    }
31}
32
33impl<'a, K> DiffRange<'a, K> {
34    /// Returns true if the range within `self` overlaps any portion of the
35    /// range within `p`.
36    pub(crate) fn overlaps(&self, p: &Self) -> bool
37    where
38        K: PartialOrd,
39    {
40        p.end() >= self.start() && p.start() <= self.end()
41    }
42
43    /// Returns the inclusive start of this [`DiffRange`], identifying the start
44    /// of an inconsistency between trees.
45    pub fn start(&self) -> &K {
46        self.start
47    }
48
49    /// Returns the inclusive end of this [`DiffRange`], identifying the end of
50    /// an inconsistency between trees.
51    pub fn end(&self) -> &K {
52        self.end
53    }
54}
55
56/// Compute the difference between `local` and `peer`, returning the set of
57/// [`DiffRange`] covering the inconsistent key ranges found in `peer`.
58///
59/// ```rust
60/// use merkle_search_tree::{MerkleSearchTree, diff::diff};
61///
62/// // Initialise a "peer" tree.
63/// let mut node_a = MerkleSearchTree::default();
64/// node_a.upsert("bananas", &42);
65/// node_a.upsert("plátanos", &42);
66///
67/// // Initialise the "local" tree with differing keys
68/// let mut node_b = MerkleSearchTree::default();
69/// node_b.upsert("donkey", &42);
70///
71/// // Generate the tree hashes before serialising the page ranges
72/// node_a.root_hash();
73/// node_b.root_hash();
74///
75/// // Generate the tree page bounds & hashes, and feed into the diff function
76/// let diff_range = diff(
77///     node_b.serialise_page_ranges().unwrap().into_iter(),
78///     node_a.serialise_page_ranges().unwrap().into_iter(),
79/// );
80///
81/// // The diff_range contains all the inclusive key intervals the "local" tree
82/// // should fetch from the "peer" tree to converge.
83/// assert_matches::assert_matches!(diff_range.as_slice(), [range] => {
84///     assert_eq!(range.start(), &"bananas");
85///     assert_eq!(range.end(), &"plátanos");
86/// });
87/// ```
88///
89/// # State Convergence
90///
91/// To converge the state of the two trees, the key ranges in the returned
92/// [`DiffRange`] instances should be requested from `peer` and used to update
93/// the state of `local`.
94///
95/// If `local` is a superset of `peer` (contains all the keys in `peer` and the
96/// values are consistent), or the two trees are identical, no [`DiffRange`]
97/// intervals are returned.
98///
99/// # Termination
100///
101/// A single invocation to [`diff()`] always terminates, and completes in `O(n)`
102/// time and space. Inconsistent page ranges (if any) are minimised in
103/// `O(n_consistent * n_inconsistent)` time and `O(n)` space.
104///
105/// In the absence of further external updates to either tree, this algorithm
106/// terminates (leaving `local` and `peer` fully converged) and no diff is
107/// returned within a finite number of sync rounds between the two trees.
108///
109/// If a one-way sync is performed (pulling inconsistent keys from `peer` and
110/// updating `local`, but never syncing the other way around) this algorithm MAY
111/// NOT terminate.
112pub fn diff<'p, 'a: 'p, T, U, K>(local: T, peer: U) -> Vec<DiffRange<'p, K>>
113where
114    T: IntoIterator<Item = PageRange<'a, K>>,
115    U: IntoIterator<Item = PageRange<'p, K>>,
116    K: PartialOrd + Ord + Debug + 'p + 'a,
117{
118    let local = local.into_iter();
119    let peer = peer.into_iter();
120
121    // Any two merkle trees can be expressed as a series of overlapping page
122    // ranges, either consistent in content (hashes match), or inconsistent
123    // (hashes differ).
124    //
125    // This algorithm builds two sets of intervals - one for key ranges that are
126    // fully consistent between the two trees, and one for inconsistent ranges.
127    //
128    // This DiffListBuilder helps construct these lists, and merges them into a
129    // final, non-overlapping, deduplicated, and minimised set of ranges that
130    // are inconsistent between trees as described above.
131    let mut diff_builder = DiffListBuilder::default();
132
133    let mut local = local.peekable();
134    let mut peer = peer.peekable();
135
136    debug!("calculating diff");
137
138    let root = match peer.peek() {
139        Some(v) => v.clone(),
140        None => return vec![],
141    };
142
143    recurse_diff(&root, &mut peer, &mut local, &mut diff_builder);
144
145    diff_builder.into_diff_vec()
146}
147
148#[cfg_attr(any(test, feature = "tracing"), tracing::instrument(skip(peer, local)))]
149fn recurse_subtree<'p, 'a: 'p, T, U, K>(
150    subtree_root: &PageRange<'p, K>,
151    peer: &mut Peekable<U>,
152    local: &mut Peekable<T>,
153    diff_builder: &mut DiffListBuilder<'p, K>,
154) -> bool
155where
156    T: Iterator<Item = PageRange<'a, K>>,
157    U: Iterator<Item = PageRange<'p, K>>,
158    K: PartialOrd + Ord + Debug + 'p + 'a,
159{
160    // Recurse into the subtree, which will exit immediately if the next value
161    // in peer is not rooted at subtree_root (without consuming the iter value).
162    recurse_diff(subtree_root, peer, local, diff_builder);
163
164    // Invariant - when returning from this call, the entire subtree rooted at
165    // the peer_subtree_root should have been evaluated and the next peer page
166    // (if any) escapes the subtree.
167
168    while let Some(p) = peer.next_if(|v| subtree_root.is_superset_of(v)) {
169        debug!(
170            peer_page=?p,
171            "requesting unevaluated subtree page"
172        );
173        // Add all the un-evaluated peer sub-tree pages to the sync list.
174        diff_builder.inconsistent(p.start(), p.end());
175    }
176
177    debug_assert!(peer
178        .peek()
179        .map(|v| !subtree_root.is_superset_of(v))
180        .unwrap_or(true));
181
182    true
183}
184
185#[cfg_attr(any(test, feature = "tracing"), tracing::instrument(skip(peer, local)))]
186fn recurse_diff<'p, 'a: 'p, T, U, K>(
187    subtree_root: &PageRange<'p, K>,
188    peer: &mut Peekable<U>,
189    local: &mut Peekable<T>,
190    diff_builder: &mut DiffListBuilder<'p, K>,
191) where
192    T: Iterator<Item = PageRange<'a, K>>,
193    U: Iterator<Item = PageRange<'p, K>>,
194    K: PartialOrd + Ord + Debug + 'p + 'a,
195{
196    // The last visited peer page, if any.
197    let mut last_p = None;
198
199    // Process this subtree, descending down inconsistent paths recursively, and
200    // iterating through the tree.
201    loop {
202        let p = match maybe_advance_within(subtree_root, peer) {
203            Some(v) => v,
204            None => {
205                trace!("no more peer pages in subtree");
206                return;
207            }
208        };
209
210        let mut l = match maybe_advance_within(&p, local) {
211            Some(v) => v,
212            None => {
213                // If the local subtree range is a superset of the peer subtree
214                // range, the two are guaranteed to be inconsistent due to the
215                // local node containing more keys (potentially the sole cause
216                // of that inconsistency).
217                //
218                // Fetching any pages from the less-up-to-date peer may be
219                // spurious, causing no useful advancement of state.
220                if let Some(local) = local.peek() {
221                    if local.is_superset_of(&p) {
222                        trace!(
223                            peer_page=?p,
224                            local_page=?local,
225                            "local page is a superset of peer"
226                        );
227                        return;
228                    }
229                }
230
231                // If there's no matching local page that overlaps with p, then
232                // there must be be one or more keys to be synced from the peer
233                // to populate the missing local pages.
234                //
235                // Request the range starting from the end of the last checked p
236                // (last_p), or the start of the subtree_root if none.
237                let start = last_p
238                    .as_ref()
239                    .map(|v: &PageRange<'_, K>| v.end())
240                    .unwrap_or(subtree_root.start());
241                // And end at the next local page key, or the page end.
242                //
243                // Any pages missing between p.end and the end of this subtree
244                // will be added by the caller (recurse_subtree).
245                let end = local
246                    .peek()
247                    .map(|v| v.start().min(p.end()))
248                    .unwrap_or(p.end());
249                if end >= start {
250                    debug!(
251                        peer_page=?p,
252                        "no more local pages in subtree - requesting missing page ranges"
253                    );
254                    diff_builder.inconsistent(start, end);
255                } else {
256                    trace!(
257                        peer_page=?p,
258                        "no more local pages in subtree"
259                    );
260                }
261
262                return;
263            }
264        };
265
266        last_p = Some(p.clone());
267
268        // Never escape the subtree rooted at p_parent.
269        //
270        // Disable for fuzzing to test with fully random inputs.
271        #[cfg(not(fuzzing))]
272        debug_assert!(subtree_root.is_superset_of(&p));
273
274        trace!(
275            peer_page=?p,
276            local_page=?l,
277            "visit page"
278        );
279
280        // Advance the local cursor to minimise the comparable range, in turn
281        // minimising the sync range.
282        while let Some(v) = local.next_if(|v| v.is_superset_of(&p)) {
283            trace!(
284                peer_page=?p,
285                skip_local_page=?l,
286                local_page=?v,
287                "shrink local diff range"
288            );
289            l = v;
290        }
291
292        if l.hash() == p.hash() {
293            debug!(
294                peer_page=?p,
295                local_page=?l,
296                "hash match - consistent page"
297            );
298
299            // Record this page as fully consistent.
300            diff_builder.consistent(p.start(), p.end());
301
302            // Skip visiting the pages in the subtree rooted at the current
303            // page: they're guaranteed to be consistent due to the consistent
304            // parent hash.
305            skip_subtree(&p, peer);
306        } else {
307            debug!(
308                peer_page=?p,
309                local_page=?l,
310                "hash mismatch"
311            );
312
313            diff_builder.inconsistent(p.start(), p.end());
314        }
315
316        // Evaluate the sub-tree, causing all the (consistent) child ranges to
317        // be added to the consistent list to, shrink this inconsistent range
318        // (or simply advancing through the subtree if this page is consistent).
319        recurse_subtree(&p, peer, local, diff_builder);
320    }
321}
322
323/// Return the next [`PageRange`] if it is part of the sub-tree rooted at
324/// `parent`.
325fn maybe_advance_within<'a, 'p, K, T>(
326    parent: &PageRange<'p, K>,
327    cursor: &mut Peekable<T>,
328) -> Option<PageRange<'a, K>>
329where
330    T: Iterator<Item = PageRange<'a, K>>,
331    K: PartialOrd + 'a,
332{
333    if cursor
334        .peek()
335        .map(|p| !parent.is_superset_of(p))
336        .unwrap_or_default()
337    {
338        return None;
339    }
340
341    cursor.next()
342}
343
344/// Advance `iter` to the next page that does not form part of the subtree
345/// rooted at the given `subtree_root`.
346#[inline(always)]
347fn skip_subtree<'p, T, K>(subtree_root: &PageRange<'p, K>, iter: &mut Peekable<T>)
348where
349    T: Iterator<Item = PageRange<'p, K>>,
350    K: PartialOrd + Ord + Debug + 'p,
351{
352    while iter.next_if(|v| subtree_root.is_superset_of(v)).is_some() {}
353}
354
355#[cfg(test)]
356mod tests {
357    use std::collections::HashSet;
358
359    use assert_matches::assert_matches;
360    use proptest::prelude::*;
361
362    use super::*;
363    use crate::{
364        digest::PageDigest,
365        test_util::{IntKey, Node},
366    };
367
368    const fn new_digest(lsb: u8) -> PageDigest {
369        PageDigest::new([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, lsb])
370    }
371
372    macro_rules! test_page_is_superset_of {
373        (
374			$name:ident,
375			a = [$a_start:expr, $a_end:expr],
376			b = [$b_start:expr, $b_end:expr],
377			a_is_parent = $want:expr // Should a.contains(b) be true?
378		) => {
379            paste::paste! {
380                #[test]
381                fn [<test_page_is_superset_of_ $name>]() {
382                    let a = PageRange::new(&$a_start, $a_end, new_digest(42));
383                    let b = PageRange::new(&$b_start, $b_end, new_digest(42));
384
385                    assert!(a.is_superset_of(&b) == $want);
386                }
387            }
388        };
389    }
390
391    test_page_is_superset_of!(inclusive, a = [1, &10], b = [1, &10], a_is_parent = true);
392
393    test_page_is_superset_of!(full, a = [1, &10], b = [2, &9], a_is_parent = true);
394
395    test_page_is_superset_of!(start, a = [2, &10], b = [1, &9], a_is_parent = false);
396
397    test_page_is_superset_of!(end, a = [1, &8], b = [2, &9], a_is_parent = false);
398
399    test_page_is_superset_of!(outside, a = [1, &10], b = [0, &11], a_is_parent = false);
400
401    // Tests below model this tree:
402    //
403    //                          ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐
404    //                            ┌───┬───┬───────┐
405    //                          │ │ 7 │11 │ high  │ Level 2 │
406    //                            └───┴───┴───────┘
407    //                          └ ─ ┬ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ─ ┘
408    //                         ┌────┘         └─────┐
409    //                         ▼                    ▼
410    //             ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─  ┌ ─ ─ ─ ─ ─ ─ ─ ┐
411    //               ┌───┬───┬───┐        │   ┌───┐
412    //             │ │ 3 │ 4 │ 6 │Level 1   │ │15 │ Level 1 │
413    //               └───┴───┴───┘        │   └───┘
414    //             └ ─ ┬ ─ ─ ─ ┬ ─ ─ ─ ─ ─  └ ─ ─ ─ ─ ─ ─ ─ ┘
415    //             ┌───┘       └─────┐
416    //             ▼                 ▼
417    //     ┌ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ┐
418    //       ┌───┐             ┌───┐
419    //     │ │ 2 │ Level 0 │ │ │ 5 │ Level 0 │
420    //       └───┘             └───┘
421    //     └ ─ ─ ─ ─ ─ ─ ─ ┘ └ ─ ─ ─ ─ ─ ─ ─ ┘
422
423    #[test]
424    fn test_no_diff() {
425        enable_logging!();
426
427        let local = vec![
428            PageRange::new(&2, &15, new_digest(1)),
429            PageRange::new(&2, &6, new_digest(2)),
430            PageRange::new(&2, &2, new_digest(3)),
431            PageRange::new(&5, &5, new_digest(4)),
432            PageRange::new(&15, &15, new_digest(5)),
433        ];
434
435        let peer = local.clone();
436
437        assert_matches!(diff(local, peer).as_slice(), []);
438    }
439
440    #[test]
441    fn test_diff_peer_missing_last_page() {
442        enable_logging!();
443
444        let local = vec![
445            PageRange::new(&2, &15, new_digest(1)),
446            PageRange::new(&2, &6, new_digest(2)),
447            PageRange::new(&2, &2, new_digest(3)),
448            PageRange::new(&5, &5, new_digest(4)),
449            PageRange::new(&15, &15, new_digest(5)),
450        ];
451
452        let mut peer = local.clone();
453
454        // Remove the last page
455        let _ = peer.pop().unwrap();
456
457        // Invalidate the root/parent and update the peer root range to reflect
458        // the missing last page
459        peer[0] = PageRange::new(peer[0].start(), &11, new_digest(42));
460
461        // Nothing to ask for - the peer is behind
462        assert_matches!(diff(local, peer).as_slice(), []);
463    }
464
465    #[test]
466    fn test_diff_local_missing_last_page() {
467        enable_logging!();
468
469        let mut local = vec![
470            PageRange::new(&2, &15, new_digest(1)),
471            PageRange::new(&2, &6, new_digest(2)),
472            PageRange::new(&2, &2, new_digest(3)),
473            PageRange::new(&5, &5, new_digest(4)),
474            PageRange::new(&15, &15, new_digest(5)),
475        ];
476
477        let peer = local.clone();
478
479        // Remove the last page
480        let _ = local.pop().unwrap();
481
482        // Invalidate the root/parent and update the local root range to reflect
483        // the missing last page
484        local[0] = PageRange::new(local[0].start(), &11, new_digest(42));
485
486        assert_matches!(
487            diff(local, peer).as_slice(),
488            [DiffRange { start: 6, end: 15 }]
489        );
490    }
491
492    /// The peer is missing a child page.
493    ///
494    /// This means the local tree has an inconsistent hash for the
495    /// missing-child's parent, causing it to request the full range between the
496    /// end of it and the next consistent hash (an empty sync range).
497    #[test]
498    fn test_diff_peer_missing_leaf_page() {
499        enable_logging!();
500
501        let local = vec![
502            PageRange::new(&2, &15, new_digest(1)),
503            PageRange::new(&2, &6, new_digest(2)),
504            PageRange::new(&2, &2, new_digest(3)),
505            PageRange::new(&5, &5, new_digest(4)),
506            PageRange::new(&15, &15, new_digest(5)),
507        ];
508
509        let peer = vec![
510            PageRange::new(
511                &3, // Differs
512                &15,
513                new_digest(42), // Differs
514            ),
515            PageRange::new(
516                &3, // Differs
517                &6,
518                new_digest(43), // Differs
519            ),
520            //
521            // No page containing 2 at level 0
522            PageRange::new(&5, &5, new_digest(4)),
523            PageRange::new(&15, &15, new_digest(5)),
524        ];
525
526        assert_matches!(diff(local, peer).as_slice(), []);
527    }
528
529    #[test]
530    fn test_diff_local_missing_leaf_page() {
531        enable_logging!();
532
533        let local = vec![
534            PageRange::new(
535                &3, // Differs
536                &15,
537                new_digest(42), // Differs
538            ),
539            PageRange::new(
540                &3, // Differs
541                &6,
542                new_digest(43), // Differs
543            ),
544            //
545            // No page containing 2 at level 0
546            PageRange::new(&5, &5, new_digest(4)),
547            PageRange::new(&15, &15, new_digest(5)),
548        ];
549
550        let peer = vec![
551            PageRange::new(&2, &15, new_digest(1)),
552            PageRange::new(&2, &6, new_digest(2)),
553            PageRange::new(&2, &2, new_digest(3)),
554            PageRange::new(&5, &5, new_digest(4)),
555            PageRange::new(&15, &15, new_digest(5)),
556        ];
557
558        assert_matches!(
559            diff(local, peer).as_slice(),
560            [DiffRange { start: 2, end: 15 }]
561        );
562    }
563
564    #[test]
565    fn test_diff_local_missing_subtree() {
566        enable_logging!();
567
568        let local = vec![
569            PageRange::new(
570                &3,
571                &15,
572                new_digest(42), // Differs
573            ),
574            PageRange::new(&15, &15, new_digest(5)),
575        ];
576
577        let peer = vec![
578            PageRange::new(&2, &15, new_digest(1)),
579            PageRange::new(&2, &6, new_digest(2)),
580            PageRange::new(&2, &2, new_digest(3)),
581            PageRange::new(&5, &5, new_digest(4)),
582            PageRange::new(&15, &15, new_digest(5)),
583        ];
584
585        assert_matches!(
586            diff(local, peer).as_slice(),
587            [DiffRange { start: 2, end: 15 }]
588        );
589    }
590
591    #[test]
592    fn test_diff_peer_missing_subtree() {
593        enable_logging!();
594
595        let local = vec![
596            PageRange::new(&2, &15, new_digest(1)),
597            PageRange::new(&2, &6, new_digest(2)),
598            PageRange::new(&2, &2, new_digest(3)),
599            PageRange::new(&5, &5, new_digest(4)),
600            PageRange::new(&15, &15, new_digest(5)),
601        ];
602
603        let peer = vec![
604            PageRange::new(
605                &3,
606                &15,
607                new_digest(42), // Differs
608            ),
609            PageRange::new(&15, &15, new_digest(5)),
610        ];
611
612        assert_matches!(diff(local, peer).as_slice(), []);
613    }
614
615    #[test]
616    fn test_diff_leaf_page_hash() {
617        enable_logging!();
618
619        let peer = vec![
620            PageRange::new(
621                &2,
622                &15,
623                new_digest(42), // Differs
624            ),
625            PageRange::new(
626                &2,
627                &6,
628                new_digest(42), // Differs
629            ),
630            PageRange::new(&2, &2, new_digest(3)),
631            PageRange::new(&5, &5, new_digest(4)),
632            PageRange::new(&15, &15, new_digest(5)),
633        ];
634
635        let local = vec![
636            PageRange::new(&2, &15, new_digest(1)),
637            PageRange::new(&2, &6, new_digest(2)),
638            PageRange::new(&2, &2, new_digest(3)),
639            PageRange::new(&5, &5, new_digest(4)),
640            PageRange::new(&15, &15, new_digest(5)),
641        ];
642
643        assert_matches!(
644            diff(local, peer).as_slice(),
645            // Range (3,6) is optimal, but this gets the job done.
646            [DiffRange { start: 2, end: 15 }]
647        );
648    }
649
650    #[test]
651    fn test_diff_peer_extra_key_last_page() {
652        enable_logging!();
653
654        let local = vec![
655            PageRange::new(&2, &15, new_digest(1)),
656            PageRange::new(&2, &6, new_digest(2)),
657            PageRange::new(&2, &2, new_digest(3)),
658            PageRange::new(&5, &5, new_digest(4)),
659            PageRange::new(&15, &15, new_digest(5)),
660        ];
661
662        let mut peer = local.clone();
663        let end = peer.last_mut().unwrap();
664        *end = PageRange::new(end.start(), &16, new_digest(42));
665
666        // Root hash differs to reflect differing child
667        peer[0] = PageRange::new(peer[0].start(), &16, new_digest(42));
668
669        assert_matches!(
670            diff(local, peer).as_slice(),
671            [DiffRange { start: 6, end: 16 }]
672        );
673    }
674
675    #[test]
676    fn test_diff_root_page_hash() {
677        enable_logging!();
678
679        let local = vec![
680            PageRange::new(&2, &15, new_digest(1)),
681            PageRange::new(&2, &6, new_digest(2)),
682            PageRange::new(&2, &2, new_digest(3)),
683            PageRange::new(&5, &5, new_digest(4)),
684            PageRange::new(&15, &15, new_digest(5)),
685        ];
686
687        let mut peer = local.clone();
688
689        // Root hash differs due to added key 8
690        peer[0] = PageRange::new(peer[0].start(), peer[0].end(), new_digest(42));
691
692        // Without the reduce_sync_range optimisation, this root inconsistency
693        // would cause a fetch against the whole tree (start: 2, end: 15).
694        //
695        // Instead, the known-good sub-tree pages can be removed from the sync
696        // range.
697        assert_matches!(
698            diff(local, peer).as_slice(),
699            [DiffRange { start: 6, end: 15 }]
700        );
701    }
702
703    #[test]
704    fn test_diff_peer_intermediate_bounds() {
705        enable_logging!();
706
707        // This breaks the convention of the same tree being used, and instead
708        // pushes 7 down into level 1.
709        //
710        // It appears in the peer only.
711
712        let local = vec![
713            PageRange::new(&2, &15, new_digest(1)),
714            PageRange::new(&2, &6, new_digest(2)),
715            PageRange::new(&2, &2, new_digest(3)),
716            PageRange::new(&5, &5, new_digest(4)),
717            PageRange::new(&15, &15, new_digest(5)),
718        ];
719
720        let mut peer = local.clone();
721
722        // Root hash differs due to added key 8
723        peer[1] = PageRange::new(peer[1].start(), &7, new_digest(42));
724
725        peer[0] = PageRange::new(peer[0].start(), peer[0].end(), new_digest(42));
726
727        assert_matches!(
728            diff(local, peer).as_slice(),
729            [DiffRange { start: 2, end: 15 }]
730        );
731    }
732
733    #[test]
734    fn test_diff_peer_intermediate_bounds_and_inconsistent_subtree_leaf() {
735        enable_logging!();
736
737        // This breaks the convention of the same tree being used, and instead
738        // pushes 7 down into level 1.
739        //
740        // It appears in the peer only, and additionally the value of 2 is
741        // modified.
742
743        let local = vec![
744            PageRange::new(&2, &15, new_digest(1)),
745            PageRange::new(&2, &6, new_digest(2)),
746            PageRange::new(&2, &2, new_digest(3)),
747            PageRange::new(&5, &5, new_digest(4)),
748            PageRange::new(&15, &15, new_digest(5)),
749        ];
750
751        let mut peer = local.clone();
752
753        // Extend key range of 1st child to 2-6 to 2-7
754        peer[1] = PageRange::new(peer[1].start(), &7, new_digest(42));
755
756        // Key 2 value change
757        peer[2] = PageRange::new(peer[2].start(), peer[2].end(), new_digest(42));
758
759        // Root hash
760        peer[0] = PageRange::new(peer[0].start(), peer[0].end(), new_digest(42));
761
762        assert_matches!(
763            diff(local, peer.clone()).as_slice(),
764            [DiffRange { start: 2, end: 15 }]
765        );
766
767        let mut local = peer.clone();
768
769        // Only 2 should remain different - reset the hash.
770        local[2] = PageRange::new(local[2].start(), local[2].end(), new_digest(3));
771        peer[1] = PageRange::new(peer[1].start(), peer[1].end(), new_digest(2));
772        peer[0] = PageRange::new(peer[0].start(), peer[0].end(), new_digest(1));
773
774        // 2, 15 because the root page is inconsistent and there's no consistent
775        // pages that shrink the range.
776        assert_matches!(
777            diff(local, peer).as_slice(),
778            [DiffRange { start: 2, end: 15 }]
779        );
780    }
781
782    /// Construct a test where a child page is inconsistent and should not call
783    /// recurse_diff() as there's no subtree to evaluate.
784    ///
785    /// Additionally another child page is also inconsistent but skipped because
786    /// the local node has a larger key range than the peer.
787    #[test]
788    fn test_child_page_inconsistent_no_subtree_recurse() {
789        enable_logging!();
790
791        let local = vec![
792            PageRange::new(&0, &17995215864353464453_usize, new_digest(1)),
793            PageRange::new(&0, &1331283967702353742, new_digest(2)),
794            PageRange::new(
795                &2425302987964992968,
796                &3632803506728089373, // Larger key range than peer
797                new_digest(3),
798            ),
799            PageRange::new(
800                &4706903583207578752, // Shorter key range than peer (missing first key)
801                &4707132771120484774,
802                new_digest(4),
803            ),
804            PageRange::new(&17995215864353464453, &17995215864353464453, new_digest(5)),
805        ];
806        let peer = vec![
807            PageRange::new(
808                &0,
809                &17995215864353464453_usize,
810                new_digest(11), // Differs
811            ),
812            PageRange::new(&0, &1331283967702353742, new_digest(2)),
813            PageRange::new(
814                &2425302987964992968,
815                &3541571342636567061,
816                new_digest(13), // Differs
817            ),
818            PageRange::new(
819                &3632803506728089373,
820                &4707132771120484774,
821                new_digest(14), // Differs
822            ),
823            PageRange::new(&17995215864353464453, &17995215864353464453, new_digest(5)),
824        ];
825
826        assert_matches!(
827            diff(local, peer).as_slice(),
828            [DiffRange {
829                start: 1331283967702353742,
830                end: 17995215864353464453
831            }]
832        );
833    }
834
835    // If the bounds of the peer page exceed that of the local page on both
836    // sides, make sure both sides are requested to minimise round trips.
837    #[test]
838    fn test_diff_peer_bounds_larger_both_sides() {
839        enable_logging!();
840
841        let local = vec![PageRange::new(&2, &15, new_digest(1))];
842        let peer = vec![PageRange::new(&1, &42, new_digest(2))];
843
844        assert_matches!(
845            diff(local, peer).as_slice(),
846            [DiffRange { start: 1, end: 42 }]
847        );
848    }
849
850    #[test]
851    fn test_diff_empty_peer() {
852        enable_logging!();
853
854        let peer = vec![];
855        let local = vec![PageRange::new(&1, &42, new_digest(1))];
856
857        assert_matches!(diff(local, peer).as_slice(), []);
858    }
859
860    #[test]
861    fn test_diff_empty_local() {
862        enable_logging!();
863
864        let local = vec![];
865        let peer = vec![PageRange::new(&1, &42, new_digest(1))];
866
867        assert_matches!(
868            diff(local, peer).as_slice(),
869            [DiffRange { start: 1, end: 42 }]
870        );
871    }
872
873    #[test]
874    fn test_trivial_sync_differing_values() {
875        enable_logging!();
876
877        let mut a = Node::default();
878        a.upsert(IntKey::new(42), 1);
879
880        let mut b = Node::default();
881        b.upsert(IntKey::new(42), 2);
882
883        assert_eq!(sync_round(&mut a, &mut b), 1);
884        assert_eq!(sync_round(&mut a, &mut b), 0);
885
886        assert_eq!(sync_round(&mut a, &mut b), 0);
887        assert_eq!(sync_round(&mut a, &mut b), 0);
888
889        assert_eq!(a, b);
890    }
891
892    #[test]
893    fn test_trivial_sync_differing_keys() {
894        enable_logging!();
895
896        let mut a = Node::default();
897        a.upsert(IntKey::new(42), 1);
898
899        let mut b = Node::default();
900        b.upsert(IntKey::new(24), 1);
901
902        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b");
903        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b");
904        assert_eq!(sync_round(&mut b, &mut a), 1, "b => a");
905        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a");
906        assert_eq!(sync_round(&mut a, &mut b), 2, "a => b");
907        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b");
908        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a");
909        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a");
910
911        assert_eq!(a, b);
912    }
913
914    /// Test the case where the local root page is a superset of the peer.
915    ///
916    /// This is an example of the peer needing to sync in order for the local
917    /// node to sync the missing peer keys. Because the range bounds do not
918    /// match, the peer -> local sync is not attempted until the peer has
919    /// obtained the local keys, making the page ranges match, allowing the page
920    /// to be peer -> local sync to complete.
921    #[test]
922    fn test_local_superset_of_peer() {
923        enable_logging!();
924
925        let mut a = Node::default();
926        a.upsert(IntKey::new(244067356035258375), 0);
927
928        let mut b = Node::default();
929        b.upsert(IntKey::new(0), 0);
930        b.upsert(IntKey::new(2750749774246655017), 0);
931
932        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 1");
933        assert_eq!(sync_round(&mut b, &mut a), 2, "b => a run 1");
934        assert_eq!(sync_round(&mut a, &mut b), 3, "a => b run 2");
935        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 2");
936        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 3");
937        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 3");
938
939        assert_eq!(a, b);
940    }
941
942    // Construct a test with a level 2 root node that is absent in the local
943    // tree, but who's presence does not affect the min/max ranges of the root
944    // (in essence, the peer has an extra level / key than local, but ranges
945    // match).
946    #[test]
947    fn test_root_single_node_covered() {
948        enable_logging!();
949
950        // 0: 2356959391436047
951        // 1: 1827784367256368463
952        // 2: 8090434540329235177
953        // 3: 8090434540343951592
954        let mut a = Node::default();
955        a.upsert(IntKey::new(2356959391436047), 0);
956        a.upsert(IntKey::new(8090434540343951592), 0);
957
958        // 2356959391436047 is lt subtree of 8090434540343951592
959
960        // pull two subtrees:
961        //   * start range mismatch for 0 -> 1, 2 -> 3
962        //   * end range mismatch
963
964        // this should complete B, but doesn't include the value in between the
965        // pulled ranges (1, 2) which is a level higher.
966
967        let mut b = Node::default();
968        b.upsert(IntKey::new(1827784367256368463), 0);
969        b.upsert(IntKey::new(8090434540329235177), 0);
970
971        assert_eq!(sync_round(&mut a, &mut b), 2, "a => b run 1");
972        assert_eq!(sync_round(&mut b, &mut a), 4, "b => a run 1");
973
974        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 2");
975        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 2");
976
977        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 3");
978        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 3");
979
980        assert_eq!(a, b);
981    }
982
983    /// One node has a tree range that is a superset of the other.
984    #[test]
985    fn test_superset() {
986        enable_logging!();
987
988        // 0
989        // 1479827427186972579
990        // 6895546778622627890
991        // 8090434540329235177
992        let mut a = Node::default();
993        a.upsert(IntKey::new(1479827427186972579), 0);
994        a.upsert(IntKey::new(6895546778622627890), 0);
995
996        let mut b = Node::default();
997        b.upsert(IntKey::new(0), 0);
998        b.upsert(IntKey::new(8090434540329235177), 0);
999
1000        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 1");
1001        // B contains a range superset, so it believes it has everything
1002        // already.
1003
1004        assert_eq!(sync_round(&mut b, &mut a), 2, "b => a run 1");
1005        // A pulls the missing keys within B.
1006
1007        assert_eq!(sync_round(&mut a, &mut b), 4, "a => b run 2");
1008        // Subsequently B pulls all the keys from A (as B's existing keys were
1009        // within the A's range now being fetched due to a hash mismatch).
1010
1011        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 2");
1012        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 3");
1013        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 3");
1014
1015        assert_eq!(a, b);
1016    }
1017
1018    /// Construct a test where both roots contain a single key, both with
1019    /// differing values - each node needs to pull their peer's root key.
1020    #[test]
1021    fn test_both_roots_single_differing_node() {
1022        enable_logging!();
1023
1024        let mut a = Node::default();
1025        a.upsert(IntKey::new(3541571342636567061), 0);
1026        a.upsert(IntKey::new(4706901308862946071), 0);
1027        a.upsert(IntKey::new(4706903583207578752), 0);
1028
1029        let mut b = Node::default();
1030        b.upsert(IntKey::new(3632796868130453657), 0);
1031        b.upsert(IntKey::new(3632803506728089373), 0);
1032        b.upsert(IntKey::new(4707132771120484774), 0);
1033
1034        for _i in 0..100 {
1035            sync_round(&mut a, &mut b);
1036            sync_round(&mut b, &mut a);
1037        }
1038
1039        assert_eq!(sync_round(&mut a, &mut b), 0);
1040        assert_eq!(sync_round(&mut b, &mut a), 0);
1041
1042        assert_eq!(a, b);
1043    }
1044
1045    /// OLD: Previously ensured only the "leading edge" missing keys are fetched
1046    /// - the common case for new monotonic keys added to a tree.
1047    ///
1048    /// Disabled to reduce average sync cost.
1049    #[test]
1050    fn test_leading_edge_range_sync() {
1051        enable_logging!();
1052
1053        let mut a = Node::default();
1054        a.upsert(IntKey::new(1), 0);
1055        a.upsert(IntKey::new(2), 0);
1056        a.upsert(IntKey::new(3), 0);
1057        a.upsert(IntKey::new(4), 0);
1058        a.upsert(IntKey::new(5), 0);
1059        a.upsert(IntKey::new(6), 0);
1060        a.upsert(IntKey::new(7), 0);
1061        a.upsert(IntKey::new(8), 0);
1062        a.upsert(IntKey::new(9), 0);
1063        a.upsert(IntKey::new(10), 0);
1064
1065        let mut b = Node::default();
1066        b.upsert(IntKey::new(1), 0);
1067        b.upsert(IntKey::new(2), 0);
1068        b.upsert(IntKey::new(3), 0);
1069        b.upsert(IntKey::new(4), 0);
1070        b.upsert(IntKey::new(5), 0);
1071        b.upsert(IntKey::new(6), 0);
1072
1073        assert_eq!(sync_round(&mut a, &mut b), 10, "a => b run 1");
1074        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 1");
1075
1076        assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 2");
1077        assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 2");
1078
1079        assert_eq!(a, b);
1080    }
1081
1082    const MAX_NODE_KEYS: usize = 100;
1083
1084    prop_compose! {
1085        /// Yield a set of keys covering the full u64 range, driving randomness
1086        /// of tree node placements instead of collisions.
1087        fn arbitrary_large_key_set()(kv_pairs in prop::collection::hash_set(
1088            (any::<u64>(), any::<u64>()),
1089            0..MAX_NODE_KEYS)
1090        ) -> HashSet<(u64, u64)> {
1091            kv_pairs
1092        }
1093    }
1094
1095    prop_compose! {
1096        /// Yield a small set of keys, to arbitrarily increase the collision
1097        /// count between peers by having them generate the same keys with
1098        /// differing values.
1099        fn arbitrary_small_key_set()(kv_pairs in prop::collection::hash_set(
1100            (0_u64..50, 0_u64..50),
1101            0..MAX_NODE_KEYS)
1102        ) -> HashSet<(u64, u64)> {
1103            kv_pairs
1104        }
1105    }
1106
1107    prop_compose! {
1108        /// Yield an arbitrary [`Node`] containing up to 100 random key/value
1109        /// pairs.
1110        fn arbitrary_node()(kv_pairs in prop_oneof![
1111            arbitrary_large_key_set(),
1112            arbitrary_small_key_set()
1113        ]) -> Node {
1114            let mut node = Node::default();
1115            for (k, v) in kv_pairs {
1116                node.upsert(IntKey::new(k), v);
1117            }
1118            node
1119        }
1120    }
1121
1122    proptest! {
1123        /// Perform a synchronisation test that asserts two arbitrary trees
1124        /// (potentially containing no overlapping key/values) converge to the
1125        /// same state after repeated synchronisation rounds.
1126        #[test]
1127        fn prop_sync_trees(mut a in arbitrary_node(), mut b in arbitrary_node()) {
1128            enable_logging!();
1129
1130            // Bound the number of sync rounds needed to converge to at most 1
1131            // key being sync'd per round.
1132            let max_count = a.key_count() + b.key_count() + 1;
1133            let mut count = 0;
1134
1135            loop {
1136                // Perform a two-way sync.
1137                let a_to_b = sync_round(&mut a, &mut b);
1138                let b_to_a = sync_round(&mut b, &mut a);
1139                if a_to_b == 0 && b_to_a == 0 {
1140                    // If no progress was made, stop syncing.
1141                    break;
1142                }
1143
1144                // Syncing should never pull more than the full peer tree.
1145                assert!(a_to_b <= a.key_count());
1146                assert!(b_to_a <= b.key_count());
1147
1148                count += 1;
1149                if count >= max_count {
1150                    panic!("failed to sync a => b in round limit");
1151                }
1152            }
1153
1154            // Ensure the nodes are now consistent.
1155            assert_eq!(a, b);
1156        }
1157
1158        /// Invariant: page ranges yielded from an [`OwnedPageRange`] are
1159        /// identical to those from the borrowed [`PageRange`] equivalent.
1160        #[test]
1161        fn prop_owned_page_range_equivalent(mut a in arbitrary_node()) {
1162            enable_logging!();
1163
1164            let _ = a.root_hash();
1165            let a_ref = a.serialise_page_ranges().unwrap();
1166            let a_owned = PageRangeSnapshot::from(a_ref.clone());
1167
1168            let a_owned_iter = a_owned.iter();
1169            let a_ref_iter = a_ref.iter().cloned();
1170
1171            assert!(a_owned_iter.eq(a_ref_iter));
1172        }
1173    }
1174
1175    /// Perform a single sync round, pulling differences from a into b.
1176    ///
1177    /// Returns the number of fetched pages.
1178    fn sync_round(from: &mut Node, to: &mut Node) -> usize {
1179        // First sync b from a, applying the "a is always right" merge rule.
1180        let a2 = from.clone();
1181        let a_tree = from.page_ranges();
1182
1183        let mut to2 = to.clone();
1184        let want = diff(to2.page_ranges(), a_tree);
1185
1186        let mut count = 0;
1187        for range in want {
1188            for (k, v) in a2.key_range_iter(range.start()..=range.end()) {
1189                to.upsert(k.clone(), *v);
1190                count += 1;
1191            }
1192        }
1193
1194        count
1195    }
1196}