merkle_search_tree/
tree.rs

1use std::{fmt::Debug, marker::PhantomData, num::NonZeroU8};
2
3use siphasher::sip128::SipHasher24;
4
5use crate::{
6    diff::PageRange,
7    digest::{self, siphash::SipHasher, Hasher, RootHash, ValueDigest},
8    node::Node,
9    node_iter::NodeIter,
10    page::{insert_intermediate_page, Page},
11    visitor::{page_range_hash::PageRangeHashVisitor, Visitor},
12    UpsertResult,
13};
14
15/// An alias for the default hash implementation.
16pub(crate) type DefaultHasher = SipHasher;
17
18/// The default level base used when computing the tree level a hash belongs to.
19pub(crate) const DEFAULT_LEVEL_BASE: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(16) }; // Safety: Not 0
20
21/// An implementation of the Merkle Search Tree as described in [Merkle Search
22/// Trees: Efficient State-Based CRDTs in Open Networks][paper].
23///
24/// This implementation stores only keys directly in the tree - hashes of values
25/// are retained instead of the actual value. This allows greatest flexibility,
26/// as the user can choose the most appropriate key/value storage data
27/// structure, while using the MST strictly for anti-entropy / Merkle proofs.
28///
29/// # Merkle Search Trees
30///
31/// In addition to the normal hash & consistency properties of a regular
32/// Merkle/hash tree, a MST is a searchable balanced B-tree with variable,
33/// probabilistically bounded page sizes and a deterministic representation
34/// irrespective of insert order - these properties make a MST a useful data
35/// structure for efficient state-based CRDT replication and anti-entropy.
36///
37/// Keys are stored in sort order (from min to max) in an MST. If monotonic keys
38/// are inserted, a minimal amount of hash re-computation needs to be performed
39/// as the nodes & pages for most of the older keys remain unchanged; this
40/// reduces the overhead of anti-entropy as fewer intermediate hashes need
41/// recomputing and exchanging during reconciliation.
42///
43/// # Portability & Compatibility
44///
45/// For two [`MerkleSearchTree`] to be useful, both instances must produce
46/// identical hash digests for a given input. To do so, they must be using the
47/// same [`Hasher`] implementation, and in turn it must output a deterministic
48/// hash across all peers interacting with the [`MerkleSearchTree`].
49///
50/// For ease of use, this library uses the standard library [`Hash`] trait by
51/// default to hash key and value types. The documentation for the trait warns
52/// it is not guaranteed to produce identical hashes for the same data across
53/// different compilation platforms and Rust compiler versions.
54///
55/// If you intend to interact with peers across multiple platforms and/or Rust
56/// versions, you should consider implementing a fully-deterministic [`Hasher`]
57/// specialised to your key/value types that does not make use of the [`Hash`]
58/// trait for correctness.
59///
60/// Any change to the underlying hash construction algorithm implemented in this
61/// crate that would cause existing hashes to no longer match will not occur
62/// without a breaking change major semver version bump once this crate reaches
63/// stability (>=1.0.0).
64///
65/// # Lazy Tree Hash Generation
66///
67/// Each page within the tree maintains a cache of the pre-computed hash of
68/// itself, and the sub-tree rooted from it (all pages & nodes below it).
69/// Successive root hash queries will re-use this cached value to avoid hashing
70/// the full tree each time.
71///
72/// Upserting elements into the tree invalidates the cached hashes of the pages
73/// along the path to the leaf, and the leaf page itself. To amortise the cost
74/// of regenerating these hashes, the affected pages are marked as "dirty",
75/// causing them to be rehashed next time the root hash is requested. This
76/// allows hash regeneration to be occur once per batch of upsert operations.
77///
78/// # Example
79///
80/// ```
81/// use merkle_search_tree::MerkleSearchTree;
82///
83/// let mut t = MerkleSearchTree::default();
84/// t.upsert("bananas", &"great");
85/// t.upsert("plátano", &"muy bien");
86///
87/// // Obtain a root hash / merkle proof covering all the tree data
88/// let hash_1 = t.root_hash().to_owned();
89/// println!("{:?}", hash_1.as_ref());
90///
91/// // Modify the MST, reflecting the new value of an existing key
92/// t.upsert("bananas", &"amazing");
93///
94/// // Obtain an updated root hash
95/// let hash_2 = t.root_hash().to_owned();
96/// println!("{:?}", hash_2);
97///
98/// // The root hash changes to reflect the changed state
99/// assert_ne!(hash_1.as_ref(), hash_2.as_ref());
100/// ```
101///
102/// [paper]: https://inria.hal.science/hal-02303490
103#[derive(Debug, Clone)]
104pub struct MerkleSearchTree<K, V, H = DefaultHasher, const N: usize = 16> {
105    /// User-provided hasher implementation used for key/value digests.
106    hasher: H,
107
108    /// Internal hasher used to produce page/root digests.
109    tree_hasher: SipHasher24,
110
111    root: Page<N, K>,
112    root_hash: Option<RootHash>,
113    level_base: NonZeroU8,
114
115    _value_type: PhantomData<V>,
116}
117
118impl<K, V> Default for MerkleSearchTree<K, V> {
119    fn default() -> Self {
120        Self {
121            hasher: SipHasher::default(),
122            tree_hasher: SipHasher24::default(),
123            root: Page::new(0, vec![]),
124            root_hash: None,
125            level_base: DEFAULT_LEVEL_BASE,
126            _value_type: Default::default(),
127        }
128    }
129}
130
131impl<K, V, H, const N: usize> MerkleSearchTree<K, V, H, N> {
132    /// Initialise a new [`MerkleSearchTree`] using the provided [`Hasher`] of
133    /// keys & values.
134    #[deprecated(since = "0.8.0", note = "please use `Builder::with_hasher` instead")]
135    pub fn new_with_hasher(hasher: H) -> Self {
136        Self {
137            hasher,
138            tree_hasher: SipHasher24::default(),
139            root: Page::new(0, vec![]),
140            root_hash: None,
141            level_base: DEFAULT_LEVEL_BASE,
142            _value_type: PhantomData,
143        }
144    }
145
146    /// Return the precomputed root hash, if any.
147    ///
148    /// This method never performs any hashing - if there's no precomputed hash
149    /// available, this immediately returns [`None`]. This operation is `O(1)`.
150    ///
151    /// If this returns [`None`], call [`MerkleSearchTree::root_hash()`] to
152    /// regenerate the root hash.
153    #[inline]
154    pub fn root_hash_cached(&self) -> Option<&RootHash> {
155        self.root_hash.as_ref()
156    }
157
158    /// Perform a depth-first, in-order traversal, yielding each [`Page`] and
159    /// [`Node`] to `visitor`.
160    ///
161    /// An in-order traversal yields nodes in key order, from min to max.
162    pub fn in_order_traversal<'a, T>(&'a self, visitor: &mut T)
163    where
164        T: Visitor<'a, N, K>,
165    {
166        self.root.in_order_traversal(visitor, false);
167    }
168
169    /// Iterate over all [`Node`] in the tree in ascending key order.
170    ///
171    /// This method can be used to inspect the keys stored in the tree:
172    ///
173    /// ```
174    /// # use merkle_search_tree::*;
175    /// #
176    /// let mut t = MerkleSearchTree::default();
177    /// t.upsert("bananas1", &42);
178    /// t.upsert("bananas3", &42);
179    /// t.upsert("bananas4", &42);
180    /// t.upsert("bananas2", &42);
181    ///
182    /// // Collect the keys within the tree
183    /// let keys = t.node_iter().map(|v| *v.key()).collect::<Vec<_>>();
184    ///
185    /// // Nodes are yield in ascending key order:
186    /// assert_eq!(
187    ///     keys.as_slice(),
188    ///     ["bananas1", "bananas2", "bananas3", "bananas4"]
189    /// )
190    /// ```
191    pub fn node_iter(&self) -> impl Iterator<Item = &'_ Node<N, K>> {
192        NodeIter::new(&self.root)
193    }
194}
195
196impl<K, V, H, const N: usize> MerkleSearchTree<K, V, H, N>
197where
198    K: AsRef<[u8]>,
199{
200    /// Generate the root hash if necessary, returning the result.
201    ///
202    /// If there's a precomputed root hash, it is immediately returned.
203    ///
204    /// If no cached root hash is available all tree pages with modified child
205    /// nodes are rehashed and the resulting new root hash is returned.
206    #[inline]
207    pub fn root_hash(&mut self) -> &RootHash {
208        self.root.maybe_generate_hash(&self.tree_hasher);
209        self.root_hash = self.root.hash().cloned().map(RootHash::new);
210
211        #[cfg(feature = "digest_base64")] // Required for display impl
212        debug!(
213            root_hash=%self.root_hash.as_ref().unwrap(),
214            "regenerated root hash"
215        );
216
217        self.root_hash.as_ref().unwrap()
218    }
219
220    /// Serialise the key interval and hash covering each [`Page`] within this
221    /// tree.
222    ///
223    /// Page hashes are generated on demand - this method returns [`None`] if
224    /// the tree needs rehashing (call [`MerkleSearchTree::root_hash()`] and
225    /// retry).
226    ///
227    /// Performs a pre-order traversal of all pages within this tree and emits a
228    /// [`PageRange`] per page that covers the min/max key of the subtree at the
229    /// given page.
230    ///
231    /// The first page is the tree root, and as such has a key min/max that
232    /// equals the min/max of the keys stored within this tree.
233    ///
234    /// # Reference vs. Owned
235    ///
236    /// This method borrows the underlying keys within the tree - this avoids
237    /// cloning the keys that form the page bounds when generating the
238    /// [`PageRange`] to maximise performance, however this also prevents the
239    /// caller from mutating the tree whilst holding onto the serialised pages
240    /// (an immutable reference to the tree).
241    ///
242    /// If the key type (`K`) implements [`Clone`], a set of owned serialised
243    /// pages that do not borrow from the tree can be created by constructing a
244    /// [`PageRangeSnapshot`] from the returned [`PageRange`] array:
245    ///
246    /// ```
247    /// # use merkle_search_tree::{*, diff::*};
248    /// #
249    /// let mut t = MerkleSearchTree::default();
250    /// t.upsert("bananas", &42);
251    ///
252    /// // Rehash the tree before generating the page ranges
253    /// let _ = t.root_hash();
254    ///
255    /// // Generate the hashes & page ranges
256    /// let ranges = t.serialise_page_ranges().unwrap();
257    ///
258    /// // At this point, attempting to insert into the tree fails because the
259    /// // tree is already borrowed as immutable.
260    /// //
261    /// // Instead clone all the keys and generate a snapshot:
262    /// let snap = PageRangeSnapshot::from(ranges);
263    ///
264    /// // And the tree is free to be mutated while `snap` exists!
265    /// t.upsert("plátanos", &42);
266    ///
267    /// // The `snap` yields `PageRange` for iteration:
268    /// assert_eq!(diff(snap.iter(), snap.iter()), vec![]);
269    /// ```
270    ///
271    /// [`PageRangeSnapshot`]: crate::diff::PageRangeSnapshot
272    #[inline]
273    pub fn serialise_page_ranges(&self) -> Option<Vec<PageRange<'_, K>>>
274    where
275        K: PartialOrd,
276    {
277        // The tree hash must be up-to-date.
278        self.root_hash_cached()?;
279
280        if self.root.nodes().is_empty() {
281            return Some(vec![]);
282        }
283
284        let mut v = PageRangeHashVisitor::default();
285        self.root.in_order_traversal(&mut v, false);
286        Some(v.finalise())
287    }
288}
289
290impl<K, V, H, const N: usize> MerkleSearchTree<K, V, H, N>
291where
292    K: PartialOrd,
293    H: Hasher<N, K> + Hasher<N, V>,
294{
295    /// Add or update the value for `key`.
296    ///
297    /// This method invalidates the cached, precomputed root hash value, if any
298    /// (even if the value is not modified).
299    ///
300    /// # Value Hash
301    ///
302    /// The tree stores a the hashed representation of `value` - the actual
303    /// value is not stored in the tree.
304    #[inline]
305    pub fn upsert(&mut self, key: K, value: &'_ V) {
306        let value_hash = ValueDigest::new(self.hasher.hash(value));
307        let level = digest::level(&self.hasher.hash(&key), self.level_base);
308
309        // Invalidate the root hash - it always changes when a key is upserted.
310        self.root_hash = None;
311
312        if let UpsertResult::InsertIntermediate(key) =
313            self.root.upsert(key, level, value_hash.clone())
314        {
315            // As an optimisation and simplification, if the current root is
316            // empty, simply replace it with the new root.
317            if self.root.nodes().is_empty() {
318                let node = Node::new(key, value_hash, None);
319                self.root = Page::new(level, vec![node]);
320                return;
321            }
322
323            insert_intermediate_page(&mut &mut self.root, key, level, value_hash);
324        }
325    }
326}
327
328impl<H> crate::builder::Builder<H> {
329    /// Consume this [`Builder`] and return the configured [`MerkleSearchTree`].
330    ///
331    /// [`Builder`]: crate::builder::Builder
332    pub fn build<K, V, const N: usize>(self) -> MerkleSearchTree<K, V, H, N> {
333        MerkleSearchTree {
334            hasher: self.hasher,
335            tree_hasher: SipHasher24::default(),
336            root: Page::new(0, vec![]),
337            root_hash: None,
338            level_base: self.level_base,
339            _value_type: PhantomData,
340        }
341    }
342}
343
344#[cfg(test)]
345mod tests {
346    use std::{
347        collections::{BTreeSet, HashSet},
348        hash::Hasher as _,
349    };
350
351    use proptest::prelude::*;
352    use siphasher::sip128::Hasher128;
353
354    use super::*;
355    use crate::{
356        assert_tree,
357        builder::Builder,
358        digest::{
359            mock::{LevelKey, MockHasher},
360            Digest,
361        },
362        test_util::IntKey,
363        visitor::{
364            assert_count::InvariantAssertCount, assert_order::InvariantAssertOrder, nop::NopVisitor,
365        },
366    };
367
368    /// A hash implementation that does not rely on the stdlib Hash trait, and
369    /// therefore produces stable hashes across rust version changes /
370    /// platforms.
371    #[derive(Debug, Default)]
372    struct FixtureHasher;
373    impl Hasher<16, IntKey> for FixtureHasher {
374        fn hash(&self, value: &IntKey) -> Digest<16> {
375            self.hash(&value.unwrap())
376        }
377    }
378    impl Hasher<16, u64> for FixtureHasher {
379        fn hash(&self, value: &u64) -> Digest<16> {
380            let mut h = SipHasher24::default();
381            h.write_u64(*value);
382            Digest::new(h.finish128().as_bytes())
383        }
384    }
385
386    #[test]
387    fn test_hash_fixture() {
388        let mut t = Builder::default()
389            .with_hasher(FixtureHasher::default())
390            .build();
391
392        for i in 0..1000 {
393            t.upsert(IntKey::new(i), &i);
394        }
395
396        // This hash ensures that any changes to this construction do not result
397        // in existing hashes being invalidated / unequal for the same data.
398        let fixture_hash = [
399            57, 77, 199, 66, 89, 217, 207, 166, 136, 181, 45, 80, 108, 80, 94, 3,
400        ];
401
402        assert_eq!(t.root_hash().as_ref(), &fixture_hash);
403    }
404
405    #[test]
406    fn test_level_generation() {
407        let h = Digest::new([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
408        assert_eq!(digest::level(&h, DEFAULT_LEVEL_BASE), 32);
409
410        let h = Digest::new([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
411        assert_eq!(digest::level(&h, DEFAULT_LEVEL_BASE), 0);
412
413        let h = Digest::new([0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
414        assert_eq!(digest::level(&h, DEFAULT_LEVEL_BASE), 1);
415
416        let h = Digest::new([0, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
417        assert_eq!(digest::level(&h, DEFAULT_LEVEL_BASE), 3);
418    }
419
420    macro_rules! test_insert {
421        (
422			$name:ident,
423			values = [$(($key:expr, $value:expr)),*]
424		) => {
425            paste::paste! {
426                #[test]
427                fn [<test_ $name>]() {
428                    let mut t = Builder::default()
429                        .with_hasher(MockHasher::default())
430                        .build();
431
432					$(
433						t.upsert($key, $value);
434					)*
435
436                    assert_tree!(t)
437                }
438            }
439        };
440    }
441
442    test_insert!(one, values = [(LevelKey::new("key", 0), &"bananas")]);
443
444    test_insert!(
445        one_non_zero_level,
446        values = [(LevelKey::new("key", 4), &"bananas")]
447    );
448
449    // Assert the tree is ordered by key.
450    test_insert!(
451        two_in_order,
452        values = [
453            (LevelKey::new("A", 0), &"bananas"),
454            (LevelKey::new("B", 0), &"bananas")
455        ]
456    );
457
458    // Assert the tree becomes ordered by key.
459    test_insert!(
460        two_unordered,
461        values = [
462            (LevelKey::new("B", 0), &"bananas"),
463            (LevelKey::new("A", 0), &"bananas")
464        ]
465    );
466
467    // Insert two values
468    //
469    // Level 0 [ A ]
470    //
471    // Then insert B, which is destined for level 1, and greater than A;
472    // therefore B is placed in level 1 as the new root, and A/level 0 is
473    // attached to the lt_pointer of B.
474    test_insert!(
475        root_split_page_gt,
476        values = [
477            (LevelKey::new("A", 0), &"bananas"),
478            (LevelKey::new("B", 1), &"bananas")
479        ]
480    );
481
482    // Insert two values
483    //
484    // Level 0 [ B ]
485    //
486    // Then insert A, which is destined for level 1, and less than B;
487    // therefore A is placed in level 1 as the new root, and B/level 0 is
488    // attached to the high page of level 1 because A < B.
489    test_insert!(
490        root_split_page_lt,
491        values = [
492            (LevelKey::new("B", 0), &"bananas"),
493            (LevelKey::new("A", 1), &"bananas")
494        ]
495    );
496
497    // Insert two values, the second with a level higher than the first, causing
498    // the root page to be split, both with differing (non-consecutive) levels.
499    test_insert!(
500        root_split_non_zero_step_gt,
501        values = [
502            (LevelKey::new("A", 3), &"bananas"),
503            (LevelKey::new("B", 9), &"bananas")
504        ]
505    );
506
507    // Insert two values, the second with a level higher than the first, causing
508    // the root page to be split, both with differing (non-consecutive) levels.
509    test_insert!(
510        root_split_non_zero_step_lt,
511        values = [
512            (LevelKey::new("B", 3), &"bananas"),
513            (LevelKey::new("A", 9), &"bananas")
514        ]
515    );
516
517    // Insert elements that cause the root to split, and then the child page to
518    // split. Each successive element causes a new page to be created and added
519    // to the previous level's high page.
520    test_insert!(
521        non_root_page_split_gt,
522        values = [
523            (LevelKey::new("A", 6), &"bananas"),
524            (LevelKey::new("B", 4), &"bananas"),
525            (LevelKey::new("C", 2), &"bananas")
526        ]
527    );
528
529    test_insert!(
530        non_root_page_split_lt,
531        values = [
532            (LevelKey::new("C", 6), &"bananas"),
533            (LevelKey::new("B", 4), &"bananas"),
534            (LevelKey::new("A", 2), &"bananas")
535        ]
536    );
537
538    // Upsert for an existing key does not create more nodes.
539    test_insert!(
540        update,
541        values = [
542            (LevelKey::new("A", 6), &"bananas"),
543            (LevelKey::new("A", 6), &"platanos")
544        ]
545    );
546
547    // Upsert for an existing key does not create more nodes.
548    test_insert!(
549        split_child_into_two_empty_gte_page,
550        values = [
551            (LevelKey::new("A", 5), &"platanos"),
552            (LevelKey::new("B", 0), &"platanos"),
553            (LevelKey::new("C", 0), &"platanos"),
554            (LevelKey::new("D", 1), &"platanos")
555        ]
556    );
557
558    // Upsert for an existing key does not create more nodes.
559    test_insert!(
560        split_child_into_two_with_gte_page,
561        values = [
562            (LevelKey::new("A", 5), &"platanos"),
563            (LevelKey::new("B", 0), &"platanos"),
564            (LevelKey::new("C", 0), &"platanos"),
565            (LevelKey::new("E", 0), &"platanos"),
566            (LevelKey::new("D", 1), &"platanos")
567        ]
568    );
569
570    // Ensure that when inserting a node greater than all existing nodes in a
571    // page, the high page is split if necessary
572    test_insert!(
573        greatest_key_splits_high_page,
574        values = [
575            (LevelKey::new(11, 1), &"bananas"),
576            (LevelKey::new(10, 2), &"bananas"),
577            (LevelKey::new(12, 2), &"bananas")
578        ]
579    );
580
581    // When inserting an intermediate page, ensure the high-page of the
582    // less-than split is processed.
583    test_insert!(
584        intermediate_page_move_all_nodes_and_high_page,
585        values = [
586            (LevelKey::new(1, 1), &"bananas"),
587            (LevelKey::new(2, 1), &"bananas"),
588            (LevelKey::new(4, 0), &"bananas"),
589            (LevelKey::new(3, 2), &"bananas")
590        ]
591    );
592
593    test_insert!(
594        intermediate_page_move_all_nodes_and_high_page_subset,
595        values = [
596            (LevelKey::new(1, 1), &"bananas"),
597            (LevelKey::new(2, 1), &"bananas"),
598            (LevelKey::new(3, 0), &"bananas"),
599            (LevelKey::new(5, 0), &"bananas"),
600            (LevelKey::new(4, 2), &"bananas")
601        ]
602    );
603
604    test_insert!(
605        child_page_split_add_intermediate,
606        values = [
607            (LevelKey::new("K", 2), &"bananas"),
608            (LevelKey::new("D", 0), &"bananas"),
609            (LevelKey::new("E", 1), &"bananas")
610        ]
611    );
612
613    test_insert!(
614        equal_page_move_all_nodes_and_high_page,
615        values = [
616            (LevelKey::new(2_usize, 64), &"bananas"),
617            (LevelKey::new(5_usize, 20), &"bananas"),
618            (LevelKey::new(3_usize, 52), &"bananas"),
619            (LevelKey::new(4_usize, 64), &"bananas")
620        ]
621    );
622
623    test_insert!(
624        equal_page_move_all_nodes_and_high_page_subset,
625        values = [
626            (LevelKey::new(2_usize, 64), &"bananas"),
627            (LevelKey::new(6_usize, 20), &"bananas"),
628            (LevelKey::new(4_usize, 20), &"bananas"),
629            (LevelKey::new(3_usize, 52), &"bananas"),
630            (LevelKey::new(5_usize, 64), &"bananas")
631        ]
632    );
633
634    test_insert!(
635        split_page_all_gte_nodes_with_lt_pointer,
636        values = [
637            (LevelKey::new(1, 0), &"bananas"),
638            (LevelKey::new(0, 1), &"bananas")
639        ]
640    );
641
642    test_insert!(
643        split_page_all_lt_nodes_with_high_page,
644        values = [
645            (LevelKey::new(0, 0), &"bananas"),
646            (LevelKey::new(1, 1), &"bananas")
647        ]
648    );
649
650    test_insert!(
651        insert_intermediate_recursive_lt_pointer,
652        values = [
653            (LevelKey::new(1_usize, 1), &""),
654            (LevelKey::new(2_usize, 0), &""),
655            (LevelKey::new(4_usize, 1), &""),
656            (LevelKey::new(3_usize, 2), &"")
657        ]
658    );
659
660    test_insert!(
661        split_page_move_gte_lt_pointer_to_high_page,
662        values = [
663            (LevelKey::new(1_usize, 1), &""),
664            (LevelKey::new(2_usize, 0), &""),
665            (LevelKey::new(4_usize, 1), &""),
666            (LevelKey::new(3_usize, 2), &"")
667        ]
668    );
669
670    test_insert!(
671        split_page_move_input_high_page_to_gte_page,
672        values = [
673            (LevelKey::new(6, 0), &"bananas"),
674            (LevelKey::new(3, 21), &"bananas"),
675            (LevelKey::new(0, 21), &"bananas"),
676            (LevelKey::new(1, 22), &"bananas")
677        ]
678    );
679
680    proptest! {
681        // Invariant 1: the tree structure is deterministic for a given set of
682        // inputs (regardless of insert order)
683        #[test]
684        fn prop_deterministic_construction(keys in proptest::collection::vec(any::<u64>(), 0..64)) {
685            // keys is a HashSet of (keys, level), which will iterate in random
686            // order.
687            //
688            // Collect the items into a vector and sort it, producing a
689            // different insert ordering from the HashSet iter.
690            let mut b_values = keys.to_vec();
691            b_values.sort();
692            b_values.dedup();
693
694            let a_values = keys;
695
696            let mut a = MerkleSearchTree::default();
697            let mut b = MerkleSearchTree::default();
698
699            let want_len = b_values.len();
700
701            let mut unique = HashSet::new();
702            for key in a_values {
703                if unique.insert(key) {
704                    a.upsert(IntKey::new(key), &"bananas");
705                }
706            }
707            for key in b_values {
708                b.upsert(IntKey::new(key), &"bananas");
709            }
710
711            assert_node_equal(&mut a, &mut b);
712
713            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
714            a.in_order_traversal(&mut asserter);
715            asserter.unwrap_count(want_len);
716
717            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
718            b.in_order_traversal(&mut asserter);
719            asserter.unwrap_count(want_len);
720        }
721
722        // Invariant 2: values in the tree are stored in key order.
723        #[test]
724        fn prop_in_order_traversal_key_order(keys in proptest::collection::vec(any::<u64>(), 0..64)) {
725            let mut t = MerkleSearchTree::default();
726
727            let mut unique = HashSet::new();
728            let mut want_len = 0;
729
730            for key in keys {
731                if unique.insert(key) {
732                    want_len += 1;
733                    t.upsert(IntKey::new(key), &"bananas");
734                }
735            }
736
737            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
738            t.in_order_traversal(&mut asserter);
739            asserter.unwrap_count(want_len);
740        }
741
742        // Invariant 3: two independent trees contain the same data iff their
743        // root hashes are identical.
744        //
745        // Additionally the serialised page ranges MUST match iff the trees
746        // match.
747        #[test]
748        fn prop_root_hash_data_equality(keys in proptest::collection::vec(any::<u64>(), 0..64)) {
749            let mut a = MerkleSearchTree::default();
750            let mut b = MerkleSearchTree::default();
751
752            // They are equal when empty.
753            assert_eq!(a.root_hash(), b.root_hash());
754
755            let mut unique = HashSet::new();
756            let last_entry = keys.first().cloned();
757            for key in keys {
758                if !unique.insert(key) {
759                    // Root hashes may compute to the same value if the same
760                    // (key, value) pair is inserted twice, causing the
761                    // divergence assert below to spuriously trigger.
762                    continue;
763                }
764
765                // Add the key to tree A
766                a.upsert(IntKey::new(key), &"bananas");
767                assert_eq!(a.root_hash_cached(), None);
768
769                // The trees have now diverged
770                assert_node_not_equal(&mut a, &mut b);
771
772                // Add the key to tree B
773                b.upsert(IntKey::new(key), &"bananas");
774                assert_eq!(b.root_hash_cached(), None);
775
776                // And now the tees have converged
777                assert_node_equal(&mut a, &mut b);
778            }
779
780            // Update a value for an existing key
781            if let Some(key) = last_entry {
782                 b.upsert(IntKey::new(key), &"platanos");
783                 assert_eq!(b.root_hash_cached(), None);
784
785                 // The trees diverge
786                assert_node_not_equal(&mut a, &mut b);
787
788                 // And converge once again
789                 a.upsert(IntKey::new(key), &"platanos");
790                 assert_eq!(a.root_hash_cached(), None);
791
792                // And now the tees have converged
793                assert_node_equal(&mut a, &mut b);
794            }
795
796            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
797            a.in_order_traversal(&mut asserter);
798            asserter.unwrap_count(unique.len());
799
800            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
801            b.in_order_traversal(&mut asserter);
802            asserter.unwrap_count(unique.len());
803        }
804
805        // Invariant: the node iter yields every node in the tree in ascending
806        // key order.
807        #[test]
808        fn prop_node_iter(keys in proptest::collection::vec(any::<u64>(), 0..64)) {
809            let mut t = MerkleSearchTree::default();
810
811            let mut inserted = BTreeSet::new();
812            for key in keys {
813                t.upsert(key, &key);
814                inserted.insert(key);
815            }
816
817            // Use the node iter to visit all nodes, preserving the key order in
818            // the returned iterator.
819            let got = t
820                .node_iter()
821                .map(|v| *v.key());
822
823            // The iterator must yield all keys in the same order as a (sorted!)
824            // BTreeSet to satisfy the invariant.
825            assert!(inserted.into_iter().eq(got));
826        }
827
828        // Assert that lowering the level base decreases the page count
829        // (increasing the page size proportionally).
830        #[test]
831        fn prop_varying_b_changes_page_count(
832            low in 5_u8..10,     // A "low" B value
833            high in 200_u8..210, // A "higher" B value
834        ) {
835            let low_base_page_count = {
836                let low = NonZeroU8::new(low).unwrap();
837                let mut t = Builder::default().with_level_base(low).build();
838                for i in (0..u64::MAX).rev().take(1_000) {
839                    t.upsert(IntKey::new(i), &i);
840                }
841
842                t.root_hash();
843                t.serialise_page_ranges().map(|v| v.len()).unwrap()
844            };
845
846            let high_base_page_count = {
847                let high = NonZeroU8::new(high).unwrap();
848                let mut t = Builder::default().with_level_base(high).build();
849                for i in (0..u64::MAX).rev().take(1_000) {
850                    t.upsert(IntKey::new(i), &i);
851                }
852
853                t.root_hash();
854                t.serialise_page_ranges().map(|v| v.len()).unwrap()
855            };
856
857            // The "low" B value yields more pages than the "high" B value.
858            assert!(low_base_page_count >= high_base_page_count);
859        }
860    }
861
862    fn assert_node_equal<K, V>(a: &mut MerkleSearchTree<K, V>, b: &mut MerkleSearchTree<K, V>)
863    where
864        K: AsRef<[u8]> + PartialOrd + Debug,
865    {
866        assert_eq!(a.root_hash(), b.root_hash(), "root hashes should be equal");
867        assert_eq!(
868            a.serialise_page_ranges(),
869            b.serialise_page_ranges(),
870            "serialised pages should match"
871        );
872        // The cached values must always match their computed values.
873        assert_eq!(
874            b.root_hash_cached().unwrap().clone(),
875            *b.root_hash(),
876            "cached hashes should be equal for b"
877        );
878        assert_eq!(
879            a.root_hash_cached().unwrap().clone(),
880            *a.root_hash(),
881            "cached hashes should be equal for a"
882        );
883    }
884
885    fn assert_node_not_equal<K, V>(a: &mut MerkleSearchTree<K, V>, b: &mut MerkleSearchTree<K, V>)
886    where
887        K: AsRef<[u8]> + PartialOrd + Debug,
888    {
889        assert_ne!(
890            a.root_hash(),
891            b.root_hash(),
892            "root hash should not be equal"
893        );
894        assert_ne!(
895            a.serialise_page_ranges(),
896            b.serialise_page_ranges(),
897            "serialised pages should not match"
898        );
899        // The cached values must always match their computed values.
900        assert_eq!(
901            b.root_hash_cached().unwrap().clone(),
902            *b.root_hash(),
903            "cached hashes should always be equal for b"
904        );
905        assert_eq!(
906            a.root_hash_cached().unwrap().clone(),
907            *a.root_hash(),
908            "cached hashes should always be equal for a"
909        );
910    }
911}