// merkle_search_tree/tree.rs
1use std::{fmt::Debug, marker::PhantomData, num::NonZeroU8};
2
3use siphasher::sip128::SipHasher24;
4
5use crate::{
6 diff::PageRange,
7 digest::{self, siphash::SipHasher, Hasher, RootHash, ValueDigest},
8 node::Node,
9 node_iter::NodeIter,
10 page::{insert_intermediate_page, Page},
11 visitor::{page_range_hash::PageRangeHashVisitor, Visitor},
12 UpsertResult,
13};
14
15/// An alias for the default hash implementation.
16pub(crate) type DefaultHasher = SipHasher;
17
18/// The default level base used when computing the tree level a hash belongs to.
19pub(crate) const DEFAULT_LEVEL_BASE: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(16) }; // Safety: Not 0
20
/// An implementation of the Merkle Search Tree as described in [Merkle Search
/// Trees: Efficient State-Based CRDTs in Open Networks][paper].
///
/// This implementation stores only keys directly in the tree - hashes of values
/// are retained instead of the actual value. This allows greatest flexibility,
/// as the user can choose the most appropriate key/value storage data
/// structure, while using the MST strictly for anti-entropy / Merkle proofs.
///
/// # Merkle Search Trees
///
/// In addition to the normal hash & consistency properties of a regular
/// Merkle/hash tree, a MST is a searchable balanced B-tree with variable,
/// probabilistically bounded page sizes and a deterministic representation
/// irrespective of insert order - these properties make a MST a useful data
/// structure for efficient state-based CRDT replication and anti-entropy.
///
/// Keys are stored in sort order (from min to max) in an MST. If monotonic keys
/// are inserted, a minimal amount of hash re-computation needs to be performed
/// as the nodes & pages for most of the older keys remain unchanged; this
/// reduces the overhead of anti-entropy as fewer intermediate hashes need
/// recomputing and exchanging during reconciliation.
///
/// # Portability & Compatibility
///
/// For two [`MerkleSearchTree`] to be useful, both instances must produce
/// identical hash digests for a given input. To do so, they must be using the
/// same [`Hasher`] implementation, and in turn it must output a deterministic
/// hash across all peers interacting with the [`MerkleSearchTree`].
///
/// For ease of use, this library uses the standard library [`Hash`] trait by
/// default to hash key and value types. The documentation for the trait warns
/// it is not guaranteed to produce identical hashes for the same data across
/// different compilation platforms and Rust compiler versions.
///
/// If you intend to interact with peers across multiple platforms and/or Rust
/// versions, you should consider implementing a fully-deterministic [`Hasher`]
/// specialised to your key/value types that does not make use of the [`Hash`]
/// trait for correctness.
///
/// Any change to the underlying hash construction algorithm implemented in this
/// crate that would cause existing hashes to no longer match will not occur
/// without a breaking change major semver version bump once this crate reaches
/// stability (>=1.0.0).
///
/// # Lazy Tree Hash Generation
///
/// Each page within the tree maintains a cache of the pre-computed hash of
/// itself, and the sub-tree rooted from it (all pages & nodes below it).
/// Successive root hash queries will re-use this cached value to avoid hashing
/// the full tree each time.
///
/// Upserting elements into the tree invalidates the cached hashes of the pages
/// along the path to the leaf, and the leaf page itself. To amortise the cost
/// of regenerating these hashes, the affected pages are marked as "dirty",
/// causing them to be rehashed next time the root hash is requested. This
/// allows hash regeneration to occur once per batch of upsert operations.
///
/// # Example
///
/// ```
/// use merkle_search_tree::MerkleSearchTree;
///
/// let mut t = MerkleSearchTree::default();
/// t.upsert("bananas", &"great");
/// t.upsert("plátano", &"muy bien");
///
/// // Obtain a root hash / merkle proof covering all the tree data
/// let hash_1 = t.root_hash().to_owned();
/// println!("{:?}", hash_1.as_ref());
///
/// // Modify the MST, reflecting the new value of an existing key
/// t.upsert("bananas", &"amazing");
///
/// // Obtain an updated root hash
/// let hash_2 = t.root_hash().to_owned();
/// println!("{:?}", hash_2);
///
/// // The root hash changes to reflect the changed state
/// assert_ne!(hash_1.as_ref(), hash_2.as_ref());
/// ```
///
/// [paper]: https://inria.hal.science/hal-02303490
#[derive(Debug, Clone)]
pub struct MerkleSearchTree<K, V, H = DefaultHasher, const N: usize = 16> {
    /// User-provided hasher implementation used for key/value digests.
    hasher: H,

    /// Internal hasher used to produce page/root digests.
    tree_hasher: SipHasher24,

    /// The root page of the tree (an empty level-0 page for a new tree).
    root: Page<N, K>,
    /// Cached root hash; `None` whenever the tree has been modified since the
    /// hash was last computed (see "Lazy Tree Hash Generation" above).
    root_hash: Option<RootHash>,
    /// The base used when deriving the level a key's hash belongs to.
    level_base: NonZeroU8,

    /// Marker binding the value type `V` - values themselves are never stored,
    /// only their digests.
    _value_type: PhantomData<V>,
}
117
118impl<K, V> Default for MerkleSearchTree<K, V> {
119 fn default() -> Self {
120 Self {
121 hasher: SipHasher::default(),
122 tree_hasher: SipHasher24::default(),
123 root: Page::new(0, vec![]),
124 root_hash: None,
125 level_base: DEFAULT_LEVEL_BASE,
126 _value_type: Default::default(),
127 }
128 }
129}
130
131impl<K, V, H, const N: usize> MerkleSearchTree<K, V, H, N> {
132 /// Initialise a new [`MerkleSearchTree`] using the provided [`Hasher`] of
133 /// keys & values.
134 #[deprecated(since = "0.8.0", note = "please use `Builder::with_hasher` instead")]
135 pub fn new_with_hasher(hasher: H) -> Self {
136 Self {
137 hasher,
138 tree_hasher: SipHasher24::default(),
139 root: Page::new(0, vec![]),
140 root_hash: None,
141 level_base: DEFAULT_LEVEL_BASE,
142 _value_type: PhantomData,
143 }
144 }
145
146 /// Return the precomputed root hash, if any.
147 ///
148 /// This method never performs any hashing - if there's no precomputed hash
149 /// available, this immediately returns [`None`]. This operation is `O(1)`.
150 ///
151 /// If this returns [`None`], call [`MerkleSearchTree::root_hash()`] to
152 /// regenerate the root hash.
153 #[inline]
154 pub fn root_hash_cached(&self) -> Option<&RootHash> {
155 self.root_hash.as_ref()
156 }
157
158 /// Perform a depth-first, in-order traversal, yielding each [`Page`] and
159 /// [`Node`] to `visitor`.
160 ///
161 /// An in-order traversal yields nodes in key order, from min to max.
162 pub fn in_order_traversal<'a, T>(&'a self, visitor: &mut T)
163 where
164 T: Visitor<'a, N, K>,
165 {
166 self.root.in_order_traversal(visitor, false);
167 }
168
169 /// Iterate over all [`Node`] in the tree in ascending key order.
170 ///
171 /// This method can be used to inspect the keys stored in the tree:
172 ///
173 /// ```
174 /// # use merkle_search_tree::*;
175 /// #
176 /// let mut t = MerkleSearchTree::default();
177 /// t.upsert("bananas1", &42);
178 /// t.upsert("bananas3", &42);
179 /// t.upsert("bananas4", &42);
180 /// t.upsert("bananas2", &42);
181 ///
182 /// // Collect the keys within the tree
183 /// let keys = t.node_iter().map(|v| *v.key()).collect::<Vec<_>>();
184 ///
185 /// // Nodes are yield in ascending key order:
186 /// assert_eq!(
187 /// keys.as_slice(),
188 /// ["bananas1", "bananas2", "bananas3", "bananas4"]
189 /// )
190 /// ```
191 pub fn node_iter(&self) -> impl Iterator<Item = &'_ Node<N, K>> {
192 NodeIter::new(&self.root)
193 }
194}
195
impl<K, V, H, const N: usize> MerkleSearchTree<K, V, H, N>
where
    K: AsRef<[u8]>,
{
    /// Generate the root hash if necessary, returning the result.
    ///
    /// If there's a precomputed root hash, it is immediately returned.
    ///
    /// If no cached root hash is available all tree pages with modified child
    /// nodes are rehashed and the resulting new root hash is returned.
    #[inline]
    pub fn root_hash(&mut self) -> &RootHash {
        // Rehash any pages whose cached hash was invalidated by an upsert.
        self.root.maybe_generate_hash(&self.tree_hasher);
        // Promote the root page's hash to the tree-wide root hash cache.
        self.root_hash = self.root.hash().cloned().map(RootHash::new);

        #[cfg(feature = "digest_base64")] // Required for display impl
        debug!(
            root_hash=%self.root_hash.as_ref().unwrap(),
            "regenerated root hash"
        );

        // The unwrap relies on maybe_generate_hash() above having populated
        // the root page hash.
        self.root_hash.as_ref().unwrap()
    }

    /// Serialise the key interval and hash covering each [`Page`] within this
    /// tree.
    ///
    /// Page hashes are generated on demand - this method returns [`None`] if
    /// the tree needs rehashing (call [`MerkleSearchTree::root_hash()`] and
    /// retry).
    ///
    /// Performs a pre-order traversal of all pages within this tree and emits a
    /// [`PageRange`] per page that covers the min/max key of the subtree at the
    /// given page.
    ///
    /// The first page is the tree root, and as such has a key min/max that
    /// equals the min/max of the keys stored within this tree.
    ///
    /// # Reference vs. Owned
    ///
    /// This method borrows the underlying keys within the tree - this avoids
    /// cloning the keys that form the page bounds when generating the
    /// [`PageRange`] to maximise performance, however this also prevents the
    /// caller from mutating the tree whilst holding onto the serialised pages
    /// (an immutable reference to the tree).
    ///
    /// If the key type (`K`) implements [`Clone`], a set of owned serialised
    /// pages that do not borrow from the tree can be created by constructing a
    /// [`PageRangeSnapshot`] from the returned [`PageRange`] array:
    ///
    /// ```
    /// # use merkle_search_tree::{*, diff::*};
    /// #
    /// let mut t = MerkleSearchTree::default();
    /// t.upsert("bananas", &42);
    ///
    /// // Rehash the tree before generating the page ranges
    /// let _ = t.root_hash();
    ///
    /// // Generate the hashes & page ranges
    /// let ranges = t.serialise_page_ranges().unwrap();
    ///
    /// // At this point, attempting to insert into the tree fails because the
    /// // tree is already borrowed as immutable.
    /// //
    /// // Instead clone all the keys and generate a snapshot:
    /// let snap = PageRangeSnapshot::from(ranges);
    ///
    /// // And the tree is free to be mutated while `snap` exists!
    /// t.upsert("plátanos", &42);
    ///
    /// // The `snap` yields `PageRange` for iteration:
    /// assert_eq!(diff(snap.iter(), snap.iter()), vec![]);
    /// ```
    ///
    /// [`PageRangeSnapshot`]: crate::diff::PageRangeSnapshot
    #[inline]
    pub fn serialise_page_ranges(&self) -> Option<Vec<PageRange<'_, K>>>
    where
        K: PartialOrd,
    {
        // The tree hash must be up-to-date.
        self.root_hash_cached()?;

        // An empty tree serialises to an empty set of page ranges.
        if self.root.nodes().is_empty() {
            return Some(vec![]);
        }

        let mut v = PageRangeHashVisitor::default();
        self.root.in_order_traversal(&mut v, false);
        Some(v.finalise())
    }
}
289
impl<K, V, H, const N: usize> MerkleSearchTree<K, V, H, N>
where
    K: PartialOrd,
    H: Hasher<N, K> + Hasher<N, V>,
{
    /// Add or update the value for `key`.
    ///
    /// This method invalidates the cached, precomputed root hash value, if any
    /// (even if the value is not modified).
    ///
    /// # Value Hash
    ///
    /// The tree stores the hashed representation of `value` - the actual
    /// value is not stored in the tree.
    #[inline]
    pub fn upsert(&mut self, key: K, value: &'_ V) {
        // Digest the value, and derive the target tree level from the key's
        // digest.
        let value_hash = ValueDigest::new(self.hasher.hash(value));
        let level = digest::level(&self.hasher.hash(&key), self.level_base);

        // Invalidate the root hash - it always changes when a key is upserted.
        self.root_hash = None;

        // The clone is needed because value_hash may be used again below when
        // the upsert requires an intermediate page.
        if let UpsertResult::InsertIntermediate(key) =
            self.root.upsert(key, level, value_hash.clone())
        {
            // As an optimisation and simplification, if the current root is
            // empty, simply replace it with the new root.
            if self.root.nodes().is_empty() {
                let node = Node::new(key, value_hash, None);
                self.root = Page::new(level, vec![node]);
                return;
            }

            // NOTE(review): the double indirection (`&mut &mut`) appears
            // required by insert_intermediate_page()'s signature so it can
            // swap out the referenced root page - confirm against page.rs.
            insert_intermediate_page(&mut &mut self.root, key, level, value_hash);
        }
    }
}
327
328impl<H> crate::builder::Builder<H> {
329 /// Consume this [`Builder`] and return the configured [`MerkleSearchTree`].
330 ///
331 /// [`Builder`]: crate::builder::Builder
332 pub fn build<K, V, const N: usize>(self) -> MerkleSearchTree<K, V, H, N> {
333 MerkleSearchTree {
334 hasher: self.hasher,
335 tree_hasher: SipHasher24::default(),
336 root: Page::new(0, vec![]),
337 root_hash: None,
338 level_base: self.level_base,
339 _value_type: PhantomData,
340 }
341 }
342}
343
344#[cfg(test)]
345mod tests {
346 use std::{
347 collections::{BTreeSet, HashSet},
348 hash::Hasher as _,
349 };
350
351 use proptest::prelude::*;
352 use siphasher::sip128::Hasher128;
353
354 use super::*;
355 use crate::{
356 assert_tree,
357 builder::Builder,
358 digest::{
359 mock::{LevelKey, MockHasher},
360 Digest,
361 },
362 test_util::IntKey,
363 visitor::{
364 assert_count::InvariantAssertCount, assert_order::InvariantAssertOrder, nop::NopVisitor,
365 },
366 };
367
    /// A hash implementation that does not rely on the stdlib Hash trait, and
    /// therefore produces stable hashes across rust version changes /
    /// platforms.
    #[derive(Debug, Default)]
    struct FixtureHasher;
    impl Hasher<16, IntKey> for FixtureHasher {
        fn hash(&self, value: &IntKey) -> Digest<16> {
            // Delegate to the u64 implementation below.
            self.hash(&value.unwrap())
        }
    }
    impl Hasher<16, u64> for FixtureHasher {
        fn hash(&self, value: &u64) -> Digest<16> {
            // SipHash-2-4 with the default (fixed) key yields a deterministic
            // 128-bit digest for a given u64.
            let mut h = SipHasher24::default();
            h.write_u64(*value);
            Digest::new(h.finish128().as_bytes())
        }
    }
385
    // Pin the exact root hash produced for a fixed input set, guarding against
    // accidental changes to the hash construction algorithm.
    #[test]
    fn test_hash_fixture() {
        let mut t = Builder::default()
            .with_hasher(FixtureHasher::default())
            .build();

        for i in 0..1000 {
            t.upsert(IntKey::new(i), &i);
        }

        // This hash ensures that any changes to this construction do not result
        // in existing hashes being invalidated / unequal for the same data.
        let fixture_hash = [
            57, 77, 199, 66, 89, 217, 207, 166, 136, 181, 45, 80, 108, 80, 94, 3,
        ];

        assert_eq!(t.root_hash().as_ref(), &fixture_hash);
    }
404
405 #[test]
406 fn test_level_generation() {
407 let h = Digest::new([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
408 assert_eq!(digest::level(&h, DEFAULT_LEVEL_BASE), 32);
409
410 let h = Digest::new([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
411 assert_eq!(digest::level(&h, DEFAULT_LEVEL_BASE), 0);
412
413 let h = Digest::new([0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
414 assert_eq!(digest::level(&h, DEFAULT_LEVEL_BASE), 1);
415
416 let h = Digest::new([0, 0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
417 assert_eq!(digest::level(&h, DEFAULT_LEVEL_BASE), 3);
418 }
419
    // Generate a `test_$name` test that builds a tree with a MockHasher,
    // upserts the given (key, value) pairs in order, and snapshots the
    // resulting tree structure via `assert_tree!`.
    macro_rules! test_insert {
        (
            $name:ident,
            values = [$(($key:expr, $value:expr)),*]
        ) => {
            paste::paste! {
                #[test]
                fn [<test_ $name>]() {
                    let mut t = Builder::default()
                        .with_hasher(MockHasher::default())
                        .build();

                    $(
                        t.upsert($key, $value);
                    )*

                    assert_tree!(t)
                }
            }
        };
    }
441
    test_insert!(one, values = [(LevelKey::new("key", 0), &"bananas")]);

    test_insert!(
        one_non_zero_level,
        values = [(LevelKey::new("key", 4), &"bananas")]
    );

    // Assert the tree is ordered by key.
    test_insert!(
        two_in_order,
        values = [
            (LevelKey::new("A", 0), &"bananas"),
            (LevelKey::new("B", 0), &"bananas")
        ]
    );

    // Assert the tree becomes ordered by key.
    test_insert!(
        two_unordered,
        values = [
            (LevelKey::new("B", 0), &"bananas"),
            (LevelKey::new("A", 0), &"bananas")
        ]
    );

    // Insert two values
    //
    //     Level 0 [ A ]
    //
    // Then insert B, which is destined for level 1, and greater than A;
    // therefore B is placed in level 1 as the new root, and A/level 0 is
    // attached to the lt_pointer of B.
    test_insert!(
        root_split_page_gt,
        values = [
            (LevelKey::new("A", 0), &"bananas"),
            (LevelKey::new("B", 1), &"bananas")
        ]
    );

    // Insert two values
    //
    //     Level 0 [ B ]
    //
    // Then insert A, which is destined for level 1, and less than B;
    // therefore A is placed in level 1 as the new root, and B/level 0 is
    // attached to the high page of level 1 because A < B.
    test_insert!(
        root_split_page_lt,
        values = [
            (LevelKey::new("B", 0), &"bananas"),
            (LevelKey::new("A", 1), &"bananas")
        ]
    );

    // Insert two values, the second with a level higher than the first, causing
    // the root page to be split, both with differing (non-consecutive) levels.
    test_insert!(
        root_split_non_zero_step_gt,
        values = [
            (LevelKey::new("A", 3), &"bananas"),
            (LevelKey::new("B", 9), &"bananas")
        ]
    );

    // Insert two values, the second with a level higher than the first, causing
    // the root page to be split, both with differing (non-consecutive) levels.
    test_insert!(
        root_split_non_zero_step_lt,
        values = [
            (LevelKey::new("B", 3), &"bananas"),
            (LevelKey::new("A", 9), &"bananas")
        ]
    );

    // Insert elements that cause the root to split, and then the child page to
    // split. Each successive element causes a new page to be created and added
    // to the previous level's high page.
    test_insert!(
        non_root_page_split_gt,
        values = [
            (LevelKey::new("A", 6), &"bananas"),
            (LevelKey::new("B", 4), &"bananas"),
            (LevelKey::new("C", 2), &"bananas")
        ]
    );

    test_insert!(
        non_root_page_split_lt,
        values = [
            (LevelKey::new("C", 6), &"bananas"),
            (LevelKey::new("B", 4), &"bananas"),
            (LevelKey::new("A", 2), &"bananas")
        ]
    );

    // Upsert for an existing key does not create more nodes.
    test_insert!(
        update,
        values = [
            (LevelKey::new("A", 6), &"bananas"),
            (LevelKey::new("A", 6), &"platanos")
        ]
    );

    // Split a child page such that the greater-than-or-equal side of the split
    // ends up empty.
    test_insert!(
        split_child_into_two_empty_gte_page,
        values = [
            (LevelKey::new("A", 5), &"platanos"),
            (LevelKey::new("B", 0), &"platanos"),
            (LevelKey::new("C", 0), &"platanos"),
            (LevelKey::new("D", 1), &"platanos")
        ]
    );

    // Split a child page such that the greater-than-or-equal side of the split
    // is non-empty.
    test_insert!(
        split_child_into_two_with_gte_page,
        values = [
            (LevelKey::new("A", 5), &"platanos"),
            (LevelKey::new("B", 0), &"platanos"),
            (LevelKey::new("C", 0), &"platanos"),
            (LevelKey::new("E", 0), &"platanos"),
            (LevelKey::new("D", 1), &"platanos")
        ]
    );

    // Ensure that when inserting a node greater than all existing nodes in a
    // page, the high page is split if necessary
    test_insert!(
        greatest_key_splits_high_page,
        values = [
            (LevelKey::new(11, 1), &"bananas"),
            (LevelKey::new(10, 2), &"bananas"),
            (LevelKey::new(12, 2), &"bananas")
        ]
    );

    // When inserting an intermediate page, ensure the high-page of the
    // less-than split is processed.
    test_insert!(
        intermediate_page_move_all_nodes_and_high_page,
        values = [
            (LevelKey::new(1, 1), &"bananas"),
            (LevelKey::new(2, 1), &"bananas"),
            (LevelKey::new(4, 0), &"bananas"),
            (LevelKey::new(3, 2), &"bananas")
        ]
    );

    test_insert!(
        intermediate_page_move_all_nodes_and_high_page_subset,
        values = [
            (LevelKey::new(1, 1), &"bananas"),
            (LevelKey::new(2, 1), &"bananas"),
            (LevelKey::new(3, 0), &"bananas"),
            (LevelKey::new(5, 0), &"bananas"),
            (LevelKey::new(4, 2), &"bananas")
        ]
    );

    test_insert!(
        child_page_split_add_intermediate,
        values = [
            (LevelKey::new("K", 2), &"bananas"),
            (LevelKey::new("D", 0), &"bananas"),
            (LevelKey::new("E", 1), &"bananas")
        ]
    );

    test_insert!(
        equal_page_move_all_nodes_and_high_page,
        values = [
            (LevelKey::new(2_usize, 64), &"bananas"),
            (LevelKey::new(5_usize, 20), &"bananas"),
            (LevelKey::new(3_usize, 52), &"bananas"),
            (LevelKey::new(4_usize, 64), &"bananas")
        ]
    );

    test_insert!(
        equal_page_move_all_nodes_and_high_page_subset,
        values = [
            (LevelKey::new(2_usize, 64), &"bananas"),
            (LevelKey::new(6_usize, 20), &"bananas"),
            (LevelKey::new(4_usize, 20), &"bananas"),
            (LevelKey::new(3_usize, 52), &"bananas"),
            (LevelKey::new(5_usize, 64), &"bananas")
        ]
    );

    test_insert!(
        split_page_all_gte_nodes_with_lt_pointer,
        values = [
            (LevelKey::new(1, 0), &"bananas"),
            (LevelKey::new(0, 1), &"bananas")
        ]
    );

    test_insert!(
        split_page_all_lt_nodes_with_high_page,
        values = [
            (LevelKey::new(0, 0), &"bananas"),
            (LevelKey::new(1, 1), &"bananas")
        ]
    );

    test_insert!(
        insert_intermediate_recursive_lt_pointer,
        values = [
            (LevelKey::new(1_usize, 1), &""),
            (LevelKey::new(2_usize, 0), &""),
            (LevelKey::new(4_usize, 1), &""),
            (LevelKey::new(3_usize, 2), &"")
        ]
    );

    test_insert!(
        split_page_move_gte_lt_pointer_to_high_page,
        values = [
            (LevelKey::new(1_usize, 1), &""),
            (LevelKey::new(2_usize, 0), &""),
            (LevelKey::new(4_usize, 1), &""),
            (LevelKey::new(3_usize, 2), &"")
        ]
    );

    test_insert!(
        split_page_move_input_high_page_to_gte_page,
        values = [
            (LevelKey::new(6, 0), &"bananas"),
            (LevelKey::new(3, 21), &"bananas"),
            (LevelKey::new(0, 21), &"bananas"),
            (LevelKey::new(1, 22), &"bananas")
        ]
    );
679
    proptest! {
        // Invariant 1: the tree structure is deterministic for a given set of
        // inputs (regardless of insert order)
        #[test]
        fn prop_deterministic_construction(keys in proptest::collection::vec(any::<u64>(), 0..64)) {
            // `keys` is a randomly-ordered vector that may contain duplicates.
            //
            // Collect the items into a second vector, sort & dedup it,
            // producing a different insert ordering from the input order.
            let mut b_values = keys.to_vec();
            b_values.sort();
            b_values.dedup();

            let a_values = keys;

            let mut a = MerkleSearchTree::default();
            let mut b = MerkleSearchTree::default();

            let want_len = b_values.len();

            // Tree A receives the keys in input order (deduplicated on the
            // fly), tree B in sorted order.
            let mut unique = HashSet::new();
            for key in a_values {
                if unique.insert(key) {
                    a.upsert(IntKey::new(key), &"bananas");
                }
            }
            for key in b_values {
                b.upsert(IntKey::new(key), &"bananas");
            }

            assert_node_equal(&mut a, &mut b);

            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
            a.in_order_traversal(&mut asserter);
            asserter.unwrap_count(want_len);

            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
            b.in_order_traversal(&mut asserter);
            asserter.unwrap_count(want_len);
        }

        // Invariant 2: values in the tree are stored in key order.
        #[test]
        fn prop_in_order_traversal_key_order(keys in proptest::collection::vec(any::<u64>(), 0..64)) {
            let mut t = MerkleSearchTree::default();

            let mut unique = HashSet::new();
            let mut want_len = 0;

            for key in keys {
                if unique.insert(key) {
                    want_len += 1;
                    t.upsert(IntKey::new(key), &"bananas");
                }
            }

            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
            t.in_order_traversal(&mut asserter);
            asserter.unwrap_count(want_len);
        }

        // Invariant 3: two independent trees contain the same data iff their
        // root hashes are identical.
        //
        // Additionally the serialised page ranges MUST match iff the trees
        // match.
        #[test]
        fn prop_root_hash_data_equality(keys in proptest::collection::vec(any::<u64>(), 0..64)) {
            let mut a = MerkleSearchTree::default();
            let mut b = MerkleSearchTree::default();

            // They are equal when empty.
            assert_eq!(a.root_hash(), b.root_hash());

            let mut unique = HashSet::new();
            let last_entry = keys.first().cloned();
            for key in keys {
                if !unique.insert(key) {
                    // Root hashes may compute to the same value if the same
                    // (key, value) pair is inserted twice, causing the
                    // divergence assert below to spuriously trigger.
                    continue;
                }

                // Add the key to tree A
                a.upsert(IntKey::new(key), &"bananas");
                assert_eq!(a.root_hash_cached(), None);

                // The trees have now diverged
                assert_node_not_equal(&mut a, &mut b);

                // Add the key to tree B
                b.upsert(IntKey::new(key), &"bananas");
                assert_eq!(b.root_hash_cached(), None);

                // And now the trees have converged
                assert_node_equal(&mut a, &mut b);
            }

            // Update a value for an existing key
            if let Some(key) = last_entry {
                b.upsert(IntKey::new(key), &"platanos");
                assert_eq!(b.root_hash_cached(), None);

                // The trees diverge
                assert_node_not_equal(&mut a, &mut b);

                // And converge once again
                a.upsert(IntKey::new(key), &"platanos");
                assert_eq!(a.root_hash_cached(), None);

                // And now the trees have converged
                assert_node_equal(&mut a, &mut b);
            }

            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
            a.in_order_traversal(&mut asserter);
            asserter.unwrap_count(unique.len());

            let mut asserter = InvariantAssertCount::new(InvariantAssertOrder::new(NopVisitor::default()));
            b.in_order_traversal(&mut asserter);
            asserter.unwrap_count(unique.len());
        }

        // Invariant: the node iter yields every node in the tree in ascending
        // key order.
        #[test]
        fn prop_node_iter(keys in proptest::collection::vec(any::<u64>(), 0..64)) {
            let mut t = MerkleSearchTree::default();

            let mut inserted = BTreeSet::new();
            for key in keys {
                t.upsert(key, &key);
                inserted.insert(key);
            }

            // Use the node iter to visit all nodes, preserving the key order in
            // the returned iterator.
            let got = t
                .node_iter()
                .map(|v| *v.key());

            // The iterator must yield all keys in the same order as a (sorted!)
            // BTreeSet to satisfy the invariant.
            assert!(inserted.into_iter().eq(got));
        }

        // Assert that a lower level base yields at least as many pages as a
        // higher level base for the same input (higher bases produce fewer,
        // proportionally larger pages).
        #[test]
        fn prop_varying_b_changes_page_count(
            low in 5_u8..10, // A "low" B value
            high in 200_u8..210, // A "higher" B value
        ) {
            let low_base_page_count = {
                let low = NonZeroU8::new(low).unwrap();
                let mut t = Builder::default().with_level_base(low).build();
                for i in (0..u64::MAX).rev().take(1_000) {
                    t.upsert(IntKey::new(i), &i);
                }

                t.root_hash();
                t.serialise_page_ranges().map(|v| v.len()).unwrap()
            };

            let high_base_page_count = {
                let high = NonZeroU8::new(high).unwrap();
                let mut t = Builder::default().with_level_base(high).build();
                for i in (0..u64::MAX).rev().take(1_000) {
                    t.upsert(IntKey::new(i), &i);
                }

                t.root_hash();
                t.serialise_page_ranges().map(|v| v.len()).unwrap()
            };

            // The "low" B value yields at least as many pages as the "high" B
            // value.
            assert!(low_base_page_count >= high_base_page_count);
        }
    }
861
    /// Assert both trees produce identical root hashes and serialised page
    /// ranges, and that each tree's cached hash matches its recomputed value.
    //
    // Note: the ordering matters - root_hash() is called before
    // root_hash_cached() so the cache is populated.
    fn assert_node_equal<K, V>(a: &mut MerkleSearchTree<K, V>, b: &mut MerkleSearchTree<K, V>)
    where
        K: AsRef<[u8]> + PartialOrd + Debug,
    {
        assert_eq!(a.root_hash(), b.root_hash(), "root hashes should be equal");
        assert_eq!(
            a.serialise_page_ranges(),
            b.serialise_page_ranges(),
            "serialised pages should match"
        );
        // The cached values must always match their computed values.
        assert_eq!(
            b.root_hash_cached().unwrap().clone(),
            *b.root_hash(),
            "cached hashes should be equal for b"
        );
        assert_eq!(
            a.root_hash_cached().unwrap().clone(),
            *a.root_hash(),
            "cached hashes should be equal for a"
        );
    }
884
    /// Assert the two trees differ: root hashes and serialised page ranges
    /// must not match, while each tree's cached hash must still match its
    /// recomputed value.
    fn assert_node_not_equal<K, V>(a: &mut MerkleSearchTree<K, V>, b: &mut MerkleSearchTree<K, V>)
    where
        K: AsRef<[u8]> + PartialOrd + Debug,
    {
        assert_ne!(
            a.root_hash(),
            b.root_hash(),
            "root hash should not be equal"
        );
        assert_ne!(
            a.serialise_page_ranges(),
            b.serialise_page_ranges(),
            "serialised pages should not match"
        );
        // The cached values must always match their computed values.
        assert_eq!(
            b.root_hash_cached().unwrap().clone(),
            *b.root_hash(),
            "cached hashes should always be equal for b"
        );
        assert_eq!(
            a.root_hash_cached().unwrap().clone(),
            *a.root_hash(),
            "cached hashes should always be equal for a"
        );
    }
911}