1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312
//! Tree difference calculation algorithm & associated types.
mod diff_builder;
mod page_range;
mod page_range_snapshot;
mod range_list;
use std::{fmt::Debug, iter::Peekable};
pub use page_range::*;
pub use page_range_snapshot::*;
use crate::diff::diff_builder::DiffListBuilder;
/// An inclusive range of keys that differ between two serialised ordered sets
/// of [`PageRange`].
#[derive(Debug, PartialEq)]
pub struct DiffRange<'a, K> {
/// The inclusive start & end key bounds of this range to sync.
start: &'a K,
end: &'a K,
}
impl<'a, K> Clone for DiffRange<'a, K> {
fn clone(&self) -> Self {
Self {
start: self.start.clone(),
end: self.end.clone(),
}
}
}
impl<'a, K> DiffRange<'a, K> {
/// Returns true if the range within `self` overlaps any portion of the
/// range within `p`.
pub(crate) fn overlaps(&self, p: &Self) -> bool
where
K: PartialOrd,
{
// 0 1 2 3 4 5 6 7 8 9
// A | |
// B | |
let leading_edge = self.start <= p.start && self.end >= p.start;
let trailing_edge = p.start <= self.end && p.end >= self.end;
leading_edge || trailing_edge
}
/// Returns the inclusive start of this [`DiffRange`], identifying the start
/// of an inconsistency between trees.
pub fn start(&self) -> &K {
self.start
}
/// Returns the inclusive end of this [`DiffRange`], identifying the end of
/// an inconsistency between trees.
pub fn end(&self) -> &K {
self.end
}
}
/// Compute the difference between `local` and `peer`, returning the set of
/// [`DiffRange`] covering the inconsistent key ranges found in `peer`.
///
/// ```rust
/// use merkle_search_tree::{MerkleSearchTree, diff::diff};
///
/// // Initialise a "peer" tree.
/// let mut node_a = MerkleSearchTree::default();
/// node_a.upsert("bananas", &42);
/// node_a.upsert("plátanos", &42);
///
/// // Initialise the "local" tree with differing keys
/// let mut node_b = MerkleSearchTree::default();
/// node_b.upsert("donkey", &42);
///
/// // Generate the tree hashes before serialising the page ranges
/// node_a.root_hash();
/// node_b.root_hash();
///
/// // Generate the tree page bounds & hashes, and feed into the diff function
/// let diff_range = diff(
/// node_b.serialise_page_ranges().unwrap().into_iter(),
/// node_a.serialise_page_ranges().unwrap().into_iter(),
/// );
///
/// // The diff_range contains all the inclusive key intervals the "local" tree
/// // should fetch from the "peer" tree to converge.
/// assert_matches::assert_matches!(diff_range.as_slice(), [range] => {
/// assert_eq!(range.start(), &"bananas");
/// assert_eq!(range.end(), &"plátanos");
/// });
/// ```
///
/// # State Convergence
///
/// To converge the state of the two trees, the key ranges in the returned
/// [`DiffRange`] instances should be requested from `peer` and used to update
/// the state of `local`.
///
/// If `local` is a superset of `peer` (contains all the keys in `peer` and the
/// values are consistent), or the two trees are identical, no [`DiffRange`]
/// intervals are returned.
///
/// # Optimistic Page Bounds Fetching
///
/// When a page exists in `peer` has a key range that is a strict superset of
/// the key range of the same page in `local`, it is guaranteed to be
/// inconsistent. When this occurs, only the difference / missing keys are
/// returned in that page's [`DiffRange`].
///
/// This optimistically assumes the intersection of keys between the two pages
/// are consistent, minimising the number of keys fetched when this assumption
/// holds true. If false, the page will be marked fully inconsistent (inclusive
/// of the newly fetched keys) in the next [`diff()`] call.
///
/// This optimises for monotonic keys, recommended to minimise tree page
/// inconsistencies / keys fetched when new keys are inserted into the tree.
///
/// # Termination
///
/// A single invocation to [`diff()`] always terminates, and completes in `O(n)`
/// time and space. Inconsistent page ranges (if any) are minimised in
/// `O(n_consistent * n_inconsistent)` time and `O(n)` space.
///
/// In the absence of further external updates to either tree, this algorithm
/// terminates (leaving `local` and `peer` fully converged) and no diff is
/// returned within a finite number of sync rounds between the two trees.
///
/// If a one-way sync is performed (pulling inconsistent keys from `peer` and
/// updating `local`, but never syncing the other way around) this algorithm MAY
/// NOT terminate.
pub fn diff<'p, 'a: 'p, T, U, K>(local: T, peer: U) -> Vec<DiffRange<'p, K>>
where
T: IntoIterator<Item = PageRange<'a, K>>,
U: IntoIterator<Item = PageRange<'p, K>>,
K: PartialOrd + Ord + Debug + 'p + 'a,
{
let local = local.into_iter();
let peer = peer.into_iter();
// Any two merkle trees can be expressed as a series of overlapping page
// ranges, either consistent in content (hashes match), or inconsistent
// (hashes differ).
//
// This algorithm builds two sets of intervals - one for key ranges that are
// fully consistent between the two trees, and one for inconsistent ranges.
//
// This DiffListBuilder helps construct these lists, and merges them into a
// final, non-overlapping, deduplicated, and minimised set of ranges that
// are inconsistent between trees as described above.
let mut diff_builder = DiffListBuilder::default();
let mut local = local.peekable();
let mut peer = peer.peekable();
debug!("calculating diff");
let root = match peer.peek() {
Some(v) => v.clone(),
None => return vec![],
};
recurse_diff(&root, &mut peer, &mut local, &mut diff_builder);
diff_builder.into_diff_vec()
}
#[cfg_attr(any(test, feature = "tracing"), tracing::instrument(skip(peer, local)))]
fn recurse_subtree<'p, 'a: 'p, T, U, K>(
subtree_root: &PageRange<'p, K>,
peer: &mut Peekable<U>,
local: &mut Peekable<T>,
diff_builder: &mut DiffListBuilder<'p, K>,
) -> bool
where
T: Iterator<Item = PageRange<'a, K>>,
U: Iterator<Item = PageRange<'p, K>>,
K: PartialOrd + Ord + Debug + 'p + 'a,
{
// Recurse into the subtree, which will exit immediately if the next value
// in peer is not rooted at subtree_root (without consuming the iter value).
recurse_diff(subtree_root, peer, local, diff_builder);
// Invariant - when returning from this call, the entire subtree rooted at
// the peer_subtree_root should have been evaluated and the next peer page
// (if any) escapes the subtree.
while let Some(p) = peer.next_if(|v| subtree_root.is_superset_of(v)) {
debug!(
peer_page=?p,
"requesting unevaluated subtree page"
);
// Add all the un-evaluated peer sub-tree pages to the sync list.
diff_builder.inconsistent(p.start(), p.end());
}
debug_assert!(peer
.peek()
.map(|v| !subtree_root.is_superset_of(v))
.unwrap_or(true));
true
}
#[cfg_attr(any(test, feature = "tracing"), tracing::instrument(skip(peer, local)))]
fn recurse_diff<'p, 'a: 'p, T, U, K>(
subtree_root: &PageRange<'p, K>,
peer: &mut Peekable<U>,
local: &mut Peekable<T>,
diff_builder: &mut DiffListBuilder<'p, K>,
) where
T: Iterator<Item = PageRange<'a, K>>,
U: Iterator<Item = PageRange<'p, K>>,
K: PartialOrd + Ord + Debug + 'p + 'a,
{
// The last visited peer page, if any.
let mut last_p = None;
// Process this subtree, descending down inconsistent paths recursively, and
// iterating through the tree.
loop {
let p = match maybe_advance_within(subtree_root, peer) {
Some(v) => v,
None => {
trace!("no more peer pages in subtree");
return;
}
};
let mut l = match maybe_advance_within(&p, local) {
Some(v) => v,
None => {
// If there's no matching local page that overlaps with p, then
// there must be be one or more keys to be synced from the peer
// to populate the missing local pages.
//
// Request the range starting from the end of the last checked p
// (last_p), or the start of the subtree_root if none.
let start = last_p
.as_ref()
.map(|v: &PageRange<'_, K>| v.end())
.unwrap_or(subtree_root.start());
// And end at the next local page key, or the page end.
//
// Any pages missing between p.end and the end of this subtree
// will be added by the caller (recurse_subtree).
let end = local
.peek()
.map(|v| v.start().min(p.end()))
.unwrap_or(p.end());
if end >= start {
debug!(
peer_page=?p,
"no more local pages in subtree - requesting missing page ranges"
);
diff_builder.inconsistent(start, end);
} else {
trace!(
peer_page=?p,
"no more local pages in subtree"
);
}
return;
}
};
last_p = Some(p.clone());
// Never escape the subtree rooted at p_parent.
//
// Disable for fuzzing to test with fully random inputs.
#[cfg(not(fuzzing))]
debug_assert!(subtree_root.is_superset_of(&p));
trace!(
peer_page=?p,
local_page=?l,
"visit page"
);
// Advance the local cursor to minimise the comparable range, in turn
// minimising the sync range.
while let Some(v) = local.next_if(|v| v.is_superset_of(&p)) {
trace!(
peer_page=?p,
skip_local_page=?l,
local_page=?v,
"shrink local diff range"
);
l = v;
}
// If the bounds don't match, fetch the missing parts from the peer and
// move on to the next page.
if p.start() < l.start() {
trace!(
peer_page=?p,
local_page=?l,
"request missing page start range from peer"
);
// If a parent was missing a leaf node, it's guaranteed all
// children pages on the left-most edge will be missing a node
// too (and therefore are inconsistent) until they reach that
// leaf node.
//
// This would cause the same range to be requested by this check
// as the left-most edge is descended, so de-duplicate the
// requests.
diff_builder.inconsistent(p.start(), l.start()); // exclusive end range
// Ignore any potential inconsistency within the intersection
// between pages when fetching a range bounds mismatch.
//
// This may cause more round trips to sync this page, but optimises
// for the "leading edge" case, fetching only the range bounds and
// assuming it'll make the page consistent instead of fetching the
// whole page for what may be a single new (monotonic) key.
diff_builder.consistent(l.start().max(p.start()), l.end().min(p.end()));
}
if p.end() > l.end() {
trace!(
peer_page=?p,
local_page=?l,
"request missing page end range from peer"
);
// As above for the less-than side, dedupe end ranges for the
// right-most edge.
diff_builder.inconsistent(l.end(), p.end()); // inclusive end range
// Ignore any potential inconsistency within the intersection
// between pages when fetching a range bounds mismatch - see above.
diff_builder.consistent(l.start().max(p.start()), l.end().min(p.end()));
}
// If the page bounds do not match, it is guaranteed this page is
// inconsistent.
//
// The missing range has been requested above - begin visiting child
// pages without marking this entire page range as inconsistent this
// round.
//
// Once the full range of keys for this page is obtained, a subsequent
// diff will cause a proper consistency check of this page and fetch
// only the keys/child ranges that actually differ.
if p.start() != l.start() || p.end() != l.end() {
debug!(
peer_page=?p,
local_page=?l,
"page inconsistent due to range mismatch"
);
// Skip hash evaluation (they're definitely not equal) and avoid
// adding the full peer range as a consistent/inconsistent range -
// prefer instead to optimistically fetch only the range diff.
recurse_subtree(&p, peer, local, diff_builder);
continue;
}
if l.hash() == p.hash() {
debug!(
peer_page=?p,
local_page=?l,
"hash match - consistent page"
);
// Record this page as fully consistent.
diff_builder.consistent(p.start(), p.end());
// Skip visiting the pages in the subtree rooted at the current
// page: they're guaranteed to be consistent due to the consistent
// parent hash.
skip_subtree(&p, peer);
} else {
debug!(
peer_page=?p,
local_page=?l,
"hash mismatch"
);
diff_builder.inconsistent(p.start(), p.end());
}
// Evaluate the sub-tree, causing all the (consistent) child ranges to
// be added to the consistent list to, shrink this inconsistent range
// (or simply advancing through the subtree if this page is consistent).
recurse_subtree(&p, peer, local, diff_builder);
}
}
/// Return the next [`PageRange`] if it is part of the sub-tree rooted at
/// `parent`.
fn maybe_advance_within<'a, 'p, K, T>(
parent: &PageRange<'p, K>,
cursor: &mut Peekable<T>,
) -> Option<PageRange<'a, K>>
where
T: Iterator<Item = PageRange<'a, K>>,
K: PartialOrd + 'a,
{
if cursor
.peek()
.map(|p| !parent.overlaps(p))
.unwrap_or_default()
{
return None;
}
cursor.next()
}
/// Advance `iter` to the next page that does not form part of the subtree
/// rooted at the given `subtree_root`.
#[inline(always)]
fn skip_subtree<'p, T, K>(subtree_root: &PageRange<'p, K>, iter: &mut Peekable<T>)
where
T: Iterator<Item = PageRange<'p, K>>,
K: PartialOrd + Ord + Debug + 'p,
{
while iter.next_if(|v| subtree_root.is_superset_of(v)).is_some() {}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use assert_matches::assert_matches;
use proptest::prelude::*;
use super::*;
use crate::{
digest::PageDigest,
test_util::{IntKey, Node},
};
const fn new_digest(lsb: u8) -> PageDigest {
PageDigest::new([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, lsb])
}
macro_rules! test_page_is_superset_of {
(
$name:ident,
a = [$a_start:expr, $a_end:expr],
b = [$b_start:expr, $b_end:expr],
a_is_parent = $want:expr // Should a.contains(b) be true?
) => {
paste::paste! {
#[test]
fn [<test_page_is_superset_of_ $name>]() {
let a = PageRange::new(&$a_start, $a_end, new_digest(42));
let b = PageRange::new(&$b_start, $b_end, new_digest(42));
assert!(a.is_superset_of(&b) == $want);
// All pages that are a superset, also overlap.
if a.is_superset_of(&b) {
assert!(a.overlaps(&b));
assert!(b.overlaps(&a));
}
}
}
};
}
test_page_is_superset_of!(full, a = [1, &10], b = [2, &9], a_is_parent = true);
test_page_is_superset_of!(start, a = [2, &10], b = [1, &9], a_is_parent = false);
test_page_is_superset_of!(end, a = [1, &8], b = [2, &9], a_is_parent = false);
test_page_is_superset_of!(outside, a = [1, &10], b = [0, &11], a_is_parent = false);
// Tests below model this tree:
//
// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐
// ┌───┬───┬───────┐
// │ │ 7 │11 │ high │ Level 2 │
// └───┴───┴───────┘
// └ ─ ┬ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ─ ┘
// ┌────┘ └─────┐
// ▼ ▼
// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ┐
// ┌───┬───┬───┐ │ ┌───┐
// │ │ 3 │ 4 │ 6 │Level 1 │ │15 │ Level 1 │
// └───┴───┴───┘ │ └───┘
// └ ─ ┬ ─ ─ ─ ┬ ─ ─ ─ ─ ─ └ ─ ─ ─ ─ ─ ─ ─ ┘
// ┌───┘ └─────┐
// ▼ ▼
// ┌ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ┐
// ┌───┐ ┌───┐
// │ │ 2 │ Level 0 │ │ │ 5 │ Level 0 │
// └───┘ └───┘
// └ ─ ─ ─ ─ ─ ─ ─ ┘ └ ─ ─ ─ ─ ─ ─ ─ ┘
#[test]
fn test_no_diff() {
enable_logging!();
let local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let peer = local.clone();
assert_matches!(diff(local, peer).as_slice(), []);
}
#[test]
fn test_diff_peer_missing_last_page() {
enable_logging!();
let local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let mut peer = local.clone();
// Remove the last page
let _ = peer.pop().unwrap();
// Invalidate the root/parent and update the peer root range to reflect
// the missing last page
peer[0] = PageRange::new(peer[0].start(), &11, new_digest(42));
// Nothing to ask for - the peer is behind
assert_matches!(diff(local, peer).as_slice(), []);
}
#[test]
fn test_diff_local_missing_last_page() {
enable_logging!();
let mut local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let peer = local.clone();
// Remove the last page
let _ = local.pop().unwrap();
// Invalidate the root/parent and update the local root range to reflect
// the missing last page
local[0] = PageRange::new(local[0].start(), &11, new_digest(42));
assert_matches!(
diff(local, peer).as_slice(),
[DiffRange { start: 11, end: 15 }]
);
}
/// The peer is missing a child page.
///
/// This means the local tree has an inconsistent hash for the
/// missing-child's parent, causing it to request the full range between the
/// end of it and the next consistent hash (an empty sync range).
#[test]
fn test_diff_peer_missing_leaf_page() {
enable_logging!();
let local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let peer = vec![
PageRange::new(
&3, // Differs
&15,
new_digest(42), // Differs
),
PageRange::new(
&3, // Differs
&6,
new_digest(43), // Differs
),
//
// No page containing 2 at level 0
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
assert_matches!(diff(local, peer).as_slice(), []);
}
#[test]
fn test_diff_local_missing_leaf_page() {
enable_logging!();
let local = vec![
PageRange::new(
&3, // Differs
&15,
new_digest(42), // Differs
),
PageRange::new(
&3, // Differs
&6,
new_digest(43), // Differs
),
//
// No page containing 2 at level 0
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let peer = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
assert_matches!(
diff(local, peer).as_slice(),
[DiffRange { start: 2, end: 3 }]
);
}
#[test]
fn test_diff_local_missing_subtree() {
enable_logging!();
let local = vec![
PageRange::new(
&3,
&15,
new_digest(42), // Differs
),
PageRange::new(&15, &15, new_digest(5)),
];
let peer = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
assert_matches!(
diff(local, peer).as_slice(),
[DiffRange { start: 2, end: 6 }]
);
}
#[test]
fn test_diff_peer_missing_subtree() {
enable_logging!();
let local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let peer = vec![
PageRange::new(
&3,
&15,
new_digest(42), // Differs
),
PageRange::new(&15, &15, new_digest(5)),
];
assert_matches!(diff(local, peer).as_slice(), []);
}
#[test]
fn test_diff_leaf_page_hash() {
enable_logging!();
let peer = vec![
PageRange::new(
&2,
&15,
new_digest(42), // Differs
),
PageRange::new(
&2,
&6,
new_digest(42), // Differs
),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
assert_matches!(
diff(local, peer).as_slice(),
// Range (3,6) is optimal, but this gets the job done.
[DiffRange { start: 2, end: 15 }]
);
}
#[test]
fn test_diff_peer_extra_key_last_page() {
enable_logging!();
let local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let mut peer = local.clone();
let end = peer.last_mut().unwrap();
*end = PageRange::new(end.start(), &16, new_digest(42));
// Root hash differs to reflect differing child
peer[0] = PageRange::new(peer[0].start(), &16, new_digest(42));
assert_matches!(
diff(local, peer).as_slice(),
[DiffRange { start: 15, end: 16 }]
);
}
#[test]
fn test_diff_root_page_hash() {
enable_logging!();
let local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let mut peer = local.clone();
// Root hash differs due to added key 8
peer[0] = PageRange::new(peer[0].start(), peer[0].end(), new_digest(42));
// Without the reduce_sync_range optimisation, this root inconsistency
// would cause a fetch against the whole tree (start: 2, end: 15).
//
// Instead, the known-good sub-tree pages can be removed from the sync
// range.
assert_matches!(
diff(local, peer).as_slice(),
[DiffRange { start: 6, end: 15 }]
);
}
#[test]
fn test_diff_peer_intermediate_bounds() {
enable_logging!();
// This breaks the convention of the same tree being used, and instead
// pushes 7 down into level 1.
//
// It appears in the peer only.
let local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let mut peer = local.clone();
// Root hash differs due to added key 8
peer[1] = PageRange::new(peer[1].start(), &7, new_digest(42));
peer[0] = PageRange::new(peer[0].start(), peer[0].end(), new_digest(42));
assert_matches!(
diff(local, peer).as_slice(),
[DiffRange { start: 6, end: 15 }]
);
}
#[test]
fn test_diff_peer_intermediate_bounds_and_inconsistent_subtree_leaf() {
enable_logging!();
// This breaks the convention of the same tree being used, and instead
// pushes 7 down into level 1.
//
// It appears in the peer only, and additionally the value of 2 is
// modified.
//
// This checks if discrepancies in children are still discovered, even
// when bounds of parents differ. Ideally all discrepancies would be
// discovered the first time round, but in order to optimise for
// "leading edge" key additions and optimistically minimise sync ranges,
// this is not the case - instead the range diff is resolved first, and
// after, the page inconsistency for 2 is resolved via a second sync.
let local = vec![
PageRange::new(&2, &15, new_digest(1)),
PageRange::new(&2, &6, new_digest(2)),
PageRange::new(&2, &2, new_digest(3)),
PageRange::new(&5, &5, new_digest(4)),
PageRange::new(&15, &15, new_digest(5)),
];
let mut peer = local.clone();
// Extend key range of 1st child to 2-6 to 2-7
peer[1] = PageRange::new(peer[1].start(), &7, new_digest(42));
// Key 2 value change
peer[2] = PageRange::new(peer[2].start(), peer[2].end(), new_digest(42));
// Root hash
peer[0] = PageRange::new(peer[0].start(), peer[0].end(), new_digest(42));
assert_matches!(
diff(local, peer.clone()).as_slice(),
[DiffRange { start: 6, end: 15 }]
);
let mut local = peer.clone();
// Only 2 should remain different - reset the hash.
local[2] = PageRange::new(local[2].start(), local[2].end(), new_digest(3));
peer[1] = PageRange::new(peer[1].start(), peer[1].end(), new_digest(2));
peer[0] = PageRange::new(peer[0].start(), peer[0].end(), new_digest(1));
// 2, 15 because the root page is inconsistent and there's no consistent
// pages that shrink the range.
assert_matches!(
diff(local, peer).as_slice(),
[DiffRange { start: 2, end: 15 }]
);
}
/// Construct a test where a child page is inconsistent and should not call
/// recurse_diff() as there's no subtree to evaluate.
///
/// Additionally another child page is also inconsistent but skipped because
/// the local node has a larger key range than the peer.
#[test]
fn test_child_page_inconsistent_no_subtree_recurse() {
enable_logging!();
let local = vec![
PageRange::new(&0, &17995215864353464453_usize, new_digest(1)),
PageRange::new(&0, &1331283967702353742, new_digest(2)),
PageRange::new(
&2425302987964992968,
&3632803506728089373, // Larger key range than peer
new_digest(3),
),
PageRange::new(
&4706903583207578752, // Shorter key range than peer (missing first key)
&4707132771120484774,
new_digest(4),
),
PageRange::new(&17995215864353464453, &17995215864353464453, new_digest(5)),
];
let peer = vec![
PageRange::new(
&0,
&17995215864353464453_usize,
new_digest(11), // Differs
),
PageRange::new(&0, &1331283967702353742, new_digest(2)),
PageRange::new(
&2425302987964992968,
&3541571342636567061,
new_digest(13), // Differs
),
PageRange::new(
&3632803506728089373,
&4707132771120484774,
new_digest(14), // Differs
),
PageRange::new(&17995215864353464453, &17995215864353464453, new_digest(5)),
];
assert_matches!(
diff(local, peer).as_slice(),
[
DiffRange {
start: 1331283967702353742,
end: 4706903583207578752
},
DiffRange {
start: 4707132771120484774,
end: 17995215864353464453
}
]
);
}
// If the bounds of the peer page exceed that of the local page on both
// sides, make sure both sides are requested to minimise round trips.
#[test]
fn test_diff_peer_bounds_larger_both_sides() {
enable_logging!();
let local = vec![PageRange::new(&2, &15, new_digest(1))];
let peer = vec![PageRange::new(&1, &42, new_digest(1))];
assert_matches!(
diff(local, peer).as_slice(),
[
DiffRange { start: 1, end: 2 },
DiffRange { start: 15, end: 42 },
]
);
}
#[test]
fn test_diff_empty_peer() {
enable_logging!();
let peer = vec![];
let local = vec![PageRange::new(&1, &42, new_digest(1))];
assert_matches!(diff(local, peer).as_slice(), []);
}
#[test]
fn test_diff_empty_local() {
enable_logging!();
let local = vec![];
let peer = vec![PageRange::new(&1, &42, new_digest(1))];
assert_matches!(
diff(local, peer).as_slice(),
[DiffRange { start: 1, end: 42 }]
);
}
#[test]
fn test_trivial_sync_differing_values() {
enable_logging!();
let mut a = Node::default();
a.upsert(IntKey::new(42), 1);
let mut b = Node::default();
b.upsert(IntKey::new(42), 2);
assert_eq!(sync_round(&mut a, &mut b), 1);
assert_eq!(sync_round(&mut a, &mut b), 0);
assert_eq!(sync_round(&mut a, &mut b), 0);
assert_eq!(sync_round(&mut a, &mut b), 0);
assert_eq!(a, b);
}
#[test]
fn test_trivial_sync_differing_keys() {
enable_logging!();
let mut a = Node::default();
a.upsert(IntKey::new(42), 1);
let mut b = Node::default();
b.upsert(IntKey::new(24), 1);
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b");
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b");
assert_eq!(sync_round(&mut b, &mut a), 1, "b => a");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a");
assert_eq!(sync_round(&mut a, &mut b), 2, "a => b");
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a");
assert_eq!(a, b);
}
/// Test the case where the local root page is a superset of the peer.
///
/// This is an example of the peer needing to sync in order for the local
/// node to sync the missing peer keys. Because the range bounds do not
/// match, the peer -> local sync is not attempted until the peer has
/// obtained the local keys, making the page ranges match, allowing the page
/// to be peer -> local sync to complete.
#[test]
fn test_local_superset_of_peer() {
enable_logging!();
let mut a = Node::default();
a.upsert(IntKey::new(244067356035258375), 0);
let mut b = Node::default();
b.upsert(IntKey::new(0), 0);
b.upsert(IntKey::new(2750749774246655017), 0);
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 1");
assert_eq!(sync_round(&mut b, &mut a), 2, "b => a run 1");
assert_eq!(sync_round(&mut a, &mut b), 3, "a => b run 2");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 2");
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 3");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 3");
assert_eq!(a, b);
}
// Construct a test with a level 2 root node that is absent in the local
// tree, but who's presence does not affect the min/max ranges of the root
// (in essence, the peer has an extra level / key than local, but ranges
// match).
#[test]
fn test_root_single_node_covered() {
enable_logging!();
// 0: 2356959391436047
// 1: 1827784367256368463
// 2: 8090434540329235177
// 3: 8090434540343951592
let mut a = Node::default();
a.upsert(IntKey::new(2356959391436047), 0);
a.upsert(IntKey::new(8090434540343951592), 0);
// 2356959391436047 is lt subtree of 8090434540343951592
// pull two subtrees:
// * start range mismatch for 0 -> 1, 2 -> 3
// * end range mismatch
// this should complete B, but doesn't include the value in between the
// pulled ranges (1, 2) which is a level higher.
let mut b = Node::default();
b.upsert(IntKey::new(1827784367256368463), 0);
b.upsert(IntKey::new(8090434540329235177), 0);
assert_eq!(sync_round(&mut a, &mut b), 2, "a => b run 1");
assert_eq!(sync_round(&mut b, &mut a), 4, "b => a run 1");
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 2");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 2");
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 3");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 3");
assert_eq!(a, b);
}
/// One node has a tree range that is a superset of the other.
#[test]
fn test_superset() {
enable_logging!();
// 0
// 1479827427186972579
// 6895546778622627890
// 8090434540329235177
let mut a = Node::default();
a.upsert(IntKey::new(1479827427186972579), 0);
a.upsert(IntKey::new(6895546778622627890), 0);
let mut b = Node::default();
b.upsert(IntKey::new(0), 0);
b.upsert(IntKey::new(8090434540329235177), 0);
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 1");
// B contains a range superset, so it believes it has everything
// already.
assert_eq!(sync_round(&mut b, &mut a), 2, "b => a run 1");
// A pulls the missing keys within B.
assert_eq!(sync_round(&mut a, &mut b), 4, "a => b run 2");
// Subsequently B pulls all the keys from A (as B's existing keys were
// within the A's range now being fetched due to a hash mismatch).
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 2");
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 3");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 3");
assert_eq!(a, b);
}
/// Construct a test where both roots contain a single key, both with
/// differing values - each node needs to pull their peer's root key.
#[test]
fn test_both_roots_single_differing_node() {
enable_logging!();
let mut a = Node::default();
a.upsert(IntKey::new(3541571342636567061), 0);
a.upsert(IntKey::new(4706901308862946071), 0);
a.upsert(IntKey::new(4706903583207578752), 0);
let mut b = Node::default();
b.upsert(IntKey::new(3632796868130453657), 0);
b.upsert(IntKey::new(3632803506728089373), 0);
b.upsert(IntKey::new(4707132771120484774), 0);
for _i in 0..100 {
sync_round(&mut a, &mut b);
sync_round(&mut b, &mut a);
}
assert_eq!(sync_round(&mut a, &mut b), 0);
assert_eq!(sync_round(&mut b, &mut a), 0);
assert_eq!(a, b);
}
/// Ensure only the "leading edge" missing keys are fetched - the common
/// case for new monotonic keys added to a tree.
#[test]
fn test_leading_edge_range_sync() {
enable_logging!();
let mut a = Node::default();
a.upsert(IntKey::new(1), 0);
a.upsert(IntKey::new(2), 0);
a.upsert(IntKey::new(3), 0);
a.upsert(IntKey::new(4), 0);
a.upsert(IntKey::new(5), 0);
a.upsert(IntKey::new(6), 0);
a.upsert(IntKey::new(7), 0);
a.upsert(IntKey::new(8), 0);
a.upsert(IntKey::new(9), 0);
a.upsert(IntKey::new(10), 0);
let mut b = Node::default();
b.upsert(IntKey::new(1), 0);
b.upsert(IntKey::new(2), 0);
b.upsert(IntKey::new(3), 0);
b.upsert(IntKey::new(4), 0);
b.upsert(IntKey::new(5), 0);
b.upsert(IntKey::new(6), 0);
assert_eq!(sync_round(&mut a, &mut b), 5, "a => b run 1");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 1");
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 2");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 2");
assert_eq!(a, b);
}
#[test]
fn test_prop_fail() {
enable_logging!();
let mut a = Node::default();
a.upsert(IntKey::new(6416642709185293041), 0);
a.upsert(IntKey::new(6823694781896678135), 0);
a.upsert(IntKey::new(6823727308570268642), 0);
a.upsert(IntKey::new(16590198516108651936), 0);
let mut b = Node::default();
b.upsert(IntKey::new(0), 0);
b.upsert(IntKey::new(10417693944069773430), 0);
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 1");
assert_eq!(sync_round(&mut b, &mut a), 1, "b => a run 1");
assert_eq!(sync_round(&mut a, &mut b), 1, "a => b run 2");
assert_eq!(sync_round(&mut b, &mut a), 3, "b => a run 2");
assert_eq!(sync_round(&mut a, &mut b), 6, "a => b run 3");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 3");
assert_eq!(sync_round(&mut a, &mut b), 0, "a => b run 4");
assert_eq!(sync_round(&mut b, &mut a), 0, "b => a run 4");
assert_eq!(a, b);
}
const MAX_NODE_KEYS: usize = 100;
prop_compose! {
/// Yield a set of keys covering the full u64 range, driving randomness
/// of tree node placements instead of collisions.
fn arbitrary_large_key_set()(kv_pairs in prop::collection::hash_set(
(any::<u64>(), any::<u64>()),
0..MAX_NODE_KEYS)
) -> HashSet<(u64, u64)> {
kv_pairs
}
}
prop_compose! {
/// Yield a small set of keys, to arbitrarily increase the collision
/// count between peers by having them generate the same keys with
/// differing values.
fn arbitrary_small_key_set()(kv_pairs in prop::collection::hash_set(
(0_u64..50, 0_u64..50),
0..MAX_NODE_KEYS)
) -> HashSet<(u64, u64)> {
kv_pairs
}
}
prop_compose! {
/// Yield an arbitrary [`Node`] containing up to 100 random key/value
/// pairs.
fn arbitrary_node()(kv_pairs in prop_oneof![
arbitrary_large_key_set(),
arbitrary_small_key_set()
]) -> Node {
let mut node = Node::default();
for (k, v) in kv_pairs {
node.upsert(IntKey::new(k), v);
}
node
}
}
proptest! {
/// Perform a synchronisation test that asserts two arbitrary trees
/// (potentially containing no overlapping key/values) converge to the
/// same state after repeated synchronisation rounds.
#[test]
fn prop_sync_trees(mut a in arbitrary_node(), mut b in arbitrary_node()) {
enable_logging!();
// Bound the number of sync rounds needed to converge to at most 1
// key being sync'd per round.
let max_count = a.key_count() + b.key_count() + 1;
let mut count = 0;
loop {
// Perform a two-way sync.
let a_to_b = sync_round(&mut a, &mut b);
let b_to_a = sync_round(&mut b, &mut a);
if a_to_b == 0 && b_to_a == 0 {
// If no progress was made, stop syncing.
break;
}
// Syncing should never pull more than the full peer tree.
assert!(a_to_b <= a.key_count());
assert!(b_to_a <= b.key_count());
count += 1;
if count >= max_count {
panic!("failed to sync a => b in round limit");
}
}
// Ensure the nodes are now consistent.
assert_eq!(a, b);
}
/// Invariant: page ranges yielded from an [`OwnedPageRange`] are
/// identical to those from the borrowed [`PageRange`] equivalent.
#[test]
fn prop_owned_page_range_equivalent(mut a in arbitrary_node()) {
enable_logging!();
let _ = a.root_hash();
let a_ref = a.serialise_page_ranges().unwrap();
let a_owned = PageRangeSnapshot::from(a_ref.clone());
let a_owned_iter = a_owned.iter();
let a_ref_iter = a_ref.iter().cloned();
assert!(a_owned_iter.eq(a_ref_iter));
}
}
/// Perform a single sync round, pulling differences from a into b.
///
/// Returns the number of fetched pages.
fn sync_round(from: &mut Node, to: &mut Node) -> usize {
// First sync b from a, applying the "a is always right" merge rule.
let a2 = from.clone();
let a_tree = from.page_ranges();
let mut to2 = to.clone();
let want = diff(to2.page_ranges(), a_tree);
let mut count = 0;
for range in want {
for (k, v) in a2.key_range_iter(range.start()..=range.end()) {
to.upsert(k.clone(), *v);
count += 1;
}
}
count
}
}