use serde::ser::SerializeStruct;
use serde::{Deserialize, Serialize, Serializer};
use super::MerkleHash;
#[cfg(debug_assertions)]
use super::aggregated_hashes::aggregated_node_hash;
use super::aggregated_hashes::{
MAX_GROUP_SIZE, MIN_GROUP_SIZE, is_natural_cut, merged_hash_of_sequence, next_merge_cut,
};
use crate::error::CoreError;
type Node = (MerkleHash, u64);
#[derive(Serialize, Deserialize)]
struct HexNode {
#[serde(with = "super::data_hash::hex::serde")]
hash: MerkleHash,
size: u64,
}
#[inline]
fn next_cut_unbounded(hashes: &[Node]) -> Option<usize> {
for (i, &(h, _)) in hashes.iter().enumerate() {
if is_natural_cut(h) {
return Some(i);
}
}
None
}
#[inline]
fn prev_cut_unbounded(hashes: &[Node]) -> Option<usize> {
(0..hashes.len()).rev().find(|&i| is_natural_cut(hashes[i].0))
}
pub fn find_stable_start(nodes: &[Node]) -> Option<usize> {
if nodes.len() < MIN_GROUP_SIZE + 1 {
return None;
}
let valid_gap = |gap: usize| gap > MIN_GROUP_SIZE && gap < MAX_GROUP_SIZE - 1;
let mut prev_prev_cut: Option<usize> = None;
let mut prev_cut: Option<usize> = None;
let mut pos = 0;
while pos < nodes.len() {
if let Some(offset) = next_cut_unbounded(&nodes[pos..]) {
let cut_pos = pos + offset;
if let Some(pc) = prev_cut
&& valid_gap(cut_pos - pc)
&& let Some(ppc) = prev_prev_cut
&& valid_gap(pc - ppc)
{
return Some(pc + 1);
}
prev_prev_cut = prev_cut;
prev_cut = Some(cut_pos);
pos = cut_pos + 1;
} else {
break;
}
}
None
}
pub fn find_stable_end(nodes: &[Node]) -> Option<usize> {
if nodes.len() < MIN_GROUP_SIZE + 1 {
return None;
}
let valid_gap = |gap: usize| gap > MIN_GROUP_SIZE && gap < MAX_GROUP_SIZE - 1;
let mut next_next_cut: Option<usize> = None;
let mut next_cut: Option<usize> = None;
let mut pos = nodes.len();
while pos > 0 {
if let Some(offset) = prev_cut_unbounded(&nodes[..pos]) {
let cut_pos = offset;
if let Some(nc) = next_cut
&& valid_gap(nc - cut_pos)
&& let Some(nnc) = next_next_cut
&& valid_gap(nnc - nc)
{
return Some(nc + 1);
}
next_next_cut = next_cut;
next_cut = Some(cut_pos);
pos = cut_pos;
} else {
break;
}
}
None
}
#[derive(Clone, Debug)]
pub struct MerkleHashSubtree {
nodes: Vec<Node>,
levels: Vec<(usize, usize)>,
left_offsets: Vec<usize>,
right_offsets: Vec<usize>,
at_start: bool,
at_end: bool,
#[cfg(debug_assertions)]
debug_chunks: Vec<Node>,
}
#[inline]
fn compute_offsets(levels: &[(usize, usize)]) -> (Vec<usize>, Vec<usize>) {
let n = levels.len();
let mut left_offsets = Vec::with_capacity(n);
let mut right_offsets = Vec::with_capacity(n);
let mut cumulative_left: usize = 0;
for &(lc, _) in levels {
left_offsets.push(cumulative_left);
cumulative_left += lc;
}
let total_left = cumulative_left;
let mut cumulative_right_after: usize = levels.iter().map(|&(_, rc)| rc).sum();
for &(_, rc) in levels {
cumulative_right_after -= rc;
right_offsets.push(total_left + cumulative_right_after);
}
(left_offsets, right_offsets)
}
impl Serialize for MerkleHashSubtree {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let human_readable = serializer.is_human_readable();
let mut state = serializer.serialize_struct("MerkleHashSubtree", 4)?;
if human_readable {
let hex_nodes: Vec<HexNode> = self.nodes.iter().map(|&(hash, size)| HexNode { hash, size }).collect();
state.serialize_field("nodes", &hex_nodes)?;
} else {
state.serialize_field("nodes", &self.nodes)?;
}
state.serialize_field("levels", &self.levels)?;
state.serialize_field("at_start", &self.at_start)?;
state.serialize_field("at_end", &self.at_end)?;
state.end()
}
}
impl<'de> Deserialize<'de> for MerkleHashSubtree {
fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let is_human_readable = deserializer.is_human_readable();
#[derive(Deserialize)]
struct HexRaw {
nodes: Vec<HexNode>,
levels: Vec<(usize, usize)>,
at_start: bool,
at_end: bool,
}
#[derive(Deserialize)]
struct BinRaw {
nodes: Vec<Node>,
levels: Vec<(usize, usize)>,
at_start: bool,
at_end: bool,
}
let (nodes, levels, at_start, at_end) = if is_human_readable {
let raw = HexRaw::deserialize(deserializer)?;
let nodes = raw.nodes.into_iter().map(|hn| (hn.hash, hn.size)).collect();
(nodes, raw.levels, raw.at_start, raw.at_end)
} else {
let raw = BinRaw::deserialize(deserializer)?;
(raw.nodes, raw.levels, raw.at_start, raw.at_end)
};
let (left_offsets, right_offsets) = compute_offsets(&levels);
Ok(MerkleHashSubtree {
nodes,
levels,
left_offsets,
right_offsets,
at_start,
at_end,
#[cfg(debug_assertions)]
debug_chunks: Vec::new(),
})
}
}
impl MerkleHashSubtree {
pub fn from_chunks(at_start: bool, chunks: &[Node], at_end: bool) -> Self {
let (nodes, levels) = build_hump(chunks, at_start, at_end);
let (left_offsets, right_offsets) = compute_offsets(&levels);
let result = Self {
nodes,
levels,
left_offsets,
right_offsets,
at_start,
at_end,
#[cfg(debug_assertions)]
debug_chunks: chunks.to_vec(),
};
#[cfg(debug_assertions)]
result.verify_invariants();
result
}
pub fn num_nodes(&self) -> usize {
self.nodes.len()
}
pub fn num_levels(&self) -> usize {
self.levels.len()
}
pub fn is_empty(&self) -> bool {
self.nodes.is_empty()
}
#[inline]
fn left_at(&self, level: usize) -> &[Node] {
let start = self.left_offsets[level];
&self.nodes[start..start + self.levels[level].0]
}
#[inline]
fn right_at(&self, level: usize) -> &[Node] {
let start = self.right_offsets[level];
&self.nodes[start..start + self.levels[level].1]
}
pub fn merge_into(&mut self, other: &MerkleHashSubtree) -> crate::error::Result<()> {
let mut buffers = CHRMergeBuffers::new();
self.merge_into_impl(other, &mut buffers)
}
pub fn merge(ranges: &[MerkleHashSubtree]) -> crate::error::Result<MerkleHashSubtree> {
match ranges.len() {
0 => Ok(MerkleHashSubtree::from_chunks(true, &[], true)),
1 => Ok(ranges[0].clone()),
_ => {
let mut result = ranges[0].clone();
let mut buffers = CHRMergeBuffers::new();
for range in &ranges[1..] {
result.merge_into_impl(range, &mut buffers)?;
}
Ok(result)
},
}
}
fn merge_into_impl(&mut self, other: &MerkleHashSubtree, buf: &mut CHRMergeBuffers) -> crate::error::Result<()> {
if self.at_end {
return Err(CoreError::InvalidArguments);
}
if other.at_start {
return Err(CoreError::InvalidArguments);
}
let combined_at_start = self.at_start;
let combined_at_end = other.at_end;
let max_levels = self.num_levels().max(other.num_levels());
let estimated_total = self.num_nodes() + other.num_nodes() + 16;
let estimated_levels = max_levels + 2;
buf.out_left_nodes.clear();
buf.out_left_nodes.reserve(estimated_total);
buf.out_right_nodes.clear();
buf.out_right_nodes.reserve(estimated_total / 2);
buf.levels.clear();
buf.levels.reserve(estimated_levels);
buf.carry.clear();
buf.promoted.clear();
let mut all_lefts_empty = true;
let mut all_rights_empty = true;
for level in 0..max_levels {
let lr_left = if level < self.num_levels() {
self.left_at(level)
} else {
&[]
};
let lr_right = if level < self.num_levels() {
self.right_at(level)
} else {
&[]
};
let rr_left = if level < other.num_levels() {
other.left_at(level)
} else {
&[]
};
let rr_right = if level < other.num_levels() {
other.right_at(level)
} else {
&[]
};
buf.full.clear();
buf.full.extend_from_slice(lr_left);
buf.full.extend_from_slice(lr_right);
buf.full.extend_from_slice(&buf.carry);
buf.full.extend_from_slice(rr_left);
buf.full.extend_from_slice(rr_right);
let level_at_start = combined_at_start && all_lefts_empty;
let level_at_end = combined_at_end && all_rights_empty;
buf.promoted.clear();
let (prefix_len, suffix_len) =
split_and_promote(&buf.full, level_at_start, level_at_end, &mut buf.promoted);
buf.out_left_nodes.extend_from_slice(&buf.full[..prefix_len]);
buf.out_right_nodes.extend_from_slice(&buf.full[buf.full.len() - suffix_len..]);
if prefix_len > 0 {
all_lefts_empty = false;
}
if suffix_len > 0 {
all_rights_empty = false;
}
buf.levels.push((prefix_len, suffix_len));
std::mem::swap(&mut buf.carry, &mut buf.promoted);
}
while !buf.carry.is_empty() {
if buf.carry.len() == 1 {
buf.out_left_nodes.extend_from_slice(&buf.carry);
buf.levels.push((buf.carry.len(), 0));
buf.carry.clear();
} else {
let at_start_here = combined_at_start && all_lefts_empty;
let at_end_here = combined_at_end && all_rights_empty;
buf.promoted.clear();
let (prefix_len, suffix_len) =
split_and_promote(&buf.carry, at_start_here, at_end_here, &mut buf.promoted);
buf.out_left_nodes.extend_from_slice(&buf.carry[..prefix_len]);
buf.out_right_nodes
.extend_from_slice(&buf.carry[buf.carry.len() - suffix_len..]);
if prefix_len > 0 {
all_lefts_empty = false;
}
if suffix_len > 0 {
all_rights_empty = false;
}
buf.levels.push((prefix_len, suffix_len));
std::mem::swap(&mut buf.carry, &mut buf.promoted);
}
}
while buf.levels.len() > 1 && buf.levels.last() == Some(&(0, 0)) {
buf.levels.pop();
}
self.nodes.clear();
self.nodes.reserve(buf.out_left_nodes.len() + buf.out_right_nodes.len());
self.nodes.extend_from_slice(&buf.out_left_nodes);
{
let mut end = buf.out_right_nodes.len();
for &(_, rc) in buf.levels.iter().rev() {
let start = end - rc;
self.nodes.extend_from_slice(&buf.out_right_nodes[start..end]);
end = start;
}
}
self.levels.clear();
self.levels.extend_from_slice(&buf.levels);
let (left_offsets, right_offsets) = compute_offsets(&self.levels);
self.left_offsets = left_offsets;
self.right_offsets = right_offsets;
self.at_start = combined_at_start;
self.at_end = combined_at_end;
#[cfg(debug_assertions)]
{
self.debug_chunks.extend_from_slice(&other.debug_chunks);
}
#[cfg(debug_assertions)]
self.verify_invariants();
Ok(())
}
pub fn final_hash(&self) -> Option<MerkleHash> {
if !self.at_start || !self.at_end {
return None;
}
if self.nodes.is_empty() {
return Some(MerkleHash::default());
}
let top = self.levels.len() - 1;
debug_assert!(
self.levels.iter().take(top).all(|(l, r)| *l == 0 && *r == 0),
"Fully-closed hump should have empty lower levels, but found: {:?}",
&self.levels[..top]
);
let top_left = self.left_at(top);
debug_assert!(self.right_at(top).is_empty());
debug_assert_eq!(top_left.len(), 1);
Some(top_left[0].0)
}
#[cfg(debug_assertions)]
fn verify_invariants(&self) {
if !self.at_start || !self.at_end {
return;
}
if self.debug_chunks.is_empty() {
return;
}
let expected = aggregated_node_hash(&self.debug_chunks);
let got = self.final_hash().expect("at_start and at_end both true");
assert_eq!(
expected,
got,
"MerkleHashSubtree invariant: final_hash mismatch.\n\
Expected: {expected:x}\nGot: {got:x}\n\
Num debug_chunks: {}, num_nodes: {}",
self.debug_chunks.len(),
self.nodes.len(),
);
}
}
struct CHRMergeBuffers {
out_left_nodes: Vec<Node>,
out_right_nodes: Vec<Node>,
levels: Vec<(usize, usize)>,
carry: Vec<Node>,
full: Vec<Node>,
promoted: Vec<Node>,
}
impl CHRMergeBuffers {
fn new() -> Self {
Self {
out_left_nodes: Vec::new(),
out_right_nodes: Vec::new(),
levels: Vec::new(),
carry: Vec::new(),
full: Vec::new(),
promoted: Vec::new(),
}
}
}
fn build_hump(chunks: &[Node], at_start: bool, at_end: bool) -> (Vec<Node>, Vec<(usize, usize)>) {
let est_side = MAX_GROUP_SIZE * 12;
let mut out_left_nodes: Vec<Node> = Vec::with_capacity(est_side);
let mut out_right_nodes: Vec<Node> = Vec::with_capacity(est_side);
let mut levels: Vec<(usize, usize)> = Vec::with_capacity(12);
let mut current = chunks.to_vec();
let mut promoted: Vec<Node> = Vec::new();
let mut all_lefts_empty = true;
let mut all_rights_empty = true;
loop {
let level_at_start = at_start && all_lefts_empty;
let level_at_end = at_end && all_rights_empty;
promoted.clear();
let (prefix_len, suffix_len) = split_and_promote(¤t, level_at_start, level_at_end, &mut promoted);
out_left_nodes.extend_from_slice(¤t[..prefix_len]);
out_right_nodes.extend_from_slice(¤t[current.len() - suffix_len..]);
if prefix_len > 0 {
all_lefts_empty = false;
}
if suffix_len > 0 {
all_rights_empty = false;
}
levels.push((prefix_len, suffix_len));
if promoted.is_empty() {
break;
}
if promoted.len() == 1 {
out_left_nodes.extend_from_slice(&promoted);
levels.push((promoted.len(), 0));
break;
}
std::mem::swap(&mut current, &mut promoted);
}
let mut nodes = out_left_nodes;
{
let mut end = out_right_nodes.len();
for &(_, rc) in levels.iter().rev() {
let start = end - rc;
nodes.extend_from_slice(&out_right_nodes[start..end]);
end = start;
}
}
(nodes, levels)
}
#[inline]
fn split_and_promote(nodes: &[Node], at_start: bool, at_end: bool, promoted: &mut Vec<Node>) -> (usize, usize) {
if nodes.len() <= 1 {
return (nodes.len(), 0);
}
let stable_start = if at_start {
0
} else {
match find_stable_start(nodes) {
Some(idx) => idx,
None => return (nodes.len(), 0),
}
};
let stable_end = if at_end {
nodes.len()
} else {
match find_stable_end(&nodes[stable_start..]) {
Some(idx) => stable_start + idx,
None => return (nodes.len(), 0),
}
};
if stable_start >= stable_end {
return (nodes.len(), 0);
}
let prefix_len = stable_start;
let suffix_len = nodes.len() - stable_end;
let mergeable = &nodes[stable_start..stable_end];
let mut pos = 0;
while pos < mergeable.len() {
let remaining = &mergeable[pos..];
let cut_len = next_merge_cut(remaining);
promoted.push(merged_hash_of_sequence(&remaining[..cut_len]));
pos += cut_len;
}
(prefix_len, suffix_len)
}
#[cfg(test)]
mod tests {
use rand::rngs::SmallRng;
use rand::{RngExt, SeedableRng};
use super::*;
use crate::merklehash::xorb_hash;
fn random_chunks(rng: &mut SmallRng, n: usize) -> Vec<Node> {
(0..n)
.map(|_| {
let seed = rng.random::<u64>();
(MerkleHash::random_from_seed(seed), rng.random_range(100..10000))
})
.collect()
}
fn compute_cuts_from(nodes: &[Node], start: usize) -> Vec<usize> {
let mut cuts = Vec::new();
let mut pos = start;
while pos < nodes.len() {
let remaining = &nodes[pos..];
if remaining.is_empty() {
break;
}
let cut_len = next_merge_cut(remaining);
pos += cut_len;
cuts.push(pos);
}
cuts
}
fn verify_stable_with_random_prefixes(nodes: &[Node], m: usize, rng: &mut SmallRng, num_prefixes: usize) -> bool {
for _ in 0..num_prefixes {
let prefix_len = rng.random_range(1..50);
let prefix = random_chunks(rng, prefix_len);
let mut combined = prefix;
combined.extend_from_slice(nodes);
let adjusted_m = prefix_len + m;
let cuts = compute_cuts_from(&combined, 0);
if !cuts.contains(&adjusted_m) {
return false;
}
}
true
}
fn verify_stable_end_with_random_suffixes(
nodes: &[Node],
m: usize,
rng: &mut SmallRng,
num_suffixes: usize,
) -> bool {
for _ in 0..num_suffixes {
let suffix_len = rng.random_range(1..50);
let suffix = random_chunks(rng, suffix_len);
let mut combined = nodes.to_vec();
combined.extend_from_slice(&suffix);
let cuts = compute_cuts_from(&combined, 0);
if !cuts.contains(&m) {
return false;
}
}
true
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_find_stable_start_with_random_prefixes() {
let mut rng = SmallRng::seed_from_u64(42);
let mut tested = 0;
for _ in 0..1000 {
let n = rng.random_range(15..100);
let nodes = random_chunks(&mut rng, n);
if let Some(stable) = find_stable_start(&nodes) {
tested += 1;
assert!(
verify_stable_with_random_prefixes(&nodes, stable, &mut rng, 500),
"find_stable_start returned {stable} for n={n}, but it is NOT stable under random prefixes"
);
}
}
assert!(tested > 100, "Too few sequences had stable points: {tested}");
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_stability_exhaustive_prefix_lengths() {
let mut rng = SmallRng::seed_from_u64(123);
for trial in 0..500 {
let n = rng.random_range(15..60);
let nodes = random_chunks(&mut rng, n);
if let Some(stable) = find_stable_start(&nodes) {
for prefix_len in 1..=(2 * MAX_GROUP_SIZE + 2) {
for _ in 0..20 {
let prefix = random_chunks(&mut rng, prefix_len);
let mut combined = prefix;
combined.extend_from_slice(&nodes);
let adjusted = prefix_len + stable;
let cuts = compute_cuts_from(&combined, 0);
assert!(
cuts.contains(&adjusted),
"Trial {trial}: stable={stable} not a cut with {prefix_len}-element prefix. \
n={n}, adjusted={adjusted}"
);
}
}
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_find_stable_end_with_random_suffixes() {
let mut rng = SmallRng::seed_from_u64(44);
let mut tested = 0;
for _ in 0..1000 {
let n = rng.random_range(15..100);
let nodes = random_chunks(&mut rng, n);
if let Some(stable_end) = find_stable_end(&nodes) {
tested += 1;
assert!(
verify_stable_end_with_random_suffixes(&nodes, stable_end, &mut rng, 500),
"find_stable_end returned {stable_end} for n={n}, but it is NOT stable under random suffixes"
);
}
}
assert!(tested > 100, "Too few sequences had stable end points: {tested}");
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_stable_end_exhaustive_suffix_lengths() {
let mut rng = SmallRng::seed_from_u64(125);
for trial in 0..500 {
let n = rng.random_range(15..60);
let nodes = random_chunks(&mut rng, n);
if let Some(stable_end) = find_stable_end(&nodes) {
for suffix_len in 1..=(2 * MAX_GROUP_SIZE + 2) {
for _ in 0..20 {
let suffix = random_chunks(&mut rng, suffix_len);
let mut combined = nodes.clone();
combined.extend_from_slice(&suffix);
let cuts = compute_cuts_from(&combined, 0);
assert!(
cuts.contains(&stable_end),
"Trial {trial}: stable_end={stable_end} not a cut with {suffix_len}-element suffix. n={n}"
);
}
}
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_stable_start_implies_correct_merge() {
let mut rng = SmallRng::seed_from_u64(555);
for _trial in 0..500 {
let n = rng.random_range(30..500);
let chunks = random_chunks(&mut rng, n);
let offset = rng.random_range(0..n.saturating_sub(20).max(1));
if let Some(s) = find_stable_start(&chunks[offset..]) {
let abs_stable = offset + s;
if abs_stable < n && abs_stable > 0 {
let expected = xorb_hash(&chunks);
let mut merged = MerkleHashSubtree::from_chunks(true, &chunks[..abs_stable], false);
let r2 = MerkleHashSubtree::from_chunks(false, &chunks[abs_stable..], true);
merged.merge_into(&r2).unwrap();
assert_eq!(merged.final_hash().unwrap(), expected);
}
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_stable_cut_found_in_long_sequences() {
let mut rng = SmallRng::seed_from_u64(777);
let mut found = 0;
let trials = 1000;
for _ in 0..trials {
let n = rng.random_range(30..100);
let nodes = random_chunks(&mut rng, n);
if find_stable_start(&nodes).is_some() {
found += 1;
}
}
let rate = found as f64 / trials as f64;
assert!(rate > 0.7, "Expected stable points in >70% of sequences of length 30-100, got {rate:.1}%");
}
#[test]
fn test_empty() {
let r = MerkleHashSubtree::from_chunks(true, &[], true);
assert_eq!(r.final_hash(), Some(MerkleHash::default()));
}
#[test]
fn test_single_chunk() {
let h = MerkleHash::random_from_seed(42);
let chunks = vec![(h, 1000u64)];
let r = MerkleHashSubtree::from_chunks(true, &chunks, true);
assert_eq!(r.final_hash(), Some(xorb_hash(&chunks)));
}
#[test]
fn test_small_full_range() {
let mut rng = SmallRng::seed_from_u64(12345);
for n in 2..=30 {
let chunks = random_chunks(&mut rng, n);
let expected = xorb_hash(&chunks);
let r = MerkleHashSubtree::from_chunks(true, &chunks, true);
assert_eq!(r.final_hash().unwrap(), expected, "Failed for n={n}");
}
}
#[test]
fn test_no_final_hash_without_boundaries() {
let chunks = vec![(MerkleHash::random_from_seed(1), 100)];
assert!(MerkleHashSubtree::from_chunks(false, &chunks, true).final_hash().is_none());
assert!(MerkleHashSubtree::from_chunks(true, &chunks, false).final_hash().is_none());
assert!(MerkleHashSubtree::from_chunks(false, &chunks, false).final_hash().is_none());
}
#[test]
fn test_two_way_merge_basic() {
let mut rng = SmallRng::seed_from_u64(99);
let chunks = random_chunks(&mut rng, 16);
let expected = xorb_hash(&chunks);
for split in 1..16 {
let mut merged = MerkleHashSubtree::from_chunks(true, &chunks[..split], false);
let r2 = MerkleHashSubtree::from_chunks(false, &chunks[split..], true);
merged.merge_into(&r2).unwrap();
assert_eq!(merged.final_hash().unwrap(), expected, "Failed split={split}");
}
}
#[test]
fn test_find_stable_start_basic() {
let mut rng = SmallRng::seed_from_u64(777);
let chunks = random_chunks(&mut rng, 200);
if let Some(stable) = find_stable_start(&chunks) {
assert!(stable >= MIN_GROUP_SIZE);
assert!(stable <= chunks.len());
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_two_way_merge_sweep_16() {
let mut rng = SmallRng::seed_from_u64(42);
for trial in 0..800 {
let chunks = random_chunks(&mut rng, 16);
let expected = xorb_hash(&chunks);
let split = rng.random_range(1..16);
let mut merged = MerkleHashSubtree::from_chunks(true, &chunks[..split], false);
let r2 = MerkleHashSubtree::from_chunks(false, &chunks[split..], true);
merged.merge_into(&r2).unwrap();
assert_eq!(merged.final_hash().unwrap(), expected, "Failed trial {trial}, split at {split}");
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_two_way_merge_scaling() {
let mut rng = SmallRng::seed_from_u64(123);
for n in (20..=200).step_by(4) {
for _ in 0..200 {
let chunks = random_chunks(&mut rng, n);
let expected = xorb_hash(&chunks);
let split = rng.random_range(1..n);
let mut merged = MerkleHashSubtree::from_chunks(true, &chunks[..split], false);
let r2 = MerkleHashSubtree::from_chunks(false, &chunks[split..], true);
merged.merge_into(&r2).unwrap();
assert_eq!(merged.final_hash().unwrap(), expected, "Failed n={n}, split={split}");
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_multi_way_merge() {
let mut rng = SmallRng::seed_from_u64(987);
for n in (16..=200).step_by(8) {
for _ in 0..100 {
let chunks = random_chunks(&mut rng, n);
let expected = xorb_hash(&chunks);
let num_splits = rng.random_range(2..=5usize.min(n - 1));
let mut split_points: Vec<usize> = (0..num_splits).map(|_| rng.random_range(1..n)).collect();
split_points.sort();
split_points.dedup();
if split_points.is_empty() {
split_points.push(n / 2);
}
let mut ranges = Vec::new();
let mut prev = 0;
for &sp in &split_points {
let is_start = prev == 0;
ranges.push(MerkleHashSubtree::from_chunks(is_start, &chunks[prev..sp], false));
prev = sp;
}
ranges.push(MerkleHashSubtree::from_chunks(false, &chunks[prev..], true));
let merged = MerkleHashSubtree::merge(&ranges).unwrap();
assert_eq!(merged.final_hash().unwrap(), expected, "Multi-way failed n={n}, splits={split_points:?}");
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_storage_is_log_n() {
let mut rng = SmallRng::seed_from_u64(456);
for n in [100, 500, 1000, 5000] {
let chunks = random_chunks(&mut rng, n);
let r = MerkleHashSubtree::from_chunks(false, &chunks, false);
let log_n = (n as f64).log2().ceil() as usize;
let max_expected = MAX_GROUP_SIZE * log_n * 3;
assert!(r.num_nodes() <= max_expected, "n={n}, nodes={}, max={max_expected}", r.num_nodes());
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_merge_with_existing_reference_hashes() {
fn rh(h: u64) -> MerkleHash {
if h == 0 {
[0; 4].into()
} else {
MerkleHash::random_from_seed(h)
}
}
let test_cases: Vec<Vec<u64>> = vec![
vec![1, 2, 3],
vec![1, 2, 1, 2, 3, 4],
(0..8).collect(),
(0..8).chain([1, 1, 1, 1]).collect(),
(0..8).flat_map(|h| [h, h]).collect(),
];
for seeds in &test_cases {
let chunks: Vec<Node> = seeds.iter().map(|&s| (rh(s), s * 100)).collect();
let expected = xorb_hash(&chunks);
for split in 1..chunks.len() {
let mut merged = MerkleHashSubtree::from_chunks(true, &chunks[..split], false);
let r2 = MerkleHashSubtree::from_chunks(false, &chunks[split..], true);
merged.merge_into(&r2).unwrap();
assert_eq!(merged.final_hash().unwrap(), expected, "Reference failed: seeds={seeds:?}, split={split}");
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_three_way_merge_all_splits() {
let mut rng = SmallRng::seed_from_u64(321);
for n in [8, 12, 16, 20, 24] {
let chunks = random_chunks(&mut rng, n);
let expected = xorb_hash(&chunks);
for s1 in 1..n - 1 {
for s2 in s1 + 1..n {
let r1 = MerkleHashSubtree::from_chunks(true, &chunks[..s1], false);
let r2 = MerkleHashSubtree::from_chunks(false, &chunks[s1..s2], false);
let r3 = MerkleHashSubtree::from_chunks(false, &chunks[s2..], true);
let merged = MerkleHashSubtree::merge(&[r1, r2, r3]).unwrap();
assert_eq!(merged.final_hash().unwrap(), expected, "Three-way failed: n={n}, s1={s1}, s2={s2}");
}
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_merge_preserves_log_storage() {
let mut rng = SmallRng::seed_from_u64(789);
for n in [100, 500, 1000] {
let chunks = random_chunks(&mut rng, n);
let split = n / 2;
let mut merged = MerkleHashSubtree::from_chunks(true, &chunks[..split], false);
let r2 = MerkleHashSubtree::from_chunks(false, &chunks[split..], true);
merged.merge_into(&r2).unwrap();
let log_n = (n as f64).log2().ceil() as usize;
let max_expected = MAX_GROUP_SIZE * log_n * 3;
assert!(
merged.num_nodes() <= max_expected,
"Merged n={n}: nodes={}, max={max_expected}",
merged.num_nodes()
);
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_hump_invariants() {
let mut rng = SmallRng::seed_from_u64(654);
for n in [20, 50, 100, 200] {
let chunks = random_chunks(&mut rng, n);
for &(at_start, at_end) in &[(true, true), (true, false), (false, true), (false, false)] {
let r = MerkleHashSubtree::from_chunks(at_start, &chunks, at_end);
if r.levels.is_empty() {
continue;
}
for level in 0..r.num_levels() {
let left = r.left_at(level);
let right = r.right_at(level);
assert!(
left.len() + right.len() <= n,
"Level {level} has too many nodes: left={}, right={}",
left.len(),
right.len()
);
}
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_merge_preserves_log_storage_multi() {
let mut rng = SmallRng::seed_from_u64(790);
for total in [200, 500, 1000, 2000] {
let chunks = random_chunks(&mut rng, total);
let expected = xorb_hash(&chunks);
let log_n = (total as f64).log2().ceil() as usize;
let max_expected = MAX_GROUP_SIZE * log_n * 3;
let chunk_size = rng.random_range(10..50);
let mut ranges: Vec<MerkleHashSubtree> = Vec::new();
let mut pos = 0;
while pos < total {
let end = (pos + chunk_size).min(total);
let is_start = pos == 0;
let is_end = end == total;
ranges.push(MerkleHashSubtree::from_chunks(is_start, &chunks[pos..end], is_end));
pos = end;
}
let mut merged = ranges[0].clone();
for range in &ranges[1..] {
merged.merge_into(range).unwrap();
assert!(
merged.num_nodes() <= max_expected,
"After merging, n={total}: nodes={}, max={max_expected}",
merged.num_nodes()
);
}
assert_eq!(merged.final_hash().unwrap(), expected);
}
}
#[test]
#[ignore]
fn test_storage_scaling_large() {
let mut rng = SmallRng::seed_from_u64(12321);
let batch_size = 1000;
let milestones: Vec<usize> = vec![1_000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000];
let max_milestone = *milestones.last().unwrap();
let mut accumulated = MerkleHashSubtree::from_chunks(true, &random_chunks(&mut rng, batch_size), false);
let mut total_chunks: usize = batch_size;
let mut milestone_idx = 0;
let mut results: Vec<(usize, usize, usize, usize)> = Vec::new();
let mut worst_since_last: usize = accumulated.num_nodes();
while total_chunks < max_milestone && milestone_idx < milestones.len() {
let batch = random_chunks(&mut rng, batch_size);
let batch_range = MerkleHashSubtree::from_chunks(false, &batch, false);
accumulated.merge_into(&batch_range).unwrap();
total_chunks += batch_size;
worst_since_last = worst_since_last.max(accumulated.num_nodes());
if total_chunks >= milestones[milestone_idx] {
results.push((total_chunks, accumulated.num_nodes(), accumulated.num_levels(), worst_since_last));
worst_since_last = 0;
milestone_idx += 1;
}
}
eprintln!("\n=== Storage scaling (batch_size={batch_size}, at_end=false throughout) ===");
eprintln!("{:>15} {:>10} {:>8} {:>12} {:>12}", "total_chunks", "nodes", "levels", "nodes/log2", "worst/log2");
for &(n, nodes, levels, worst) in &results {
let log2_n = (n as f64).log2();
eprintln!(
"{n:>15} {nodes:>10} {levels:>8} {:>12.2} {:>12.2}",
nodes as f64 / log2_n,
worst as f64 / log2_n,
);
}
for &(n, _nodes, _levels, worst) in &results {
let log2_n = (n as f64).log2();
let ratio = worst as f64 / log2_n;
let bound = (MAX_GROUP_SIZE as f64) * 4.0;
assert!(ratio < bound, "n={n}: worst_nodes={worst}, ratio={ratio:.1}, exceeds {bound:.0} * log2(n)",);
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_json_round_trip() {
let mut rng = SmallRng::seed_from_u64(42);
for n in [0, 1, 5, 20, 100] {
for &(at_start, at_end) in &[(true, true), (true, false), (false, true), (false, false)] {
let chunks = random_chunks(&mut rng, n);
let original = MerkleHashSubtree::from_chunks(at_start, &chunks, at_end);
let json = serde_json::to_string(&original).unwrap();
let deserialized: MerkleHashSubtree = serde_json::from_str(&json).unwrap();
assert_eq!(original.nodes, deserialized.nodes, "n={n}");
assert_eq!(original.levels, deserialized.levels, "n={n}");
assert_eq!(original.left_offsets, deserialized.left_offsets, "n={n}");
assert_eq!(original.right_offsets, deserialized.right_offsets, "n={n}");
assert_eq!(original.at_start, deserialized.at_start, "n={n}");
assert_eq!(original.at_end, deserialized.at_end, "n={n}");
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_json_format_has_hex_hashes() {
let h = MerkleHash::random_from_seed(42);
let chunks = vec![(h, 1000u64)];
let r = MerkleHashSubtree::from_chunks(true, &chunks, true);
let json = serde_json::to_string_pretty(&r).unwrap();
assert!(json.contains(&h.hex()), "JSON should contain hex hash string:\n{json}");
assert!(json.contains("\"hash\""), "JSON should have 'hash' field:\n{json}");
assert!(json.contains("\"size\""), "JSON should have 'size' field:\n{json}");
assert!(!json.contains("[0,"), "JSON should not contain raw u64 array for hash:\n{json}");
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_json_round_trip_preserves_merge_result() {
let mut rng = SmallRng::seed_from_u64(99);
let chunks = random_chunks(&mut rng, 50);
let expected = xorb_hash(&chunks);
let mut r1 = MerkleHashSubtree::from_chunks(true, &chunks[..20], false);
let r2 = MerkleHashSubtree::from_chunks(false, &chunks[20..], true);
r1.merge_into(&r2).unwrap();
let json = serde_json::to_string(&r1).unwrap();
let deserialized: MerkleHashSubtree = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.final_hash().unwrap(), expected);
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_postcard_round_trip() {
let mut rng = SmallRng::seed_from_u64(42);
for n in [0, 1, 5, 20, 100] {
for &(at_start, at_end) in &[(true, true), (true, false), (false, true), (false, false)] {
let chunks = random_chunks(&mut rng, n);
let original = MerkleHashSubtree::from_chunks(at_start, &chunks, at_end);
let bytes = postcard::to_allocvec(&original).unwrap();
let deserialized: MerkleHashSubtree = postcard::from_bytes(&bytes).unwrap();
assert_eq!(original.nodes, deserialized.nodes, "n={n}");
assert_eq!(original.levels, deserialized.levels, "n={n}");
assert_eq!(original.left_offsets, deserialized.left_offsets, "n={n}");
assert_eq!(original.right_offsets, deserialized.right_offsets, "n={n}");
assert_eq!(original.at_start, deserialized.at_start, "n={n}");
assert_eq!(original.at_end, deserialized.at_end, "n={n}");
}
}
}
#[test]
#[cfg_attr(feature = "smoke-test", ignore)]
fn test_postcard_smaller_than_json() {
let mut rng = SmallRng::seed_from_u64(77);
let chunks = random_chunks(&mut rng, 100);
let r = MerkleHashSubtree::from_chunks(true, &chunks, true);
let json_bytes = serde_json::to_string(&r).unwrap().len();
let bin_bytes = postcard::to_allocvec(&r).unwrap().len();
assert!(bin_bytes < json_bytes, "postcard ({bin_bytes}) should be smaller than JSON ({json_bytes})");
}
}