use serde::{Deserialize, Serialize};
use super::error::BlobError;
#[cfg(test)]
use super::blob_ref::BLOB_CHUNK_SIZE_BYTES;
pub const DATAFORTS_BLOB_TREE_SUPPORTED: &str = "dataforts:blob-tree-supported";
#[derive(Default)]
pub enum TreeSupportProbe {
#[default]
AlwaysSupported,
ForceManifest,
Dynamic(Box<dyn Fn() -> bool + Send + Sync>),
}
impl TreeSupportProbe {
pub fn check(&self) -> bool {
match self {
TreeSupportProbe::AlwaysSupported => true,
TreeSupportProbe::ForceManifest => false,
TreeSupportProbe::Dynamic(f) => f(),
}
}
}
impl std::fmt::Debug for TreeSupportProbe {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TreeSupportProbe::AlwaysSupported => f.write_str("TreeSupportProbe::AlwaysSupported"),
TreeSupportProbe::ForceManifest => f.write_str("TreeSupportProbe::ForceManifest"),
TreeSupportProbe::Dynamic(_) => f.write_str("TreeSupportProbe::Dynamic(..)"),
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ChunkingStrategy {
Fixed {
size: u32,
},
Cdc {
avg: u32,
min: u32,
max: u32,
},
}
impl Default for ChunkingStrategy {
fn default() -> Self {
Self::Fixed {
size: super::blob_ref::BLOB_CHUNK_SIZE_BYTES as u32,
}
}
}
pub const TREE_FANOUT: usize = 128;
pub const MAX_TREE_DEPTH: u8 = 4;
pub const TREE_THRESHOLD_BYTES: u64 = 32 * 1024 * 1024 * 1024;
pub const TREE_NODE_MAX_WIRE_BYTES: usize = 64 * 1024;
pub const TREE_LEAF_CHUNK_MAX_BYTES: u64 = 16 * 1024 * 1024;
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ChunkRole {
#[default]
Data,
Parity {
stripe_index: u8,
},
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ChunkRefV3 {
pub hash: [u8; 32],
pub size: u32,
pub role: ChunkRole,
}
impl ChunkRefV3 {
pub fn data(hash: [u8; 32], size: u32) -> Self {
Self {
hash,
size,
role: ChunkRole::Data,
}
}
pub fn parity(hash: [u8; 32], size: u32, stripe_index: u8) -> Self {
Self {
hash,
size,
role: ChunkRole::Parity { stripe_index },
}
}
pub fn is_data(&self) -> bool {
matches!(self.role, ChunkRole::Data)
}
pub fn is_parity(&self) -> bool {
matches!(self.role, ChunkRole::Parity { .. })
}
}
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct StripeBlock {
pub encoding: super::blob_ref::Encoding,
pub chunks: Vec<ChunkRefV3>,
}
impl StripeBlock {
pub fn covered_bytes(&self) -> u64 {
self.chunks
.iter()
.filter(|c| c.is_data())
.map(|c| c.size as u64)
.sum()
}
pub fn validate(&self) -> Result<(), BlobError> {
let data_count = self.chunks.iter().filter(|c| c.is_data()).count();
let parity_count = self.chunks.iter().filter(|c| c.is_parity()).count();
match self.encoding {
super::blob_ref::Encoding::Replicated => {
if parity_count != 0 {
return Err(BlobError::Decode(format!(
"StripeBlock(Replicated) has {} parity chunks; expected 0",
parity_count
)));
}
if data_count == 0 {
return Err(BlobError::Decode(
"StripeBlock(Replicated) has no data chunks".to_owned(),
));
}
}
super::blob_ref::Encoding::ReedSolomon { k, m } => {
if data_count != k as usize || parity_count != m as usize {
return Err(BlobError::Decode(format!(
"StripeBlock(RS k={}, m={}) has {} data + {} parity; expected {} + {}",
k, m, data_count, parity_count, k, m
)));
}
for (i, c) in self.chunks.iter().enumerate() {
if i < k as usize && !c.is_data() {
return Err(BlobError::Decode(format!(
"StripeBlock(RS): chunk at position {} should be Data, got Parity",
i
)));
}
if i >= k as usize && !c.is_parity() {
return Err(BlobError::Decode(format!(
"StripeBlock(RS): chunk at position {} should be Parity, got Data",
i
)));
}
}
}
}
Ok(())
}
}
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum TreeNode {
Internal {
children: Vec<([u8; 32], u64)>,
},
Leaf {
chunks: Vec<ChunkRefV3>,
},
ErasureLeaf {
stripes: Vec<StripeBlock>,
},
}
impl TreeNode {
pub fn internal(children: Vec<([u8; 32], u64)>) -> Result<Self, BlobError> {
let node = TreeNode::Internal { children };
node.validate()?;
Ok(node)
}
pub fn leaf(chunks: Vec<ChunkRefV3>) -> Result<Self, BlobError> {
let node = TreeNode::Leaf { chunks };
node.validate()?;
Ok(node)
}
pub fn erasure_leaf(stripes: Vec<StripeBlock>) -> Result<Self, BlobError> {
let node = TreeNode::ErasureLeaf { stripes };
node.validate()?;
Ok(node)
}
pub fn encode(&self) -> Result<Vec<u8>, BlobError> {
self.validate()?;
let bytes = postcard::to_allocvec(self)
.map_err(|e| BlobError::Decode(format!("TreeNode encode failed: {}", e)))?;
if bytes.len() > TREE_NODE_MAX_WIRE_BYTES {
return Err(BlobError::Decode(format!(
"TreeNode encoded length {} exceeds cap {}",
bytes.len(),
TREE_NODE_MAX_WIRE_BYTES
)));
}
Ok(bytes)
}
pub fn decode(bytes: &[u8]) -> Result<Self, BlobError> {
if bytes.len() > TREE_NODE_MAX_WIRE_BYTES {
return Err(BlobError::Decode(format!(
"TreeNode wire length {} exceeds cap {}",
bytes.len(),
TREE_NODE_MAX_WIRE_BYTES
)));
}
let node: TreeNode = postcard::from_bytes(bytes)
.map_err(|e| BlobError::Decode(format!("TreeNode decode failed: {}", e)))?;
node.validate()?;
Ok(node)
}
pub fn validate(&self) -> Result<(), BlobError> {
match self {
TreeNode::Internal { children } => {
if children.is_empty() {
return Err(BlobError::Decode(
"TreeNode::Internal must have at least one child".to_owned(),
));
}
if children.len() > TREE_FANOUT {
return Err(BlobError::Decode(format!(
"TreeNode::Internal child count {} exceeds fanout cap {}",
children.len(),
TREE_FANOUT
)));
}
let mut sum: u64 = 0;
for (i, (_, sz)) in children.iter().enumerate() {
if *sz == 0 {
return Err(BlobError::Decode(format!(
"TreeNode::Internal child {} has zero subtree_size",
i
)));
}
sum = sum.checked_add(*sz).ok_or_else(|| {
BlobError::Decode(
"TreeNode::Internal subtree_size sum overflowed u64".to_owned(),
)
})?;
}
let _ = sum; }
TreeNode::ErasureLeaf { stripes } => {
if stripes.is_empty() {
return Err(BlobError::Decode(
"TreeNode::ErasureLeaf must have at least one stripe".to_owned(),
));
}
let mut chunk_total: usize = 0;
for (i, stripe) in stripes.iter().enumerate() {
stripe
.validate()
.map_err(|e| BlobError::Decode(format!("stripe {}: {}", i, e)))?;
chunk_total = chunk_total.saturating_add(stripe.chunks.len());
for (j, chunk) in stripe.chunks.iter().enumerate() {
if chunk.size == 0 {
return Err(BlobError::Decode(format!(
"TreeNode::ErasureLeaf stripe {} chunk {} has zero size",
i, j
)));
}
if (chunk.size as u64) > TREE_LEAF_CHUNK_MAX_BYTES {
return Err(BlobError::Decode(format!(
"TreeNode::ErasureLeaf stripe {} chunk {} size {} exceeds cap {}",
i, j, chunk.size, TREE_LEAF_CHUNK_MAX_BYTES
)));
}
}
}
if chunk_total > TREE_FANOUT {
return Err(BlobError::Decode(format!(
"TreeNode::ErasureLeaf total chunk count {} exceeds fanout cap {}",
chunk_total, TREE_FANOUT
)));
}
}
TreeNode::Leaf { chunks } => {
if chunks.is_empty() {
return Err(BlobError::Decode(
"TreeNode::Leaf must have at least one chunk".to_owned(),
));
}
if chunks.len() > TREE_FANOUT {
return Err(BlobError::Decode(format!(
"TreeNode::Leaf chunk count {} exceeds fanout cap {}",
chunks.len(),
TREE_FANOUT
)));
}
for (i, chunk) in chunks.iter().enumerate() {
if chunk.size == 0 {
return Err(BlobError::Decode(format!(
"TreeNode::Leaf chunk {} has zero size",
i
)));
}
if (chunk.size as u64) > TREE_LEAF_CHUNK_MAX_BYTES {
return Err(BlobError::Decode(format!(
"TreeNode::Leaf chunk {} size {} exceeds cap {}",
i, chunk.size, TREE_LEAF_CHUNK_MAX_BYTES
)));
}
}
}
}
Ok(())
}
pub fn is_internal(&self) -> bool {
matches!(self, TreeNode::Internal { .. })
}
pub fn is_leaf(&self) -> bool {
matches!(self, TreeNode::Leaf { .. })
}
pub fn covered_bytes(&self) -> u64 {
match self {
TreeNode::ErasureLeaf { stripes } => {
stripes.iter().map(|s| s.covered_bytes()).sum::<u64>()
}
TreeNode::Internal { children } => children.iter().map(|(_, sz)| *sz).sum::<u64>(),
TreeNode::Leaf { chunks } => chunks.iter().map(|c| c.size as u64).sum::<u64>(),
}
}
pub fn arity(&self) -> usize {
match self {
TreeNode::Internal { children } => children.len(),
TreeNode::Leaf { chunks } => chunks.len(),
TreeNode::ErasureLeaf { stripes } => stripes.iter().map(|s| s.chunks.len()).sum(),
}
}
pub fn is_erasure_leaf(&self) -> bool {
matches!(self, TreeNode::ErasureLeaf { .. })
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ClosedNode {
pub hash: [u8; 32],
pub bytes: Vec<u8>,
pub level: u8,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TreeBuildOutput {
pub root_hash: [u8; 32],
pub root_bytes: Vec<u8>,
pub root_depth: u8,
pub total_bytes: u64,
pub chunk_count: u64,
pub trailing_nodes: Vec<ClosedNode>,
}
#[derive(Debug)]
pub struct TreeBuilder {
leaf_chunks: Vec<ChunkRefV3>,
internals: Vec<Vec<([u8; 32], u64)>>,
total_bytes: u64,
chunk_count: u64,
}
impl TreeBuilder {
pub fn new() -> Self {
Self {
leaf_chunks: Vec::with_capacity(TREE_FANOUT),
internals: Vec::new(),
total_bytes: 0,
chunk_count: 0,
}
}
pub fn total_bytes(&self) -> u64 {
self.total_bytes
}
pub fn chunk_count(&self) -> u64 {
self.chunk_count
}
pub fn push_chunk(&mut self, chunk: ChunkRefV3) -> Result<Vec<ClosedNode>, BlobError> {
if chunk.size == 0 {
return Err(BlobError::Decode(
"TreeBuilder::push_chunk received zero-size chunk".to_owned(),
));
}
if (chunk.size as u64) > TREE_LEAF_CHUNK_MAX_BYTES {
return Err(BlobError::Decode(format!(
"TreeBuilder::push_chunk received oversize chunk: {} bytes (cap {})",
chunk.size, TREE_LEAF_CHUNK_MAX_BYTES
)));
}
self.total_bytes = self.total_bytes.saturating_add(chunk.size as u64);
self.chunk_count = self.chunk_count.saturating_add(1);
self.leaf_chunks.push(chunk);
if self.leaf_chunks.len() < TREE_FANOUT {
return Ok(Vec::new());
}
self.close_leaf_and_cascade()
}
pub fn push_prebuilt_leaf(
&mut self,
leaf_hash: [u8; 32],
leaf_bytes: Vec<u8>,
leaf_covered_bytes: u64,
synthetic_chunks: u64,
) -> Result<Vec<ClosedNode>, BlobError> {
self.total_bytes = self.total_bytes.saturating_add(leaf_covered_bytes);
self.chunk_count = self.chunk_count.saturating_add(synthetic_chunks);
let mut emitted = vec![ClosedNode {
hash: leaf_hash,
bytes: leaf_bytes,
level: 0,
}];
self.lift_into_internal(0, leaf_hash, leaf_covered_bytes, &mut emitted)?;
Ok(emitted)
}
fn close_leaf_and_cascade(&mut self) -> Result<Vec<ClosedNode>, BlobError> {
let leaf_chunks = std::mem::replace(&mut self.leaf_chunks, Vec::with_capacity(TREE_FANOUT));
if leaf_chunks.is_empty() {
return Ok(Vec::new());
}
let leaf = TreeNode::leaf(leaf_chunks)?;
let bytes = leaf.encode()?;
let hash: [u8; 32] = blake3::hash(&bytes).into();
let size = leaf.covered_bytes();
let mut emitted = vec![ClosedNode {
hash,
bytes,
level: 0,
}];
self.lift_into_internal(0, hash, size, &mut emitted)?;
Ok(emitted)
}
fn lift_into_internal(
&mut self,
level: usize,
hash: [u8; 32],
size: u64,
emitted: &mut Vec<ClosedNode>,
) -> Result<(), BlobError> {
while self.internals.len() <= level {
self.internals.push(Vec::with_capacity(TREE_FANOUT));
}
self.internals[level].push((hash, size));
if self.internals[level].len() < TREE_FANOUT {
return Ok(());
}
let entries =
std::mem::replace(&mut self.internals[level], Vec::with_capacity(TREE_FANOUT));
let node = TreeNode::internal(entries)?;
let bytes = node.encode()?;
let node_hash: [u8; 32] = blake3::hash(&bytes).into();
let node_size = node.covered_bytes();
let depth_for_emit = (level + 1) as u8;
if depth_for_emit > MAX_TREE_DEPTH {
return Err(BlobError::Decode(format!(
"TreeBuilder cascade exceeded MAX_TREE_DEPTH {}: \
internal node at depth {} would not fit",
MAX_TREE_DEPTH, depth_for_emit
)));
}
emitted.push(ClosedNode {
hash: node_hash,
bytes,
level: depth_for_emit,
});
self.lift_into_internal(level + 1, node_hash, node_size, emitted)
}
pub fn finalize(mut self) -> Result<TreeBuildOutput, BlobError> {
if self.chunk_count == 0 {
return Err(BlobError::Decode(
"TreeBuilder::finalize called with no chunks".to_owned(),
));
}
let mut trailing: Vec<ClosedNode> = Vec::new();
let mut current: Option<(
/*hash*/ [u8; 32],
/*size*/ u64,
/*bytes*/ Vec<u8>,
/*level*/ u8,
)> = None;
if !self.leaf_chunks.is_empty() {
let leaf = TreeNode::leaf(std::mem::take(&mut self.leaf_chunks))?;
let bytes = leaf.encode()?;
let hash: [u8; 32] = blake3::hash(&bytes).into();
let size = leaf.covered_bytes();
current = Some((hash, size, bytes, 0));
}
for level_idx in 0..self.internals.len() {
let mut entries = std::mem::take(&mut self.internals[level_idx]);
if let Some((hash, size, bytes, c_level)) = current.take() {
entries.push((hash, size));
trailing.push(ClosedNode {
hash,
bytes,
level: c_level,
});
}
if entries.is_empty() {
continue;
}
let node = TreeNode::internal(entries)?;
let bytes = node.encode()?;
let hash: [u8; 32] = blake3::hash(&bytes).into();
let size = node.covered_bytes();
let level = (level_idx + 1) as u8;
if level > MAX_TREE_DEPTH {
return Err(BlobError::Decode(format!(
"TreeBuilder::finalize produced internal node at depth {}, \
exceeding MAX_TREE_DEPTH {}",
level, MAX_TREE_DEPTH
)));
}
current = Some((hash, size, bytes, level));
}
let (mut root_hash, mut root_total, mut root_bytes, mut root_level) =
current.ok_or_else(|| {
BlobError::Decode(
"TreeBuilder::finalize internal error — non-empty input produced no root"
.to_owned(),
)
})?;
loop {
let decoded = TreeNode::decode(&root_bytes)?;
let TreeNode::Internal { children } = &decoded else {
break; };
if children.len() != 1 {
break; }
let (child_hash, child_size) = children[0];
let Some(pos) = trailing.iter().position(|n| n.hash == child_hash) else {
break; };
let child_node = trailing.remove(pos);
root_hash = child_hash;
root_total = child_size;
root_bytes = child_node.bytes;
root_level = child_node.level;
}
if !root_bytes.is_empty() {
if let Ok(TreeNode::Internal { children }) = TreeNode::decode(&root_bytes) {
if children.len() == 1 && root_level > 0 {
let (child_hash, child_size) = children[0];
if !trailing.iter().any(|n| n.hash == child_hash) {
root_hash = child_hash;
root_total = child_size;
root_bytes = Vec::new();
root_level -= 1;
}
}
}
}
let root_depth = (root_level as u16 + 1) as u8;
if root_depth > MAX_TREE_DEPTH {
return Err(BlobError::Decode(format!(
"TreeBuilder::finalize produced root at depth {}, \
exceeding MAX_TREE_DEPTH {}",
root_depth, MAX_TREE_DEPTH
)));
}
if root_total != self.total_bytes {
return Err(BlobError::Decode(format!(
"TreeBuilder::finalize root covers {} bytes but builder accepted {}",
root_total, self.total_bytes
)));
}
Ok(TreeBuildOutput {
root_hash,
root_bytes,
root_depth,
total_bytes: self.total_bytes,
chunk_count: self.chunk_count,
trailing_nodes: trailing,
})
}
}
impl Default for TreeBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
fn h(byte: u8) -> [u8; 32] {
[byte; 32]
}
#[test]
fn chunk_ref_data_helper_sets_role() {
let c = ChunkRefV3::data(h(0x11), 4 * 1024 * 1024);
assert!(c.is_data());
assert!(!c.is_parity());
assert_eq!(c.role, ChunkRole::Data);
}
#[test]
fn chunk_ref_parity_helper_sets_role_with_stripe_index() {
let c = ChunkRefV3::parity(h(0x22), 4 * 1024 * 1024, 3);
assert!(!c.is_data());
assert!(c.is_parity());
match c.role {
ChunkRole::Parity { stripe_index } => assert_eq!(stripe_index, 3),
ChunkRole::Data => panic!("expected Parity role"),
}
}
#[test]
fn chunk_role_default_is_data() {
assert_eq!(ChunkRole::default(), ChunkRole::Data);
}
#[test]
fn internal_requires_at_least_one_child() {
let err = TreeNode::internal(Vec::new()).unwrap_err();
assert!(matches!(err, BlobError::Decode(_)));
}
#[test]
fn internal_rejects_zero_subtree_size() {
let err = TreeNode::internal(vec![(h(0x01), 0)]).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("zero subtree_size"), "got: {msg}");
}
#[test]
fn internal_rejects_overflowing_subtree_sum() {
let err = TreeNode::internal(vec![(h(0x01), u64::MAX), (h(0x02), 1)]).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("overflowed"), "got: {msg}");
}
#[test]
fn internal_rejects_over_fanout_cap() {
let too_many: Vec<_> = (0..(TREE_FANOUT + 1) as u8)
.map(|i| (h(i), 1024u64))
.collect();
let err = TreeNode::internal(too_many).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("exceeds fanout cap"), "got: {msg}");
}
#[test]
fn internal_accepts_at_fanout_cap() {
let exactly_cap: Vec<_> = (0..TREE_FANOUT as u8)
.map(|i| (h(i), 4 * 1024 * 1024u64))
.collect();
let node = TreeNode::internal(exactly_cap).expect("at-cap construction succeeds");
assert_eq!(node.arity(), TREE_FANOUT);
}
#[test]
fn leaf_requires_at_least_one_chunk() {
let err = TreeNode::leaf(Vec::new()).unwrap_err();
assert!(matches!(err, BlobError::Decode(_)));
}
#[test]
fn leaf_rejects_zero_size_chunk() {
let err = TreeNode::leaf(vec![ChunkRefV3::data(h(0x33), 0)]).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("zero size"), "got: {msg}");
}
#[test]
fn leaf_rejects_oversize_chunk() {
let over = (TREE_LEAF_CHUNK_MAX_BYTES + 1) as u32;
let err = TreeNode::leaf(vec![ChunkRefV3::data(h(0x44), over)]).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("exceeds cap"), "got: {msg}");
}
#[test]
fn leaf_accepts_fixed_chunk_size() {
let node = TreeNode::leaf(vec![ChunkRefV3::data(
h(0x55),
BLOB_CHUNK_SIZE_BYTES as u32,
)])
.expect("fixed-size chunk is valid");
assert_eq!(node.arity(), 1);
assert_eq!(node.covered_bytes(), BLOB_CHUNK_SIZE_BYTES);
}
#[test]
fn leaf_accepts_at_cdc_max_chunk_size() {
let max = TREE_LEAF_CHUNK_MAX_BYTES as u32;
let node = TreeNode::leaf(vec![ChunkRefV3::data(h(0x66), max)])
.expect("at-cap chunk is valid (CDC max boundary)");
assert_eq!(node.covered_bytes(), TREE_LEAF_CHUNK_MAX_BYTES);
}
#[test]
fn internal_round_trips_through_wire() {
let children = vec![
(h(0x01), 4 * 1024 * 1024u64),
(h(0x02), 4 * 1024 * 1024),
(h(0x03), 1024 * 1024),
];
let node = TreeNode::internal(children.clone()).unwrap();
let bytes = node.encode().unwrap();
let decoded = TreeNode::decode(&bytes).unwrap();
assert_eq!(node, decoded);
match decoded {
TreeNode::Internal { children: c } => assert_eq!(c, children),
TreeNode::Leaf { .. } => panic!("expected Internal"),
TreeNode::ErasureLeaf { .. } => panic!("expected Internal"),
}
}
#[test]
fn leaf_round_trips_data_chunks_through_wire() {
let chunks = vec![
ChunkRefV3::data(h(0x10), 4 * 1024 * 1024),
ChunkRefV3::data(h(0x11), 4 * 1024 * 1024),
ChunkRefV3::data(h(0x12), 1024),
];
let node = TreeNode::leaf(chunks.clone()).unwrap();
let bytes = node.encode().unwrap();
let decoded = TreeNode::decode(&bytes).unwrap();
assert_eq!(node, decoded);
if let TreeNode::Leaf { chunks: c } = decoded {
assert_eq!(c, chunks);
assert!(c.iter().all(ChunkRefV3::is_data));
} else {
panic!("expected Leaf");
}
}
#[test]
fn leaf_round_trips_mixed_data_parity_chunks() {
let chunks = vec![
ChunkRefV3::data(h(0x20), 4 * 1024 * 1024),
ChunkRefV3::data(h(0x21), 4 * 1024 * 1024),
ChunkRefV3::parity(h(0x22), 4 * 1024 * 1024, 0),
ChunkRefV3::parity(h(0x23), 4 * 1024 * 1024, 0),
];
let node = TreeNode::leaf(chunks.clone()).unwrap();
let bytes = node.encode().unwrap();
let decoded = TreeNode::decode(&bytes).unwrap();
assert_eq!(node, decoded);
if let TreeNode::Leaf { chunks: c } = decoded {
assert_eq!(c.iter().filter(|x| x.is_data()).count(), 2);
assert_eq!(c.iter().filter(|x| x.is_parity()).count(), 2);
} else {
panic!("expected Leaf");
}
}
#[test]
fn decode_rejects_oversize_wire_bytes() {
let bytes = vec![0u8; TREE_NODE_MAX_WIRE_BYTES + 1];
let err = TreeNode::decode(&bytes).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("exceeds cap"), "got: {msg}");
}
#[test]
fn encode_cap_is_reachable_only_under_pathological_inputs() {
let children: Vec<_> = (0..TREE_FANOUT as u8).map(|i| (h(i), 1u64 << 32)).collect();
let node = TreeNode::internal(children).unwrap();
let bytes = node.encode().unwrap();
assert!(
bytes.len() < TREE_NODE_MAX_WIRE_BYTES,
"max-fanout internal node should fit comfortably; got {} bytes",
bytes.len()
);
}
#[test]
fn decode_rejects_malformed_postcard_bytes() {
let garbage = vec![0xFFu8; 32];
let err = TreeNode::decode(&garbage).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("decode failed"), "got: {msg}");
}
#[test]
fn decode_re_validates_invariants() {
let bad = TreeNode::Internal {
children: Vec::new(),
};
let bytes = postcard::to_allocvec(&bad).unwrap();
let err = TreeNode::decode(&bytes).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("must have at least one child"), "got: {msg}");
}
#[test]
fn covered_bytes_internal_sums_subtree_sizes() {
let node = TreeNode::internal(vec![(h(0x01), 100), (h(0x02), 200), (h(0x03), 50)]).unwrap();
assert_eq!(node.covered_bytes(), 350);
assert_eq!(node.arity(), 3);
assert!(node.is_internal());
assert!(!node.is_leaf());
}
#[test]
fn covered_bytes_leaf_sums_chunk_sizes() {
let node = TreeNode::leaf(vec![
ChunkRefV3::data(h(0x10), 1000),
ChunkRefV3::data(h(0x11), 2000),
])
.unwrap();
assert_eq!(node.covered_bytes(), 3000);
assert_eq!(node.arity(), 2);
assert!(node.is_leaf());
assert!(!node.is_internal());
}
#[test]
fn fanout_and_depth_yield_expected_max_address() {
let leaf_bytes = TREE_FANOUT as u128 * BLOB_CHUNK_SIZE_BYTES as u128;
let depth_1 = leaf_bytes * TREE_FANOUT as u128;
let depth_2 = depth_1 * TREE_FANOUT as u128;
let depth_3 = depth_2 * TREE_FANOUT as u128;
let depth_4 = depth_3 * TREE_FANOUT as u128;
let pib_128 = 128u128 * (1u128 << 50);
assert_eq!(
depth_4, pib_128,
"fanout {} + depth {} + chunk {} should address 128 PiB",
TREE_FANOUT, MAX_TREE_DEPTH, BLOB_CHUNK_SIZE_BYTES
);
assert_eq!(leaf_bytes, 512u128 * 1024 * 1024, "leaf addresses 512 MiB");
assert_eq!(
depth_1,
64u128 * 1024 * 1024 * 1024,
"depth-1 addresses 64 GiB"
);
assert_eq!(depth_2, 8u128 * (1u128 << 40), "depth-2 addresses 8 TiB");
assert_eq!(depth_3, 1u128 << 50, "depth-3 addresses 1 PiB");
}
#[test]
fn threshold_matches_documented_break_even() {
assert_eq!(TREE_THRESHOLD_BYTES, 32u64 * 1024 * 1024 * 1024);
}
#[test]
fn max_tree_depth_pinned_at_four() {
assert_eq!(MAX_TREE_DEPTH, 4);
}
fn n_chunks(n: usize) -> Vec<ChunkRefV3> {
(0..n)
.map(|i| {
let mut hash = [0u8; 32];
hash[0] = (i & 0xFF) as u8;
hash[1] = ((i >> 8) & 0xFF) as u8;
hash[2] = ((i >> 16) & 0xFF) as u8;
ChunkRefV3::data(hash, BLOB_CHUNK_SIZE_BYTES as u32)
})
.collect()
}
#[test]
fn builder_empty_finalize_errors() {
let b = TreeBuilder::new();
let err = b.finalize().unwrap_err();
let msg = err.to_string();
assert!(msg.contains("no chunks"), "got: {msg}");
}
#[test]
fn builder_rejects_zero_size_chunk() {
let mut b = TreeBuilder::new();
let err = b.push_chunk(ChunkRefV3::data([0u8; 32], 0)).unwrap_err();
assert!(err.to_string().contains("zero-size"), "got: {err}");
}
#[test]
fn builder_rejects_oversize_chunk() {
let mut b = TreeBuilder::new();
let err = b
.push_chunk(ChunkRefV3::data(
[0u8; 32],
(TREE_LEAF_CHUNK_MAX_BYTES + 1) as u32,
))
.unwrap_err();
assert!(err.to_string().contains("oversize"), "got: {err}");
}
#[test]
fn builder_single_chunk_root_is_leaf() {
let mut b = TreeBuilder::new();
let emitted = b
.push_chunk(n_chunks(1).into_iter().next().unwrap())
.unwrap();
assert!(
emitted.is_empty(),
"single push below fanout emits no closed nodes"
);
let out = b.finalize().unwrap();
assert_eq!(out.root_depth, 1);
assert_eq!(out.total_bytes, BLOB_CHUNK_SIZE_BYTES);
assert_eq!(out.chunk_count, 1);
assert!(out.trailing_nodes.is_empty());
let root = TreeNode::decode(&out.root_bytes).unwrap();
assert!(root.is_leaf());
assert_eq!(root.arity(), 1);
let computed: [u8; 32] = blake3::hash(&out.root_bytes).into();
assert_eq!(computed, out.root_hash);
}
#[test]
fn builder_one_full_leaf_peels_to_streamed_leaf_root() {
let mut b = TreeBuilder::new();
let mut mid_closed = Vec::new();
for c in n_chunks(TREE_FANOUT) {
mid_closed.extend(b.push_chunk(c).unwrap());
}
assert_eq!(mid_closed.len(), 1);
assert_eq!(mid_closed[0].level, 0);
let leaf_hash = mid_closed[0].hash;
let out = b.finalize().unwrap();
assert_eq!(
out.root_depth, 1,
"FANOUT-chunk input peels to depth=1 (the streamed leaf \
becomes the root); pre-fix this was depth=2 with a 1-child \
internal wrapping the streamed leaf"
);
assert!(
out.root_bytes.is_empty(),
"streamed peel emits empty root_bytes as skip-persist signal"
);
assert_eq!(
out.root_hash, leaf_hash,
"root_hash promotes to the streamed leaf's hash"
);
assert_eq!(out.total_bytes, BLOB_CHUNK_SIZE_BYTES * TREE_FANOUT as u64);
assert_eq!(out.chunk_count, TREE_FANOUT as u64);
}
#[test]
fn builder_peels_partial_leaf_when_child_in_trailing() {
let count = TREE_FANOUT / 2;
let mut b = TreeBuilder::new();
for c in n_chunks(count) {
let emitted = b.push_chunk(c).unwrap();
assert!(emitted.is_empty(), "no cascade below fanout");
}
let out = b.finalize().unwrap();
assert_eq!(out.root_depth, 1);
assert!(out.trailing_nodes.is_empty());
let root = TreeNode::decode(&out.root_bytes).unwrap();
assert!(root.is_leaf());
assert_eq!(root.arity(), count);
}
#[test]
fn builder_two_leaves_yields_depth_two() {
let mut b = TreeBuilder::new();
let mut closed = Vec::new();
for c in n_chunks(TREE_FANOUT * 2) {
closed.extend(b.push_chunk(c).unwrap());
}
let leaf_closes = closed.iter().filter(|n| n.level == 0).count();
assert_eq!(leaf_closes, 2, "two full leaves close during streaming");
let out = b.finalize().unwrap();
assert_eq!(out.root_depth, 2);
assert_eq!(
out.total_bytes,
BLOB_CHUNK_SIZE_BYTES * (TREE_FANOUT * 2) as u64
);
let root = TreeNode::decode(&out.root_bytes).unwrap();
assert!(root.is_internal());
assert_eq!(root.arity(), 2);
}
#[test]
fn builder_fanout_plus_one_yields_depth_two_with_partial_leaf() {
let mut b = TreeBuilder::new();
for c in n_chunks(TREE_FANOUT + 1) {
let _ = b.push_chunk(c).unwrap();
}
let out = b.finalize().unwrap();
assert_eq!(out.root_depth, 2);
let root = TreeNode::decode(&out.root_bytes).unwrap();
assert!(root.is_internal());
assert_eq!(
root.arity(),
2,
"first leaf full, second leaf has one chunk"
);
}
#[test]
fn builder_fanout_squared_clean_boundary_peels_streamed_child_to_depth_two() {
let chunk_count = TREE_FANOUT * TREE_FANOUT;
let mut b = TreeBuilder::new();
for i in 0..chunk_count {
let mut hash = [0u8; 32];
hash[0] = (i & 0xFF) as u8;
hash[1] = ((i >> 8) & 0xFF) as u8;
hash[2] = ((i >> 16) & 0xFF) as u8;
let _ = b.push_chunk(ChunkRefV3::data(hash, 1024)).unwrap();
}
let out = b.finalize().unwrap();
assert_eq!(
out.root_depth, 2,
"FANOUT² clean-boundary tree must peel to depth=2 \
(one level-2 internal wrapping FANOUT level-1 internals)"
);
assert_eq!(out.chunk_count, chunk_count as u64);
assert!(
out.root_bytes.is_empty(),
"streamed peel emits empty root_bytes as skip-persist signal"
);
}
#[test]
fn builder_fanout_squared_plus_one_produces_depth_three_with_known_wart() {
let chunk_count = TREE_FANOUT * TREE_FANOUT + 1;
let mut b = TreeBuilder::new();
for i in 0..chunk_count {
let mut hash = [0u8; 32];
hash[0] = (i & 0xFF) as u8;
hash[1] = ((i >> 8) & 0xFF) as u8;
hash[2] = ((i >> 16) & 0xFF) as u8;
let _ = b.push_chunk(ChunkRefV3::data(hash, 1024)).unwrap();
}
let out = b.finalize().unwrap();
assert_eq!(out.root_depth, 3);
assert_eq!(out.chunk_count, chunk_count as u64);
let root = TreeNode::decode(&out.root_bytes).unwrap();
assert!(root.is_internal());
assert_eq!(root.arity(), 2);
}
#[test]
fn builder_is_deterministic_across_runs() {
let chunks = n_chunks(TREE_FANOUT * 3 + 17);
let mut b1 = TreeBuilder::new();
let mut b2 = TreeBuilder::new();
for c in &chunks {
let _ = b1.push_chunk(*c).unwrap();
let _ = b2.push_chunk(*c).unwrap();
}
let o1 = b1.finalize().unwrap();
let o2 = b2.finalize().unwrap();
assert_eq!(o1.root_hash, o2.root_hash);
assert_eq!(o1.root_depth, o2.root_depth);
assert_eq!(o1.total_bytes, o2.total_bytes);
assert_eq!(o1.chunk_count, o2.chunk_count);
}
#[test]
fn builder_emits_hashes_that_match_blake3_of_bytes() {
let mut b = TreeBuilder::new();
let mut all_closed: Vec<ClosedNode> = Vec::new();
for c in n_chunks(TREE_FANOUT * 2 + 5) {
all_closed.extend(b.push_chunk(c).unwrap());
}
let out = b.finalize().unwrap();
all_closed.extend(out.trailing_nodes.clone());
all_closed.push(ClosedNode {
hash: out.root_hash,
bytes: out.root_bytes.clone(),
level: out.root_depth - 1,
});
for node in &all_closed {
let computed: [u8; 32] = blake3::hash(&node.bytes).into();
assert_eq!(
computed, node.hash,
"ClosedNode at level {} has hash that doesn't match blake3(bytes)",
node.level
);
}
}
#[test]
fn builder_root_covers_all_accumulated_bytes() {
let chunks = n_chunks(TREE_FANOUT * 4 + 33);
let total: u64 = chunks.iter().map(|c| c.size as u64).sum();
let mut b = TreeBuilder::new();
for c in &chunks {
let _ = b.push_chunk(*c).unwrap();
}
let out = b.finalize().unwrap();
let root = TreeNode::decode(&out.root_bytes).unwrap();
assert_eq!(root.covered_bytes(), total);
assert_eq!(out.total_bytes, total);
}
#[test]
fn builder_memory_is_bounded_independent_of_input_size() {
let mut b = TreeBuilder::new();
let mut total_closed = 0;
for i in 0..(100 * TREE_FANOUT) {
let mut hash = [0u8; 32];
hash[0] = (i & 0xFF) as u8;
hash[1] = ((i >> 8) & 0xFF) as u8;
total_closed += b.push_chunk(ChunkRefV3::data(hash, 1024)).unwrap().len();
}
let total_open_entries: usize =
b.leaf_chunks.len() + b.internals.iter().map(|v| v.len()).sum::<usize>();
assert!(
total_open_entries <= TREE_FANOUT * (MAX_TREE_DEPTH as usize),
"builder has {} open entries; expected <= {} (fanout × MAX_DEPTH)",
total_open_entries,
TREE_FANOUT * (MAX_TREE_DEPTH as usize)
);
assert!(total_closed > 0, "some cascades should have emitted nodes");
let _ = b.finalize().unwrap();
}
#[test]
fn builder_root_independent_of_push_batching() {
let chunks = n_chunks(TREE_FANOUT + 50);
let mut b1 = TreeBuilder::new();
for c in &chunks {
let _ = b1.push_chunk(*c).unwrap();
}
let o1 = b1.finalize().unwrap();
let mut b2 = TreeBuilder::new();
for c in &chunks {
let _ = b2.push_chunk(*c).unwrap();
}
let o2 = b2.finalize().unwrap();
assert_eq!(o1.root_hash, o2.root_hash);
}
}