use std::{collections::BTreeSet, iter};
use crate::{
block::{
BlockData, BlockList, DirtyFilter, FileId, LogicalBlockData, LogicalBlockStore,
MerkleNodePosition, MerkleNodeStore,
},
hash::{HoleMerkleCache, LogicalBlockHasher, MerkleHash, MerkleNodeHasher},
settings::Settings,
};
#[derive(Debug)]
pub struct MerkleStore<Store> {
store: Store,
merkle_node_hasher: MerkleNodeHasher,
logical_block_hasher: LogicalBlockHasher,
logical_block_size: usize,
merkle_branch_factor: u32,
hole_merkle_cache: HoleMerkleCache,
}
impl<Store> MerkleStore<Store> {
pub fn new(store: Store, settings: &Settings) -> Self {
Self {
store,
merkle_node_hasher: MerkleNodeHasher::new(settings.merkle_branch_factor),
logical_block_hasher: LogicalBlockHasher::new(settings.logical_block_size),
logical_block_size: settings.logical_block_size,
merkle_branch_factor: settings.merkle_branch_factor,
hole_merkle_cache: HoleMerkleCache::new(
settings.merkle_branch_factor,
settings.logical_block_size,
),
}
}
}
impl<Store> MerkleStore<Store>
where
Store: MerkleNodeStore + LogicalBlockStore,
{
pub fn compute_hash(
&mut self,
file: FileId,
block_list: &mut BlockList,
) -> crate::Result<MerkleHash> {
if block_list.is_empty() {
let empty_data = LogicalBlockData::new(BlockData::Data { bytes: vec![] });
return Ok(self.logical_block_hasher.hash(&empty_data).into());
}
if block_list.is_entirely_hole() {
let file_byte_len = block_list.file_len();
let num_logical_blocks = (file_byte_len - 1) / self.logical_block_size as u64 + 1;
return Ok(self.compute_pure_hole_root_hash(num_logical_blocks));
}
match self.store.get_root(file)? {
Some(root_node) if !root_node.dirty => Ok(root_node.hash),
Some(_) | None => {
let file_byte_len = block_list.file_len();
let num_logical_blocks = if file_byte_len == 0 {
0
} else {
((file_byte_len - 1) / self.logical_block_size as u64 + 1) as usize
};
let written_leaf_indices =
self.recompute_leaf_nodes(file, block_list, num_logical_blocks)?;
self.recompute_interior_nodes(file, num_logical_blocks, written_leaf_indices)?;
Ok(self
.store
.get_root(file)?
.expect("The root merkle node should have already been computed.")
.hash)
}
}
}
fn compute_pure_hole_root_hash(&mut self, num_leaves: u64) -> MerkleHash {
if num_leaves == 0 {
panic!("Empty file should be handled separately.");
}
if num_leaves == 1 {
return self.hole_merkle_cache.get_or_compute(
0,
&mut self.merkle_node_hasher,
&mut self.logical_block_hasher,
);
}
let branch_factor = self.merkle_branch_factor as u64;
let mut nodes_at_level = num_leaves;
let mut level: usize = 0;
let mut rightmost_is_nonstandard = false;
let mut rightmost_hash: Option<MerkleHash> = None;
while nodes_at_level > 1 {
let full_parents = nodes_at_level / branch_factor;
let remainder = nodes_at_level % branch_factor;
let standard_hash = self.hole_merkle_cache.get_or_compute(
level,
&mut self.merkle_node_hasher,
&mut self.logical_block_hasher,
);
let rightmost_parent_child_count = if remainder > 0 {
remainder
} else {
branch_factor
};
let new_rightmost_is_nonstandard =
rightmost_parent_child_count < branch_factor || rightmost_is_nonstandard;
if new_rightmost_is_nonstandard {
let children: Vec<MerkleHash> = if rightmost_is_nonstandard {
iter::repeat_n(
standard_hash.clone(),
(rightmost_parent_child_count - 1) as usize,
)
.chain(iter::once(rightmost_hash.take().unwrap()))
.collect()
} else {
iter::repeat_n(standard_hash.clone(), rightmost_parent_child_count as usize)
.collect()
};
rightmost_hash = Some(self.merkle_node_hasher.hash(children));
}
rightmost_is_nonstandard = new_rightmost_is_nonstandard;
nodes_at_level = full_parents + if remainder > 0 { 1 } else { 0 };
level += 1;
}
if rightmost_is_nonstandard {
rightmost_hash.unwrap()
} else {
self.hole_merkle_cache.get_or_compute(
level,
&mut self.merkle_node_hasher,
&mut self.logical_block_hasher,
)
}
}
fn recompute_interior_nodes(
&mut self,
file: FileId,
num_logical_blocks: usize,
written_child_indices: Vec<usize>,
) -> crate::Result<()> {
let branch_factor = self.merkle_branch_factor as usize;
let mut prev_written_indices = written_child_indices;
for level in 1u32.. {
let dirty_indices = self.store.node_indices(file, level, DirtyFilter::Dirty)?;
let needed_indices: BTreeSet<usize> = prev_written_indices
.iter()
.map(|&idx| idx / branch_factor)
.collect();
let all_indices = dirty_indices
.into_iter()
.collect::<BTreeSet<_>>()
.union(&needed_indices)
.copied()
.collect::<Vec<_>>();
if all_indices.is_empty() {
break;
}
let nodes_at_this_level =
Self::nodes_at_level(num_logical_blocks, level as usize, branch_factor);
let is_root_level = nodes_at_this_level <= 1;
let child_hole_hash = self.hole_merkle_cache.get_or_compute(
(level - 1) as usize,
&mut self.merkle_node_hasher,
&mut self.logical_block_hasher,
);
let mut written_at_this_level = Vec::new();
for dirty_node_index in all_indices {
let first_child_index = dirty_node_index * branch_factor;
let last_child_index = first_child_index + branch_factor;
let existing_children =
self.store
.list_nodes(file, level - 1, first_child_index..last_child_index)?;
let nodes_at_child_level =
Self::nodes_at_level(num_logical_blocks, (level - 1) as usize, branch_factor);
let actual_last_child = last_child_index.min(nodes_at_child_level);
let num_children = actual_last_child.saturating_sub(first_child_index);
let child_hashes: Vec<MerkleHash> = (first_child_index..actual_last_child)
.map(|child_idx| {
existing_children
.iter()
.find(|node| node.pos.index == child_idx)
.map(|node| node.hash.clone())
.unwrap_or_else(|| child_hole_hash.clone())
})
.collect();
if num_children == 0 {
continue;
}
let parent_hash = self.merkle_node_hasher.hash(child_hashes);
self.store.put_node(
MerkleNodePosition {
file,
level,
index: dirty_node_index,
},
parent_hash,
)?;
written_at_this_level.push(dirty_node_index);
}
if is_root_level {
break;
}
prev_written_indices = written_at_this_level;
}
Ok(())
}
fn nodes_at_level(num_leaves: usize, level: usize, branch_factor: usize) -> usize {
let mut nodes = num_leaves;
for _ in 0..level {
nodes = nodes.div_ceil(branch_factor);
}
nodes
}
fn recompute_leaf_nodes(
&mut self,
file: FileId,
block_list: &BlockList,
num_logical_blocks: usize,
) -> crate::Result<Vec<usize>> {
let all_indices = self.store.node_indices(file, 0, DirtyFilter::All)?;
let indices_to_compute = if all_indices.is_empty() {
block_list.data_indices(num_logical_blocks, self.logical_block_size)
} else {
self.store.node_indices(file, 0, DirtyFilter::Dirty)?
};
let mut written_indices = Vec::new();
for logical_block_index in indices_to_compute {
let logical_block_data = self
.store
.get_block_at_index(logical_block_index, block_list)?;
if let Some(data) = logical_block_data {
let leaf_hash = self.logical_block_hasher.hash(&data).into();
let pos = MerkleNodePosition {
file,
level: 0,
index: logical_block_index,
};
self.store.put_node(pos, leaf_hash)?;
written_indices.push(logical_block_index);
} else {
self.store.delete_node(file, logical_block_index)?;
}
}
Ok(written_indices)
}
}