liteboxfs 0.2.0

A modern POSIX filesystem in a SQLite database
Documentation
use std::ops::RangeBounds;

use crate::hash::MerkleHash;

use super::{
    store::BlockStore,
    types::{
        BlockData, BlockLen, BlockList, BlockOffset, BlockSignature, FileId, FileOffset, HoleLen,
    },
};

/// The index of a merkle node within its level.
pub type MerkleNodeIndex = usize;

/// The level of a merkle node within the merkle tree.
pub type MerkleNodeLevel = u32;

/// The position of a merkle node within the merkle tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MerkleNodePosition {
    pub file: FileId,
    pub level: MerkleNodeLevel,
    pub index: MerkleNodeIndex,
}

/// A merkle node in the merkle tree.
///
/// Nodes can be marked dirty. A dirty leaf node indicates the corresponding logical block has been
/// modified and the hash needs to be recomputed. A dirty internal node indicates that one or more
/// of its children are dirty, and so its hash needs to be recomputed.
#[derive(Debug, Clone)]
pub struct MerkleNode {
    pub pos: MerkleNodePosition,
    pub hash: MerkleHash,
    pub dirty: bool,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DirtyFilter {
    Dirty,
    #[allow(dead_code)]
    NotDirty,
    All,
}

/// An abstraction for storing and retrieving nodes in a merkle tree.
pub trait MerkleNodeStore {
    /// Get the root node in the merkle tree.
    ///
    /// The root node may be dirty; it is up to the caller to handle this.
    fn get_root(&mut self, file: FileId) -> crate::Result<Option<MerkleNode>>;

    /// Get the indices of dirty merkle nodes at the given level.
    fn node_indices(
        &mut self,
        file: FileId,
        level: u32,
        filter: DirtyFilter,
    ) -> crate::Result<Vec<usize>>;

    /// Get a range of merkle nodes at the given level.
    fn list_nodes<R>(
        &mut self,
        file: FileId,
        level: u32,
        indices: R,
    ) -> crate::Result<Vec<MerkleNode>>
    where
        R: RangeBounds<usize>;

    /// Store the given merkle node in the tree.
    ///
    /// This marks the new node as not dirty and all ancestor nodes as dirty.
    fn put_node(&mut self, pos: MerkleNodePosition, hash: MerkleHash) -> crate::Result<()>;

    /// Delete the merkle leaf node at the given position in the tree.
    ///
    /// This also deletes any ancestor nodes that become orphaned as a result.
    fn delete_node(&mut self, file: FileId, index: usize) -> crate::Result<()>;

    /// Mark merkle leaf nodes as dirty.
    ///
    /// This also marks all ancestor nodes as dirty. If a leaf node does not exist, this seeds it
    /// with an empty hash.
    fn mark_dirty<R>(&mut self, file: FileId, indices: R) -> crate::Result<()>
    where
        R: RangeBounds<usize>;
}

/// The index of a logical block within a file.
pub type LogicalBlockIndex = usize;

#[derive(Debug)]
pub struct LogicalBlockData(BlockData);

impl LogicalBlockData {
    pub fn new(data: BlockData) -> Self {
        Self(data)
    }
}

impl AsRef<BlockData> for LogicalBlockData {
    fn as_ref(&self) -> &BlockData {
        &self.0
    }
}

impl From<LogicalBlockData> for BlockData {
    fn from(value: LogicalBlockData) -> Self {
        value.0
    }
}

impl From<BlockData> for LogicalBlockData {
    fn from(value: BlockData) -> Self {
        Self(value)
    }
}

pub trait LogicalBlockStore {
    /// Get the logical block at the given index in the given file.
    ///
    /// This returns `None` if the index is beyond the end of the file.
    fn get_block_at_index(
        &mut self,
        index: LogicalBlockIndex,
        block_list: &BlockList,
    ) -> crate::Result<Option<LogicalBlockData>>;
}

#[derive(Debug)]
pub struct LogicalBlockStoreAdapter<Store> {
    store: Store,
    logical_block_size: BlockLen,
}

impl<Store> LogicalBlockStoreAdapter<Store> {
    pub fn new(store: Store, logical_block_size: BlockLen) -> Self {
        Self {
            store,
            logical_block_size,
        }
    }
}

impl<Store> LogicalBlockStore for LogicalBlockStoreAdapter<Store>
where
    Store: BlockStore,
{
    fn get_block_at_index(
        &mut self,
        index: LogicalBlockIndex,
        block_list: &BlockList,
    ) -> crate::Result<Option<LogicalBlockData>> {
        // Calculate the byte range for this logical block.
        let logical_start = (index * self.logical_block_size) as FileOffset;
        let logical_end = logical_start + self.logical_block_size as FileOffset;

        // Accumulate data from overlapping physical blocks.
        let mut current_offset: FileOffset = 0;
        let mut found_any = false;
        let mut contains_data = false;

        let mut result = Vec::with_capacity(self.logical_block_size);
        let mut block_buf = Vec::with_capacity(self.logical_block_size);

        for block in block_list.iter() {
            let physical_start = current_offset;
            let physical_end = current_offset + block.len() as FileOffset;

            // Skip blocks before the logical block range.
            if physical_end <= logical_start {
                current_offset = physical_end;
                continue;
            }

            // Stop if we've passed the logical block range.
            if physical_start >= logical_end {
                break;
            }

            found_any = true;

            // Calculate the overlap between the logical and physical blocks.
            let overlap_start = physical_start.max(logical_start);
            let overlap_end = physical_end.min(logical_end);
            let offset_in_block = (overlap_start - physical_start) as BlockOffset;
            let overlap_len = (overlap_end - overlap_start) as BlockLen;

            match &block.signature {
                BlockSignature::Data { .. } => {
                    contains_data = true;

                    block_buf.clear();
                    self.store.read_block(block.id, &mut block_buf).map(drop)?;

                    let slice = block_buf
                        .get(offset_in_block..offset_in_block + overlap_len)
                        .expect("Block slice out of bounds.");

                    result.extend_from_slice(slice);
                }
                BlockSignature::Hole { .. } => {
                    // Represent a hole as zeros in the logical block.
                    result.resize(result.len() + overlap_len, 0);
                }
            }

            current_offset = physical_end;
        }

        if !found_any {
            Ok(None)
        } else if !contains_data {
            // Entire logical block is a hole.
            Ok(Some(LogicalBlockData::new(BlockData::Hole {
                len: self.logical_block_size as HoleLen,
            })))
        } else {
            // This logical block goes past the end of the file. Pad with zeros.
            if result.len() < self.logical_block_size {
                result.resize(self.logical_block_size, 0);
            }

            Ok(Some(LogicalBlockData::new(BlockData::Data {
                bytes: result,
            })))
        }
    }
}

impl<Store> MerkleNodeStore for LogicalBlockStoreAdapter<Store>
where
    Store: MerkleNodeStore,
{
    fn get_root(&mut self, file: FileId) -> crate::Result<Option<MerkleNode>> {
        self.store.get_root(file)
    }

    fn node_indices(
        &mut self,
        file: FileId,
        level: u32,
        filter: DirtyFilter,
    ) -> crate::Result<Vec<usize>> {
        self.store.node_indices(file, level, filter)
    }

    fn list_nodes<R>(
        &mut self,
        file: FileId,
        level: u32,
        indices: R,
    ) -> crate::Result<Vec<MerkleNode>>
    where
        R: RangeBounds<usize>,
    {
        self.store.list_nodes(file, level, indices)
    }

    fn put_node(&mut self, pos: MerkleNodePosition, hash: MerkleHash) -> crate::Result<()> {
        self.store.put_node(pos, hash)
    }

    fn delete_node(&mut self, file: FileId, index: usize) -> crate::Result<()> {
        self.store.delete_node(file, index)
    }

    fn mark_dirty<R>(&mut self, file: FileId, indices: R) -> crate::Result<()>
    where
        R: RangeBounds<usize>,
    {
        self.store.mark_dirty(file, indices)
    }
}