liteboxfs 0.2.0

A modern POSIX filesystem in a SQLite database
Documentation
use std::ops::RangeInclusive;

use crate::{
    block::{BlockList, FileOffset},
    cache::CachedBlock,
    chunker::{ChunkSlice, ChunkerGuard},
    settings::Settings,
};

#[derive(Clone, Copy, Default)]
struct RegularFilePos {
    /// The "logical" seek position, which is the seek position as exposed by `Seek::seek`.
    pub logical: FileOffset,
    /// The "physical" seek position, which is the position up to which data has been flushed to
    /// the database.
    pub physical: FileOffset,
}

/// Tracks ranges of logical blocks which are dirty due to writes or truncations.
///
/// We store this information in the database, but we also track it in memory to avoid an extra
/// database write on every write or truncate operation. This way, we can mark nodes in the merkle
/// tree as dirty only when the file is flushed, the file is dropped, or we need to compute a
/// content ID.
#[derive(Debug, Clone)]
pub struct MerkleDirtyRanges {
    logical_block_ranges: Vec<RangeInclusive<usize>>,
    logical_block_size: usize,
}

impl MerkleDirtyRanges {
    fn new(logical_block_size: usize) -> Self {
        Self {
            logical_block_ranges: Vec::new(),
            logical_block_size,
        }
    }

    /// Mark a range of logical blocks as dirty due to a write at `offset` of length `len`.
    pub fn mark_dirty_write(&mut self, offset: FileOffset, len: FileOffset) {
        if len == 0 {
            return;
        }

        let logical_block_size = self.logical_block_size as FileOffset;
        let first_logical_block = (offset / logical_block_size) as usize;
        let last_logical_block = ((offset + len - 1) / logical_block_size) as usize;

        self.logical_block_ranges
            .push(first_logical_block..=last_logical_block);
    }

    /// Mark a range of logical blocks as dirty due to a truncation from `old_len` to `new_len`.
    pub fn mark_dirty_truncate(&mut self, old_len: FileOffset, new_len: FileOffset) {
        if new_len >= old_len {
            return;
        }

        let logical_block_size = self.logical_block_size as FileOffset;
        let first_logical_block = (new_len / logical_block_size) as usize;
        let last_logical_block = ((old_len.saturating_sub(1)) / logical_block_size) as usize;

        if first_logical_block <= last_logical_block {
            self.logical_block_ranges
                .push(first_logical_block..=last_logical_block);
        }
    }

    /// Take the current set of dirty logical block ranges, collapsing overlapping and adjacent
    /// ranges.
    pub fn take_ranges_collapsed(&mut self) -> Vec<RangeInclusive<usize>> {
        let mut ranges = std::mem::take(&mut self.logical_block_ranges);
        ranges.sort_by_key(|r| *r.start());

        let mut collapsed_ranges = Vec::<RangeInclusive<usize>>::new();

        for range in ranges {
            if let Some(last_range) = collapsed_ranges.last_mut()
                && *last_range.end() + 1 >= *range.start()
            {
                // Ranges overlap or are adjacent, so merge them.
                *last_range = *last_range.start()..=*range.end().max(last_range.end());
                continue;
            }

            collapsed_ranges.push(range);
        }

        collapsed_ranges
    }
}

// A guard which rolls back state and invalidates caches if an error occurs. This ensures
// correctness in case an error interrupts the normal control flow. This is necessary because while
// the database savepoint ensures the database state is rolled back, we need the in-memory state to
// match.
pub struct RegularFileStateGuard {
    pos: RegularFilePos,
    chunker: ChunkerGuard,
    cached_block: Option<CachedBlock>,
    cached_block_list: Option<BlockList>,
    merkle_dirty_ranges: MerkleDirtyRanges,
}

impl RegularFileStateGuard {
    pub fn new(settings: &Settings) -> Self {
        Self {
            pos: Default::default(),
            chunker: ChunkerGuard::new(settings.chunker()),
            cached_block: None,
            cached_block_list: None,
            merkle_dirty_ranges: MerkleDirtyRanges::new(settings.logical_block_size),
        }
    }

    /// Run a closure with mutable access to the wrapped state, rolling it back if an error occurs.
    pub fn with_rewind<R, E, F>(&mut self, f: F) -> Result<R, E>
    where
        F: FnOnce(RegularFileStateContext) -> Result<R, E>,
    {
        let prev_pos = self.pos;
        let prev_merkle_dirty_ranges_index = self.merkle_dirty_ranges.logical_block_ranges.len();

        let result = self.chunker.with_rewind(|chunker| {
            let ctx = RegularFileStateContext {
                logical_pos: &mut self.pos.logical,
                physical_pos: &mut self.pos.physical,
                cached_block: &mut self.cached_block,
                cached_block_list: &mut self.cached_block_list,
                merkle_dirty_ranges: &mut self.merkle_dirty_ranges,
                chunker,
            };

            f(ctx)
        });

        match result {
            Ok(value) => Ok(value),
            Err(err) => {
                self.pos = prev_pos;

                // We don't need to clone this vector; because we're only appending, we can just
                // truncate it to implement the rollback behavior.
                self.merkle_dirty_ranges
                    .logical_block_ranges
                    .truncate(prev_merkle_dirty_ranges_index);

                // Rather than clone and roll these back, we just invalidate them. Cloning these
                // values would be expensive for the happy path.
                self.cached_block = None;
                self.cached_block_list = None;

                Err(err)
            }
        }
    }

    pub fn logical_pos(&self) -> FileOffset {
        self.pos.logical
    }

    pub fn physical_pos(&self) -> FileOffset {
        self.pos.physical
    }
}

pub struct RegularFileStateContext<'a> {
    pub logical_pos: &'a mut FileOffset,
    pub physical_pos: &'a mut FileOffset,
    pub cached_block: &'a mut Option<CachedBlock>,
    pub cached_block_list: &'a mut Option<BlockList>,
    pub merkle_dirty_ranges: &'a mut MerkleDirtyRanges,
    pub chunker: &'a mut dyn ChunkSlice,
}

pub enum FileState {
    Regular(RegularFileStateGuard),
    Special,
}