lucisearch 0.8.0

Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
Documentation
use crate::core::{LuciError, Result};

use crate::storage::block::{BlockId, Extent};

/// Block allocator with extent-based free-list tracking.
///
/// Manages space within a `.luci` file as fixed 256 KB blocks. The header
/// occupies the first 4 KB of the file; blocks start immediately after.
/// Allocation searches the free list first (first-fit), then extends the file
/// if no suitable free extent exists. Deallocation returns extents to the free
/// list and coalesces adjacent entries.
///
/// See [[architecture-storage-format#Block Allocator]].
#[derive(Debug)]
pub struct BlockAllocator {
    /// Total number of data blocks the file currently spans.
    /// The header is separate and not counted here.
    total_blocks: u64,
    /// Free extents, kept sorted by start block for efficient coalescing.
    free_list: Vec<Extent>,
}

impl BlockAllocator {
    /// Create an allocator for a new, empty file.
    ///
    /// A fresh file has only the 4 KB header — no data blocks yet.
    /// All allocations extend the file.
    pub fn new() -> Self {
        Self {
            total_blocks: 0,
            free_list: Vec::new(),
        }
    }

    /// Reconstruct an allocator from persisted state.
    ///
    /// Called during recovery: the metadata root block stores `total_blocks`
    /// and the serialized free list.
    pub fn from_state(total_blocks: u64, free_list: Vec<Extent>) -> Self {
        let mut alloc = Self {
            total_blocks,
            free_list,
        };
        alloc.free_list.sort_by_key(|e| e.start);
        alloc
    }

    /// Allocate `count` contiguous blocks.
    ///
    /// Strategy: first-fit scan of the free list. If no free extent is large
    /// enough, extends the file by appending blocks at the end.
    ///
    /// # Errors
    ///
    /// Returns `LuciError::InvalidQuery` if `count` is zero.
    pub fn allocate(&mut self, count: u32) -> Result<Extent> {
        if count == 0 {
            return Err(LuciError::InvalidQuery(
                "cannot allocate zero blocks".into(),
            ));
        }

        // First-fit scan of free list.
        if let Some(idx) = self.free_list.iter().position(|e| e.count >= count) {
            let extent = self.free_list[idx];
            if extent.count == count {
                // Exact fit — remove from free list.
                self.free_list.remove(idx);
            } else {
                // Split: return the first `count` blocks, keep the remainder.
                self.free_list[idx] =
                    Extent::new(BlockId(extent.start.0 + count as u64), extent.count - count);
            }
            return Ok(Extent::new(extent.start, count));
        }

        // No suitable free extent — extend the file.
        let start = BlockId(self.total_blocks);
        self.total_blocks += count as u64;
        Ok(Extent::new(start, count))
    }

    /// Return an extent to the free list.
    ///
    /// The freed blocks become available for future allocations. Adjacent
    /// free extents are coalesced to reduce fragmentation.
    ///
    /// # Panics
    ///
    /// Debug-asserts that the extent falls within `[0, total_blocks)` and
    /// does not overlap any existing free extent.
    pub fn free(&mut self, extent: Extent) {
        debug_assert!(
            extent.end().0 <= self.total_blocks,
            "extent {extent} exceeds file bounds (total_blocks = {})",
            self.total_blocks
        );

        // Insert in sorted order.
        let pos = self
            .free_list
            .binary_search_by_key(&extent.start, |e| e.start)
            .unwrap_or_else(|i| i);

        debug_assert!(
            !self.overlaps_free_list(&extent),
            "double-free: {extent} overlaps an existing free extent"
        );

        self.free_list.insert(pos, extent);
        self.coalesce_at(pos);
    }

    /// Total number of data blocks the file spans (excludes the header).
    pub fn total_blocks(&self) -> u64 {
        self.total_blocks
    }

    /// Total number of free (reusable) blocks.
    pub fn free_block_count(&self) -> u64 {
        self.free_list.iter().map(|e| e.count as u64).sum()
    }

    /// Snapshot of the current free list (for persistence in metadata root).
    pub fn free_list(&self) -> &[Extent] {
        &self.free_list
    }

    // --- internals ---

    /// Coalesce the extent at `pos` with its neighbours if they are adjacent.
    fn coalesce_at(&mut self, pos: usize) {
        // Merge with the next extent first (so `pos` stays valid for the
        // backward merge).
        if pos + 1 < self.free_list.len()
            && self.free_list[pos].is_adjacent_to(&self.free_list[pos + 1])
        {
            self.free_list[pos] = Extent::new(
                self.free_list[pos].start,
                self.free_list[pos].count + self.free_list[pos + 1].count,
            );
            self.free_list.remove(pos + 1);
        }

        // Merge with the previous extent.
        if pos > 0 && self.free_list[pos - 1].is_adjacent_to(&self.free_list[pos]) {
            self.free_list[pos - 1] = Extent::new(
                self.free_list[pos - 1].start,
                self.free_list[pos - 1].count + self.free_list[pos].count,
            );
            self.free_list.remove(pos);
        }
    }

    /// Check whether `extent` overlaps any entry in the free list.
    fn overlaps_free_list(&self, extent: &Extent) -> bool {
        self.free_list
            .iter()
            .any(|free| extent.start.0 < free.end().0 && free.start.0 < extent.end().0)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn new_allocator_is_empty() {
        let alloc = BlockAllocator::new();
        assert_eq!(alloc.total_blocks(), 0);
        assert_eq!(alloc.free_block_count(), 0);
    }

    #[test]
    fn allocate_extends_file() {
        let mut alloc = BlockAllocator::new();
        let ext = alloc.allocate(4).unwrap();
        assert_eq!(ext, Extent::new(BlockId(0), 4));
        assert_eq!(alloc.total_blocks(), 4);
    }

    #[test]
    fn sequential_allocations_are_contiguous() {
        let mut alloc = BlockAllocator::new();
        let a = alloc.allocate(2).unwrap();
        let b = alloc.allocate(3).unwrap();
        assert_eq!(a, Extent::new(BlockId(0), 2));
        assert_eq!(b, Extent::new(BlockId(2), 3));
        assert_eq!(alloc.total_blocks(), 5);
    }

    #[test]
    fn allocate_zero_is_error() {
        let mut alloc = BlockAllocator::new();
        assert!(alloc.allocate(0).is_err());
    }

    #[test]
    fn free_and_reuse() {
        let mut alloc = BlockAllocator::new();
        let a = alloc.allocate(4).unwrap();
        let _b = alloc.allocate(2).unwrap();

        // Free the first allocation.
        alloc.free(a);
        assert_eq!(alloc.free_block_count(), 4);

        // Next allocation of the same size reuses freed blocks.
        let c = alloc.allocate(4).unwrap();
        assert_eq!(c, a);
        assert_eq!(alloc.free_block_count(), 0);
    }

    #[test]
    fn free_and_split() {
        let mut alloc = BlockAllocator::new();
        let a = alloc.allocate(8).unwrap();
        alloc.free(a);

        // Allocate fewer blocks than the free extent — should split.
        let b = alloc.allocate(3).unwrap();
        assert_eq!(b, Extent::new(BlockId(0), 3));
        assert_eq!(alloc.free_block_count(), 5);

        // The remainder is reusable.
        let c = alloc.allocate(5).unwrap();
        assert_eq!(c, Extent::new(BlockId(3), 5));
        assert_eq!(alloc.free_block_count(), 0);
    }

    #[test]
    fn coalesce_adjacent_frees() {
        let mut alloc = BlockAllocator::new();
        let a = alloc.allocate(3).unwrap(); // blocks [0..3)
        let b = alloc.allocate(3).unwrap(); // blocks [3..6)
        let c = alloc.allocate(3).unwrap(); // blocks [6..9)

        // Free b then a — they should coalesce into [0..6).
        alloc.free(b);
        alloc.free(a);
        assert_eq!(alloc.free_list().len(), 1);
        assert_eq!(alloc.free_list()[0], Extent::new(BlockId(0), 6));

        // Free c — coalesces into [0..9).
        alloc.free(c);
        assert_eq!(alloc.free_list().len(), 1);
        assert_eq!(alloc.free_list()[0], Extent::new(BlockId(0), 9));
    }

    #[test]
    fn non_adjacent_frees_stay_separate() {
        let mut alloc = BlockAllocator::new();
        let a = alloc.allocate(2).unwrap(); // [0..2)
        let _b = alloc.allocate(2).unwrap(); // [2..4) — kept allocated
        let c = alloc.allocate(2).unwrap(); // [4..6)

        alloc.free(a);
        alloc.free(c);
        assert_eq!(alloc.free_list().len(), 2);
        assert_eq!(alloc.free_list()[0], Extent::new(BlockId(0), 2));
        assert_eq!(alloc.free_list()[1], Extent::new(BlockId(4), 2));
    }

    #[test]
    fn first_fit_prefers_first_free_extent() {
        let mut alloc = BlockAllocator::new();
        let a = alloc.allocate(4).unwrap(); // [0..4)
        let _b = alloc.allocate(2).unwrap(); // [4..6)
        let c = alloc.allocate(6).unwrap(); // [6..12)

        alloc.free(a);
        alloc.free(c);

        // Allocating 3 blocks should use the first free extent [0..4).
        let d = alloc.allocate(3).unwrap();
        assert_eq!(d, Extent::new(BlockId(0), 3));
    }

    #[test]
    fn allocate_falls_through_to_file_extension_when_free_list_too_small() {
        let mut alloc = BlockAllocator::new();
        let a = alloc.allocate(2).unwrap();
        alloc.free(a);

        // Free list has 2 blocks, but we need 5 — must extend file.
        let b = alloc.allocate(5).unwrap();
        assert_eq!(b, Extent::new(BlockId(2), 5));
        // Original free extent still available.
        assert_eq!(alloc.free_block_count(), 2);
    }

    #[test]
    fn from_state_reconstructs_correctly() {
        let free_list = vec![Extent::new(BlockId(10), 3), Extent::new(BlockId(5), 2)];
        let alloc = BlockAllocator::from_state(20, free_list);
        assert_eq!(alloc.total_blocks(), 20);
        assert_eq!(alloc.free_block_count(), 5);
        // Should be sorted by start block.
        assert_eq!(alloc.free_list()[0].start, BlockId(5));
        assert_eq!(alloc.free_list()[1].start, BlockId(10));
    }

    #[test]
    fn from_state_allocates_from_free_list() {
        let free_list = vec![Extent::new(BlockId(5), 4)];
        let mut alloc = BlockAllocator::from_state(20, free_list);

        let ext = alloc.allocate(2).unwrap();
        assert_eq!(ext, Extent::new(BlockId(5), 2));
        assert_eq!(alloc.free_block_count(), 2);
        // File size unchanged — reused free space.
        assert_eq!(alloc.total_blocks(), 20);
    }
}