// lucisearch 0.8.1
//
// Embeddable, in-process search engine — the SQLite/DuckDB of search.
// Documentation
/// File header length in bytes (4 KB).
///
/// Being page-aligned and small enough to fit in a single disk sector, the
/// header can be written sector-atomically, which the crash-safe commit
/// relies on. This value is also the byte offset at which the first block
/// begins, since blocks follow the header directly.
///
/// See [[architecture-storage-format#Atomic Commit Protocol]].
pub const HEADER_SIZE: u32 = 4 * 1024;

/// Length of one block in bytes (256 KB).
///
/// The format fixes this value: it is recorded in the file header so the file
/// describes itself, but it cannot be changed at runtime. The size trades off
/// the number of blocks per segment against free-list fragmentation while
/// staying aligned to OS page sizes and I/O boundaries.
///
/// See [[architecture-storage-format#Block Size]].
pub const BLOCK_SIZE: u32 = 256 << 10;

/// Identifies a block by its zero-based index within the data region.
///
/// Block 0 is the first data block, starting at byte offset `HEADER_SIZE`.
/// The byte offset of block N is `HEADER_SIZE + N * BLOCK_SIZE`.
///
/// See [[architecture-storage-format#Block Allocator]].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct BlockId(pub u64);

impl BlockId {
    /// Wraps a raw zero-based block index.
    pub const fn new(id: u64) -> Self {
        Self(id)
    }

    /// Returns the raw zero-based block index.
    pub const fn as_u64(self) -> u64 {
        self.0
    }

    /// Byte offset of this block within the file, or `None` if
    /// `HEADER_SIZE + id * BLOCK_SIZE` does not fit in a `u64`.
    ///
    /// Use this when the id comes from an untrusted or possibly corrupt
    /// source and an out-of-range value must be reported rather than
    /// treated as a bug.
    pub const fn checked_byte_offset(self) -> Option<u64> {
        match self.0.checked_mul(BLOCK_SIZE as u64) {
            Some(data_offset) => data_offset.checked_add(HEADER_SIZE as u64),
            None => None,
        }
    }

    /// Byte offset of this block within the file.
    ///
    /// Accounts for the 4 KB header that precedes the block region.
    ///
    /// # Panics
    ///
    /// Panics if the offset does not fit in a `u64`. The check is explicit so
    /// that builds without overflow checks also fail loudly instead of
    /// wrapping around to an unrelated file offset.
    pub const fn byte_offset(self) -> u64 {
        match self.checked_byte_offset() {
            Some(offset) => offset,
            None => panic!("block id out of range: byte offset overflows u64"),
        }
    }
}

impl std::fmt::Display for BlockId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "block:{}", self.0)
    }
}

/// A run of consecutive blocks covering `[start, start + count)`.
///
/// Allocation and free-list tracking both work in units of extents, and the
/// segment directory records each segment's data as one or more of them.
///
/// See [[architecture-storage-format#Extent Tracking]].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Extent {
    /// Block at which the range begins.
    pub start: BlockId,
    /// How many consecutive blocks the range spans.
    pub count: u32,
}

impl Extent {
    /// Builds an extent of `count` blocks beginning at `start`.
    pub const fn new(start: BlockId, count: u32) -> Self {
        Extent { start, count }
    }

    /// Exclusive upper bound: the id of the block just after the range.
    pub const fn end(&self) -> BlockId {
        let first = self.start.0;
        let span = self.count as u64;
        BlockId(first + span)
    }

    /// Number of bytes spanned, i.e. `count` whole blocks.
    pub const fn byte_len(&self) -> u64 {
        let blocks = self.count as u64;
        blocks * BLOCK_SIZE as u64
    }

    /// Returns `true` when `other` begins exactly where this extent ends.
    ///
    /// The relation is directional: it does not hold when `other` comes
    /// immediately *before* this extent.
    pub const fn is_adjacent_to(&self, other: &Extent) -> bool {
        other.start.0 == self.end().0
    }
}

/// Renders the extent as the half-open block range `blocks:[start..end)`.
impl std::fmt::Display for Extent {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let first = self.start.0;
        let past_last = first + self.count as u64;
        write!(f, "blocks:[{first}..{past_last})")
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Block offsets are the 4 KB header plus whole 256 KB blocks.
    #[test]
    fn block_id_byte_offset() {
        let cases: [(u64, u64); 3] = [
            (0, 4096),
            (1, 4096 + 256 * 1024),
            (4, 4096 + 4 * 256 * 1024),
        ];
        for (index, expected) in cases {
            assert_eq!(BlockId(index).byte_offset(), expected);
        }
    }

    #[test]
    fn block_id_display() {
        assert_eq!(BlockId(42).to_string(), "block:42");
    }

    #[test]
    fn extent_end() {
        assert_eq!(Extent::new(BlockId(10), 5).end(), BlockId(15));
    }

    #[test]
    fn extent_byte_len() {
        assert_eq!(Extent::new(BlockId(0), 4).byte_len(), 4 * 256 * 1024);
    }

    /// Adjacency holds only when the second extent starts at the first's end.
    #[test]
    fn extent_adjacency() {
        let first = Extent::new(BlockId(10), 5);
        let touching = Extent::new(BlockId(15), 3);
        let distant = Extent::new(BlockId(20), 2);
        let after_touching = Extent::new(BlockId(18), 2);

        assert!(first.is_adjacent_to(&touching));
        assert!(!first.is_adjacent_to(&distant));
        assert!(touching.is_adjacent_to(&after_touching));
        assert!(!touching.is_adjacent_to(&distant));
    }

    #[test]
    fn extent_display() {
        assert_eq!(Extent::new(BlockId(3), 7).to_string(), "blocks:[3..10)");
    }

    #[test]
    fn block_size_is_256kb() {
        assert_eq!(BLOCK_SIZE, 256 * 1024);
    }

    #[test]
    fn header_size_is_4kb() {
        assert_eq!(HEADER_SIZE, 4 * 1024);
    }
}