// lucisearch 0.8.0
//
// Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
// Documentation
// Obsidian [[wikilinks]] in doc comments are intentional — they link to
// design and reference docs in docs/. Rustdoc doesn't understand them.
#![allow(rustdoc::broken_intra_doc_links)]

//! `luci-storage` — block-based storage engine for Luci.
//!
//! This crate implements the [[single-file-format]] that backs every Luci
//! index. It manages disk space as fixed-size blocks, tracks free space with
//! extent-based free lists, and provides atomic commit via a two-root-pointer
//! technique.
//!
//! See [[architecture-storage-format]] for the full design.

mod allocator;
mod block;
mod directory;
mod header;
pub mod lock;
mod single_file;
mod wal;

pub use allocator::BlockAllocator;
pub use block::{BLOCK_SIZE, BlockId, Extent, HEADER_SIZE};
pub use directory::{MetadataSnapshot, SegmentEntry, VectorIndexEntry};
pub use header::{ActiveRoot, FORMAT_VERSION, FileHeader, MAGIC, RootPointer, xxh3_checksum};
#[cfg(unix)]
pub use single_file::SingleFileDirectory;
pub use wal::{DurabilityMode, Wal, WalRecord, replay_wal};

use crate::core::{FieldId, Result, SegmentId};

/// Trait abstracting over storage backends.
///
/// [`SingleFileDirectory`] is the sole implementation today. The trait
/// exists so that consumers (`IndexReader`, the index writer, etc.)
/// depend on a stable storage interface rather than a concrete type.
///
/// Implementors must be [`Send`]. Operations that change state take
/// `&mut self`; read-only accessors take `&self`. Several mutations
/// (segment removal, vector-index writes and removals) are documented
/// below as taking effect on the next [`Storage::commit`].
pub trait Storage: Send {
    /// Write `data` as the bytes of the segment identified by `segment_id`.
    ///
    /// NOTE(review): whether the segment is visible to readers before the
    /// next [`Storage::commit`] is decided by the implementor — confirm
    /// against the backend.
    ///
    /// # Errors
    ///
    /// Returns an error if the backend fails to store the data.
    fn write_segment(&mut self, segment_id: SegmentId, data: &[u8]) -> Result<()>;

    /// Read the bytes of the segment identified by `segment_id` into an
    /// owned buffer.
    ///
    /// # Errors
    ///
    /// Returns an error if the backend cannot produce the segment's bytes
    /// (presumably including an unknown `segment_id` — confirm against
    /// the backend).
    fn read_segment(&self, segment_id: SegmentId) -> Result<Vec<u8>>;

    /// Commit pending changes.
    ///
    /// Per the documentation of the other methods on this trait, this is
    /// the point at which removed segments have their space reclaimed and
    /// at which vector-index writes and removals take effect.
    ///
    /// # Errors
    ///
    /// Returns an error if the backend fails to complete the commit.
    fn commit(&mut self) -> Result<()>;

    /// Borrow the segment entries tracked by this backend.
    ///
    /// NOTE(review): whether this reflects only committed segments or also
    /// pending ones is not visible from the trait — confirm against the
    /// backend.
    fn segments(&self) -> &[SegmentEntry];

    /// Return the backend's current generation number.
    ///
    /// NOTE(review): presumably advances with each successful
    /// [`Storage::commit`] — confirm against the backend.
    fn generation(&self) -> u64;

    /// Set the caller-defined metadata bytes, taking ownership of
    /// `metadata`.
    ///
    /// NOTE(review): presumably persisted by the next
    /// [`Storage::commit`] — confirm against the backend.
    fn set_user_metadata(&mut self, metadata: Vec<u8>);

    /// Borrow the caller-defined metadata bytes currently held by the
    /// backend.
    fn user_metadata(&self) -> &[u8];

    /// Mark segments for removal on the next commit. Their storage space
    /// is reclaimed when the commit completes.
    fn remove_segments(&mut self, segment_ids: &[SegmentId]);

    /// Write a per-field vector index (e.g., serialized HNSW graph) as
    /// an index-wide artifact, separate from segments. Replaces any
    /// previously-committed bytes for the same `field_id` on next
    /// commit. See [[global-vector-indices]].
    ///
    /// # Errors
    ///
    /// Returns an error if the backend fails to store the data.
    fn write_vector_index(&mut self, field_id: FieldId, data: &[u8]) -> Result<()>;

    /// Read the committed bytes for a per-field vector index. Returns
    /// `None` if no index exists for that field.
    ///
    /// # Errors
    ///
    /// Returns an error if the backend fails while reading the index.
    fn read_vector_index(&self, field_id: FieldId) -> Result<Option<Vec<u8>>>;

    /// List the fields that have a committed vector index.
    fn vector_index_fields(&self) -> Vec<FieldId>;

    /// Mark the vector index for `field_id` for removal on the next
    /// commit. No-op if the field has no committed index.
    fn remove_vector_index(&mut self, field_id: FieldId);

    /// Set the timeout for acquiring the cross-process write lock.
    ///
    /// Default: 5 seconds.
    ///
    /// The provided default implementation ignores its argument and does
    /// nothing; a backend must override this method for the timeout to
    /// have any effect.
    ///
    /// NOTE(review): the 5-second figure presumably describes the
    /// backend's initial timeout rather than anything set here — confirm
    /// against the backend.
    fn set_write_timeout(&mut self, _timeout: std::time::Duration) {}
}