lucisearch 0.8.0

Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
Documentation
//! Owned column reader with erased lifetime.
//!
//! `OwnedColumn` wraps a `ColumnReader` with its lifetime erased to `'static`
//! via `unsafe transmute`. It's used by per-query caches that outlive any
//! particular `&SegmentReader` borrow but live within the lifecycle of an
//! object (collector, `SearchResults`, etc.) that holds the underlying segment
//! data alive.
//!
//! SAFETY: The `SegmentReader` (and the `Arc<SegmentStore>` rooting it) must
//! outlive any `OwnedColumn` constructed from it. This invariant is enforced
//! at the call site by struct-field-order discipline: in any owning struct,
//! declare the `OwnedColumn` (or a collection containing one) before the
//! `SegmentReader` reference, so it drops first.
//!
//! Originated in `agg::bucket` (March 2026) for the aggregation collection
//! path. Promoted to `columnar::owned` in May 2026 to be reusable by
//! `SearchResults`'s field-reader cache. See
//! [[optimize-hit-id-column-reader-cache]] and
//! [[strategy-lazy-materialization]].

use std::mem::transmute;

use crate::core::FieldId;

use crate::columnar::reader::ColumnReader;
use crate::segment::reader::SegmentReader;

/// A boxed `ColumnReader` whose borrow lifetime has been erased to `'static`.
///
/// The `'static` here is a fiction created by `OwnedColumn::new` via
/// `transmute`; the reader still points into data reached through the
/// `SegmentReader` it was opened from. Per the module docs, that
/// `SegmentReader` must outlive this value, and owning structs are expected to
/// declare the `OwnedColumn` field before the segment reference so it is
/// dropped first.
pub(crate) struct OwnedColumn(Box<ColumnReader<'static>>);

impl OwnedColumn {
    /// Acquire a column reader from a segment. Returns `None` if the field
    /// has no columnar data.
    pub fn new(field_id: Option<FieldId>, reader: &SegmentReader) -> Option<Self> {
        #[cfg(test)]
        COLUMN_OPENS.with(|c| c.set(c.get() + 1));
        let col = field_id.and_then(|fid| reader.column(fid))?;
        // SAFETY: The caller must guarantee the `SegmentReader` outlives this
        // struct (per module docs).
        let static_col: ColumnReader<'static> = unsafe { transmute(col) };
        Some(Self(Box::new(static_col)))
    }

    /// Access the underlying `ColumnReader`. The returned borrow — and any
    /// string borrows derived from it via `ordinal_to_string`/`keyword_value`
    /// — are tied to `&self`, so callers cannot smuggle them out past the
    /// `OwnedColumn`'s lifetime.
    pub fn inner<'s>(&'s self) -> &'s ColumnReader<'s> {
        let col: &'s ColumnReader<'static> = &self.0;
        // SAFETY: shrink the stored `'static` inner lifetime down to the
        // `&self` borrow `'s`. Always sound — a reader valid for `'static` is
        // valid for any shorter lifetime — and exactly what covariance did
        // automatically before `KeywordDict`'s `OnceCell<Vec<&'a str>>` made
        // `ColumnReader<'a>` invariant over `'a`. Preserving the shrink keeps
        // the contract that strings derived from the reader borrow `&self`, not
        // `'static`, so they cannot outlive this column.
        unsafe { transmute::<&'s ColumnReader<'static>, &'s ColumnReader<'s>>(col) }
    }

    #[inline]
    pub fn numeric_value(&self, doc_id: u32) -> Option<f64> {
        self.0.numeric_value(doc_id)
    }

    #[inline]
    pub fn keyword_ordinal(&self, doc_id: u32) -> Option<u32> {
        self.0.keyword_ordinal(doc_id)
    }

    #[inline]
    pub fn ordinal_to_string(&self, ordinal: u32) -> Option<&str> {
        self.0.ordinal_to_string(ordinal)
    }

    #[inline]
    pub fn dict_size(&self) -> usize {
        self.0.dict_size()
    }

    /// Bulk opt-in pass-through (see `ColumnReader::ensure_dict`). Lets a
    /// terms-agg collector materialize its `KeywordBlocked` dictionary once
    /// before resolving every observed ordinal in `finish()`.
    #[inline]
    pub fn ensure_dict(&self) {
        self.0.ensure_dict();
    }
}

#[cfg(test)]
thread_local! {
    /// Test-only, per-thread counter of `OwnedColumn::new` invocations.
    ///
    /// `OwnedColumn::new` bumps this before it looks the column up, so calls
    /// that return `None` are counted as well. Tests in
    /// [[optimize-hit-id-column-reader-cache]] read this to assert exact
    /// column-open counts (deterministic, vs flaky wall-clock timing).
    pub(crate) static COLUMN_OPENS: std::cell::Cell<usize> = std::cell::Cell::new(0);
}