Struct ColumnStore

Source

pub struct ColumnStore<P: Pager> { /* private fields */ }

Expand description

Columnar storage engine for managing Arrow-based data.

ColumnStore provides the primary interface for persisting and retrieving columnar data using Apache Arrow RecordBatches. It manages:

- Column descriptors and metadata (chunk locations, row counts, min/max values)
- Data type caching for efficient schema queries
- Index management (presence indexes, value indexes)
- Integration with the Pager for persistent storage

§Namespaces

Columns are identified by LogicalFieldId, which combines a namespace, table ID, and field ID. This prevents collisions between user data, row IDs, and MVCC metadata:

- UserData: Regular table columns
- RowIdShadow: Internal row ID tracking
- TxnCreatedBy: MVCC transaction creation timestamps
- TxnDeletedBy: MVCC transaction deletion timestamps

§Thread Safety

ColumnStore is Send + Sync and can be safely shared across threads via Arc. Internal state (catalog, caches) uses RwLock for concurrent access.

§Test Harness Integration

- SQLite sqllogictest: Every upstream case exercises the column store, providing a compatibility baseline, though not yet full parity with SQLite.
- DuckDB suites: Early dialect-specific tests stress MVCC and typed casts, informing future work rather than proving comprehensive DuckDB coverage.
- Hardening mandate: Failures uncovered by the suites result in storage fixes, not filtered tests, to preserve confidence in OLAP scenarios built atop this crate.

Struct ColumnStore Copy item path

§Namespaces

§Thread Safety

§Test Harness Integration

Implementations§

impl<P> ColumnStore<P> where P: Pager<Blob = EntryHandle> + Send + Sync,

pub fn open(pager: Arc<P>) -> Result<Self>

§Errors

pub fn write_hints(&self) -> ColumnStoreWriteHints

pub fn register_index( &self, field_id: LogicalFieldId, kind: IndexKind, ) -> Result<()>

§Errors

pub fn has_field(&self, field_id: LogicalFieldId) -> bool

pub fn unregister_index( &self, field_id: LogicalFieldId, kind: IndexKind, ) -> Result<()>

§Errors

pub fn data_type(&self, field_id: LogicalFieldId) -> Result<DataType>

§Errors

pub fn update_data_type( &self, field_id: LogicalFieldId, new_data_type: &DataType, ) -> Result<()>

§Errors

pub fn ensure_column_registered( &self, field_id: LogicalFieldId, data_type: &DataType, ) -> Result<()>

pub fn filter_row_ids<T>( &self, field_id: LogicalFieldId, predicate: &Predicate<T::Value>, ) -> Result<Vec<u64>> where T: FilterDispatch,

§Errors

pub fn filter_matches<T, F>( &self, field_id: LogicalFieldId, predicate: F, ) -> Result<FilterResult> where T: FilterPrimitive, F: FnMut(T::Native) -> bool,

§Arguments

§Errors

pub fn list_persisted_indexes( &self, field_id: LogicalFieldId, ) -> Result<Vec<IndexKind>>

§Errors

pub fn total_rows_for_field(&self, field_id: LogicalFieldId) -> Result<u64>

§Errors

pub fn total_rows_for_table(&self, table_id: TableId) -> Result<u64>

§Errors

pub fn user_field_ids_for_table(&self, table_id: TableId) -> Vec<LogicalFieldId>

pub fn remove_column(&self, field_id: LogicalFieldId) -> Result<()>

§Arguments

§Errors

pub fn has_row_id( &self, field_id: LogicalFieldId, row_id: RowId, ) -> Result<bool>

§Errors

pub fn append(&self, batch: &RecordBatch) -> Result<()>

§Last-Write-Wins Updates

§Row ID Ordering

§Table Separation

§Errors

pub fn delete_rows( &self, fields: &[LogicalFieldId], rows_to_delete: &[RowId], ) -> Result<()>

pub fn verify_integrity(&self) -> Result<()>

pub fn get_layout_stats(&self) -> Result<Vec<ColumnLayoutStats>>

impl<P> ColumnStore<P> where P: Pager<Blob = EntryHandle>,

pub fn scan( &self, field_id: LogicalFieldId, opts: ScanOptions, visitor: &mut dyn PrimitiveFullVisitor, ) -> Result<()>

impl<P> ColumnStore<P> where P: Pager<Blob = EntryHandle> + Send + Sync,

pub fn gather_rows( &self, field_ids: &[LogicalFieldId], row_ids: &[u64], policy: GatherNullPolicy, ) -> Result<RecordBatch>

pub fn gather_rows_with_schema( &self, field_ids: &[LogicalFieldId], row_ids: &[u64], policy: GatherNullPolicy, expected_schema: Option<Arc<Schema>>, ) -> Result<RecordBatch>

pub fn prepare_gather_context( &self, field_ids: &[LogicalFieldId], ) -> Result<MultiGatherContext>

pub fn gather_rows_with_reusable_context( &self, ctx: &mut MultiGatherContext, row_ids: &[u64], policy: GatherNullPolicy, ) -> Result<RecordBatch>

Trait Implementations§

impl<P> Clone for ColumnStore<P> where P: Pager<Blob = EntryHandle> + Send + Sync,

fn clone(&self) -> Self

fn clone_from(&mut self, source: &Self)

impl<P: Pager> ColumnStoreDebug for ColumnStore<P>

fn render_storage_as_formatted_string(&self) -> String

fn render_storage_as_dot( &self, batch_colors: &HashMap<PhysicalKey, usize>, ) -> String

Auto Trait Implementations§

impl<P> !Freeze for ColumnStore<P>

impl<P> RefUnwindSafe for ColumnStore<P> where P: RefUnwindSafe,

impl<P> Send for ColumnStore<P>

impl<P> Sync for ColumnStore<P>

impl<P> Unpin for ColumnStore<P>

impl<P> UnwindSafe for ColumnStore<P> where P: RefUnwindSafe,

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

Struct ColumnStore

impl<P> ColumnStore<P>
where P: Pager<Blob = EntryHandle> + Send + Sync,

pub fn filter_row_ids<T>( &self, field_id: LogicalFieldId, predicate: &Predicate<T::Value>, ) -> Result<Vec<u64>>
where T: FilterDispatch,

pub fn filter_matches<T, F>( &self, field_id: LogicalFieldId, predicate: F, ) -> Result<FilterResult>
where T: FilterPrimitive, F: FnMut(T::Native) -> bool,

impl<P> ColumnStore<P>
where P: Pager<Blob = EntryHandle>,

impl<P> ColumnStore<P>
where P: Pager<Blob = EntryHandle> + Send + Sync,

impl<P> Clone for ColumnStore<P>
where P: Pager<Blob = EntryHandle> + Send + Sync,

impl<P> RefUnwindSafe for ColumnStore<P>
where P: RefUnwindSafe,

impl<P> UnwindSafe for ColumnStore<P>
where P: RefUnwindSafe,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,