llkv_column_map/lib.rs
1//! Columnar storage engine for LLKV.
2//!
3//! This crate provides the low-level columnar layer that persists Apache Arrow
4//! [`RecordBatch`]es to disk and supports efficient scans, filters, and updates.
5//! It serves as the foundation for [`llkv-table`] and higher-level query
6//! execution.
7//!
8//! # Role in the Story
9//!
10//! The column map is where LLKV’s Arrow-first design meets pager-backed
11//! persistence. Every [`sqllogictest`](https://sqlite.org/sqllogictest/doc/trunk/about.wiki) shipped with SQLite—and an expanding set of
12//! DuckDB suites—ultimately routes through these descriptors and chunk walkers.
13//! The storage layer therefore carries the burden of matching SQLite semantics
14//! while staying efficient enough for OLAP workloads. Gaps uncovered by the
15//! logic tests are treated as defects in this crate, not harness exceptions.
16//!
17//! The engine is maintained in the open by a single developer. These docs aim
18//! to give newcomers the same context captured in the README and DeepWiki pages
19//! so the story remains accessible as the project grows.
20//!
21//! # Architecture
22//!
23//! The storage engine is organized into several key components:
24//!
25//! - **[`ColumnStore`]**: Primary interface for storing and retrieving columnar data.
26//! Manages column descriptors, metadata catalogs, and coordinates with the pager
27//! for persistent storage.
28//!
29//! - **[`ScanBuilder`]**: Builder pattern for constructing column scans with various
30//! options (filters, ordering, row ID inclusion).
31//!
32//! - **Visitor Pattern**: Scans emit data through visitor callbacks rather than
33//! materializing entire columns in memory, enabling streaming and aggregation.
34//!
35//! # Storage Model
36//!
37//! Data is stored in columnar chunks:
38//! - Each column is identified by a `LogicalFieldId`
39//! - Columns are broken into chunks for incremental writes
40//! - Each chunk stores Arrow-serialized data plus metadata (row count, min/max values)
41//! - Shadow columns track row IDs separately from user data
42//! - MVCC columns (`created_by`, `deleted_by`) track transaction visibility
43//!
44//! # Namespaces
45//!
46//! Columns are organized into namespaces to prevent ID collisions:
47//! - `UserData`: Regular table columns
48//! - `RowIdShadow`: Internal row ID tracking for each column
49//! - `TxnCreatedBy`: MVCC transaction that created each row
50//! - `TxnDeletedBy`: MVCC transaction that deleted each row
51//!
52//! # Test Coverage
53//!
54//! - **SQLite suites**: The storage layer powers every SQLite [`sqllogictest`](https://sqlite.org/sqllogictest/doc/trunk/about.wiki)
55//! case that upstream publishes. Passing those suites provides a baseline for
56//! SQLite compatibility, but LLKV still diverges from SQLite behavior in
57//! places and should not be treated as a drop-in replacement yet.
58//! - **DuckDB extensions**: DuckDB-focused suites exercise MVCC edge cases and
59//! typed transaction flows. Coverage is early and informs the roadmap rather
60//! than proving full DuckDB parity today. All suites run through the
61//! [`sqllogictest` crate](https://crates.io/crates/sqllogictest).
62//!
63//! # Thread Safety
64//!
65//! `ColumnStore` is thread-safe (`Send + Sync`) with internal locking for
66//! catalog updates. Read operations can occur concurrently; writes are
67//! serialized through the catalog lock.
68//!
69//! [`RecordBatch`]: arrow::record_batch::RecordBatch
70//! [`llkv-table`]: https://docs.rs/llkv-table
71//! [`ColumnStore`]: store::ColumnStore
72//! [`ScanBuilder`]: scan::ScanBuilder
73//!
74//! # Macros and Type Dispatch
75//!
76//! This crate provides macros for efficient type-specific operations without runtime
77//! dispatch overhead. See [`with_integer_arrow_type!`] for details.
78
79// NOTE: rustfmt currently re-indents portions of macro_rules! blocks in this
80// file (observed when running `cargo fmt`). This produces noisy diffs and
81// churn because rustfmt will flip formatting between runs. The problematic
82// locations in this module are the macro_rules! dispatch macros declared
83// below. Until the underlying rustfmt bug is fixed, we intentionally opt out
84// of automatic formatting for those specific macros using `#[rustfmt::skip]`,
85// while keeping the rest of the module formatted normally.
86//
87// Reproduction / debugging tips for contributors:
88// - Run `rustup run stable rustfmt -- --version` to confirm the rustfmt
89// version, then `cargo fmt` to reproduce the behavior.
90// - Narrow the change by running rustfmt on this file only:
//     rustfmt llkv-column-map/src/lib.rs
92// - If you can produce a minimal self-contained example that triggers the
93// re-indent, open an issue with rustfmt (include rustfmt version and the
94// minimal example) and link it here.
95//
96// NOTE: Once a minimal reproducer for the rustfmt regression exists, link the
97// upstream issue here and remove the `#[rustfmt::skip]` attributes so the file
98// can return to standard formatting. Progress is tracked at
99// https://github.com/rust-lang/rustfmt/issues/6629#issuecomment-3395446770.
100
/// Dispatches to type-specific code based on an Arrow `DataType`.
///
/// The supplied `DataType` is compared against every entry listed by
/// [`llkv_for_each_arrow_numeric!`]. For the entry that matches, the macro
/// declares a local type alias named `$ty` for that entry's Arrow physical type
/// and evaluates `$body` with the alias in scope. If no entry matches,
/// `$unsupported` is evaluated instead.
///
/// Despite the name, the dispatch table is the full numeric list: integers,
/// floats, and the `Date32`/`Date64` types.
///
/// # Parameters
///
/// - `$dtype` - Expression yielding an `arrow::datatypes::DataType` or a
///   reference to one; it is evaluated once and borrowed through
///   `std::borrow::Borrow`
/// - `$ty` - Identifier used as the name of the type alias (e.g., `T`)
/// - `$body` - Code to execute with `$ty` bound to the matched physical type
/// - `$unsupported` - Fallback expression if the type is not in the table
///
/// # Performance
///
/// `$body` is instantiated once per table entry, so the compiler generates
/// specialized code for each type. Selecting which instantiation runs is a
/// sequence of `DataType` equality checks performed at run time.
///
/// # Notes
///
/// - Because `$body` is expanded for every entry, it must type-check for each
///   physical type and produce the same result type as `$unsupported`.
/// - `arrow` paths are written unqualified, so `arrow` must be resolvable at
///   the call site.
#[macro_export]
#[rustfmt::skip]
macro_rules! with_integer_arrow_type {
    ($dtype:expr, |$ty:ident| $body:expr, $unsupported:expr $(,)?) => {{
        use std::borrow::Borrow;

        // Evaluate the caller's expression exactly once, then view it as a reference.
        let dtype_value = $dtype;
        let dtype_ref: &arrow::datatypes::DataType = dtype_value.borrow();
        let mut result: Option<_> = None;

        // Callback handed to the numeric table: one independent `if` per entry.
        macro_rules! __llkv_dispatch_integer_arrow_type {
            (
                $base:ident,
                $chunk_fn:ident,
                $chunk_with_rids_fn:ident,
                $run_fn:ident,
                $run_with_rids_fn:ident,
                $array_ty:ty,
                $physical_ty:ty,
                $dtype_expr:expr,
                $native_ty:ty,
                $cast_expr:expr
            ) => {
                if dtype_ref == &$dtype_expr {
                    type $ty = $physical_ty;
                    result = Some($body);
                }
            };
        }

        // Path-qualify the table macro so downstream crates do not have to
        // import `llkv_for_each_arrow_numeric!` themselves.
        $crate::llkv_for_each_arrow_numeric!(__llkv_dispatch_integer_arrow_type);

        result.unwrap_or_else(|| $unsupported)
    }};
}
153
/// Invokes a callback macro once for every supported numeric Arrow type.
///
/// This is a table macro: it carries no logic of its own and simply expands to
/// one `$callback!(...)` invocation per row, in a fixed order (unsigned
/// integers, signed integers, floats, then dates).
///
/// # Arguments Passed to the Callback
///
/// Each invocation receives ten comma-separated arguments:
/// 1. Base type name (e.g., `u64`, `i32`, `f64`, `date32`)
/// 2. Chunk visitor method name (e.g., `u64_chunk`)
/// 3. Chunk-with-row-IDs visitor method name (e.g., `u64_chunk_with_rids`)
/// 4. Run visitor method name (e.g., `u64_run`)
/// 5. Run-with-row-IDs visitor method name (e.g., `u64_run_with_rids`)
/// 6. Arrow array type (e.g., `arrow::array::UInt64Array`)
/// 7. Arrow physical type (e.g., `arrow::datatypes::UInt64Type`)
/// 8. Arrow `DataType` variant (e.g., `arrow::datatypes::DataType::UInt64`)
/// 9. Native Rust type (e.g., `u64`; the date rows use `i64` / `i32`)
/// 10. Closure expression converting the native value to `f64`
///
/// Paths are emitted as `arrow::...`, so a callback that uses them needs
/// `arrow` to be resolvable where the macro is expanded.
#[macro_export]
#[rustfmt::skip]
macro_rules! llkv_for_each_arrow_numeric {
    ($callback:ident) => {
        // Unsigned integers, widest first.
        $callback!(
            u64, u64_chunk, u64_chunk_with_rids, u64_run, u64_run_with_rids,
            arrow::array::UInt64Array, arrow::datatypes::UInt64Type,
            arrow::datatypes::DataType::UInt64, u64, |v: u64| v as f64
        );
        $callback!(
            u32, u32_chunk, u32_chunk_with_rids, u32_run, u32_run_with_rids,
            arrow::array::UInt32Array, arrow::datatypes::UInt32Type,
            arrow::datatypes::DataType::UInt32, u32, |v: u32| v as f64
        );
        $callback!(
            u16, u16_chunk, u16_chunk_with_rids, u16_run, u16_run_with_rids,
            arrow::array::UInt16Array, arrow::datatypes::UInt16Type,
            arrow::datatypes::DataType::UInt16, u16, |v: u16| v as f64
        );
        $callback!(
            u8, u8_chunk, u8_chunk_with_rids, u8_run, u8_run_with_rids,
            arrow::array::UInt8Array, arrow::datatypes::UInt8Type,
            arrow::datatypes::DataType::UInt8, u8, |v: u8| v as f64
        );

        // Signed integers, widest first.
        $callback!(
            i64, i64_chunk, i64_chunk_with_rids, i64_run, i64_run_with_rids,
            arrow::array::Int64Array, arrow::datatypes::Int64Type,
            arrow::datatypes::DataType::Int64, i64, |v: i64| v as f64
        );
        $callback!(
            i32, i32_chunk, i32_chunk_with_rids, i32_run, i32_run_with_rids,
            arrow::array::Int32Array, arrow::datatypes::Int32Type,
            arrow::datatypes::DataType::Int32, i32, |v: i32| v as f64
        );
        $callback!(
            i16, i16_chunk, i16_chunk_with_rids, i16_run, i16_run_with_rids,
            arrow::array::Int16Array, arrow::datatypes::Int16Type,
            arrow::datatypes::DataType::Int16, i16, |v: i16| v as f64
        );
        $callback!(
            i8, i8_chunk, i8_chunk_with_rids, i8_run, i8_run_with_rids,
            arrow::array::Int8Array, arrow::datatypes::Int8Type,
            arrow::datatypes::DataType::Int8, i8, |v: i8| v as f64
        );

        // Floating point.
        $callback!(
            f64, f64_chunk, f64_chunk_with_rids, f64_run, f64_run_with_rids,
            arrow::array::Float64Array, arrow::datatypes::Float64Type,
            arrow::datatypes::DataType::Float64, f64, |v: f64| v
        );
        $callback!(
            f32, f32_chunk, f32_chunk_with_rids, f32_run, f32_run_with_rids,
            arrow::array::Float32Array, arrow::datatypes::Float32Type,
            arrow::datatypes::DataType::Float32, f32, |v: f32| v as f64
        );

        // Dates, carried as their integer native representation.
        $callback!(
            date64, date64_chunk, date64_chunk_with_rids, date64_run, date64_run_with_rids,
            arrow::array::Date64Array, arrow::datatypes::Date64Type,
            arrow::datatypes::DataType::Date64, i64, |v: i64| v as f64
        );
        $callback!(
            date32, date32_chunk, date32_chunk_with_rids, date32_run, date32_run_with_rids,
            arrow::array::Date32Array, arrow::datatypes::Date32Type,
            arrow::datatypes::DataType::Date32, i32, |v: i32| v as f64
        );
    };
}
322
/// Invokes a callback macro for the Arrow boolean type.
///
/// The callback receives the same ten-argument row layout used by
/// [`llkv_for_each_arrow_numeric!`] (base name, four visitor method names,
/// array type, physical type, `DataType` variant, native type, cast closure),
/// so one callback can be shared between the numeric and boolean tables.
///
/// The cast closure maps `true` to `1.0` and `false` to `0.0`.
#[macro_export]
#[rustfmt::skip]
macro_rules! llkv_for_each_arrow_boolean {
    ($callback:ident) => {
        $callback!(
            bool, bool_chunk, bool_chunk_with_rids, bool_run, bool_run_with_rids,
            arrow::array::BooleanArray, arrow::datatypes::BooleanType,
            arrow::datatypes::DataType::Boolean, bool,
            |v: bool| if v { 1.0 } else { 0.0 }
        );
    };
}
341
/// Invokes a callback macro for the Arrow UTF-8 string type.
///
/// The callback receives the same ten-argument row layout used by
/// [`llkv_for_each_arrow_numeric!`] (base name, four visitor method names,
/// array type, physical type, `DataType` variant, native type, cast closure).
///
/// Only `DataType::Utf8` is listed here; there is no `LargeUtf8` row. The cast
/// closure ignores its input and always yields `0.0`.
#[macro_export]
#[rustfmt::skip]
macro_rules! llkv_for_each_arrow_string {
    ($callback:ident) => {
        $callback!(
            utf8, utf8_chunk, utf8_chunk_with_rids, utf8_run, utf8_run_with_rids,
            arrow::array::StringArray, arrow::datatypes::Utf8Type,
            arrow::datatypes::DataType::Utf8, &str,
            |_v: &str| 0.0
        );
    };
}
360
361pub fn is_supported_arrow_type(dtype: &arrow::datatypes::DataType) -> bool {
362 use arrow::datatypes::DataType;
363
364 if matches!(dtype, DataType::Utf8 | DataType::LargeUtf8) {
365 return true;
366 }
367
368 let mut matched = false;
369
370 macro_rules! __llkv_match_dtype {
371 (
372 $base:ident,
373 $chunk_fn:ident,
374 $chunk_with_rids_fn:ident,
375 $run_fn:ident,
376 $run_with_rids_fn:ident,
377 $array_ty:ty,
378 $physical_ty:ty,
379 $dtype_expr:expr,
380 $native_ty:ty,
381 $cast_expr:expr
382 ) => {
383 if dtype == &$dtype_expr {
384 matched = true;
385 }
386 };
387 }
388
389 llkv_for_each_arrow_numeric!(__llkv_match_dtype);
390 llkv_for_each_arrow_boolean!(__llkv_match_dtype);
391
392 matched
393}
394
395pub fn supported_arrow_types() -> Vec<arrow::datatypes::DataType> {
396 use arrow::datatypes::DataType;
397
398 let mut types = vec![DataType::Utf8, DataType::LargeUtf8];
399
400 macro_rules! __llkv_push_dtype {
401 (
402 $base:ident,
403 $chunk_fn:ident,
404 $chunk_with_rids_fn:ident,
405 $run_fn:ident,
406 $run_with_rids_fn:ident,
407 $array_ty:ty,
408 $physical_ty:ty,
409 $dtype_expr:expr,
410 $native_ty:ty,
411 $cast_expr:expr
412 ) => {
413 types.push($dtype_expr.clone());
414 };
415 }
416
417 llkv_for_each_arrow_numeric!(__llkv_push_dtype);
418 llkv_for_each_arrow_boolean!(__llkv_push_dtype);
419
420 types
421}
422
423pub fn ensure_supported_arrow_type(dtype: &arrow::datatypes::DataType) -> Result<()> {
424 if is_supported_arrow_type(dtype) {
425 return Ok(());
426 }
427
428 let mut supported = supported_arrow_types()
429 .into_iter()
430 .map(|dtype| format!("{dtype:?}"))
431 .collect::<Vec<_>>();
432 supported.sort();
433 supported.dedup();
434
435 Err(Error::InvalidArgumentError(format!(
436 "unsupported Arrow type {dtype:?}; supported types are {}",
437 supported.join(", ")
438 )))
439}
440
441pub mod codecs;
442pub mod gather;
443pub mod parallel;
444pub mod serialization;
445pub mod store;
446
447pub use llkv_result::{Error, Result};
448pub use store::{
449 ColumnStore, IndexKind, ROW_ID_COLUMN_NAME,
450 scan::{self, ScanBuilder},
451};
452
453pub mod debug {
454 pub use super::store::debug::*;
455}