llkv_column_map/
lib.rs

1// NOTE: rustfmt appears to repeatedly re-indent portions of some macros in
2// this file when running `cargo fmt` (likely a rustfmt bug). To avoid noisy
3// diffs and churn, skip automatic formatting on the affected macro_rules!
4// declarations. Keep the rest of the module formatted normally.
5
/// Expands to the provided body with `$ty` bound to the concrete Arrow primitive type that
/// matches the supplied `DataType`.
///
/// Despite the `integer` in its name, dispatch covers every entry produced by
/// `llkv_for_each_arrow_numeric!` (unsigned and signed integers, `Float32`/`Float64`, and
/// `Date32`/`Date64`). Any other `DataType` yields the `$unsupported` expression. This is
/// used to avoid dynamic dispatch in hot paths like scans and row gathers.
///
/// # Arguments
///
/// * `$dtype` - evaluated exactly once; its value must implement
///   `Borrow<arrow::datatypes::DataType>`.
/// * `|$ty| $body` - `$ty` is introduced as a local type alias for the Arrow type marker of
///   the matching entry (for example `arrow::datatypes::UInt64Type`). `$body` is expanded
///   once per candidate type, so it has to compile for each of them and every expansion must
///   produce the same result type.
/// * `$unsupported` - evaluated lazily, inside a closure, only when no candidate matched.
///   Control flow such as `return` inside it therefore leaves that closure rather than the
///   caller's function.
///
/// # Notes
///
/// The expansion names `arrow::datatypes::DataType` by its plain path, so the `arrow` crate
/// must be resolvable at the call site. The helper macro is referenced through `$crate`, so
/// callers in other crates do not need to import `llkv_for_each_arrow_numeric!` themselves.
#[macro_export]
#[rustfmt::skip]
macro_rules! with_integer_arrow_type {
    ($dtype:expr, |$ty:ident| $body:expr, $unsupported:expr $(,)?) => {{
        use std::borrow::Borrow;

        // Bind the argument first so it is evaluated once and outlives the borrow below.
        let dtype_value = $dtype;
        let dtype_ref: &arrow::datatypes::DataType = dtype_value.borrow();
        let mut result: Option<_> = None;

        // Callback handed to the type table: only the physical type and the `DataType`
        // expression are used; the remaining slots are accepted and ignored.
        macro_rules! __llkv_dispatch_integer_arrow_type {
            (
                $base:ident,
                $chunk_fn:ident,
                $chunk_with_rids_fn:ident,
                $run_fn:ident,
                $run_with_rids_fn:ident,
                $array_ty:ty,
                $physical_ty:ty,
                $dtype_expr:expr,
                $native_ty:ty,
                $cast_expr:expr
            ) => {
                if dtype_ref == &$dtype_expr {
                    type $ty = $physical_ty;
                    result = Some($body);
                }
            };
        }

        // `$crate::` keeps the helper resolvable when this macro is invoked from another crate.
        $crate::llkv_for_each_arrow_numeric!(__llkv_dispatch_integer_arrow_type);

        result.unwrap_or_else(|| $unsupported)
    }};
}
45
/// Invokes `$macro` once for each supported Arrow numeric primitive, in a fixed order:
/// `u64`, `u32`, `u16`, `u8`, `i64`, `i32`, `i16`, `i8`, `f64`, `f32`, `date64`, `date32`.
///
/// `$macro` must name a `macro_rules!` macro that is in scope at the call site and accepts
/// ten comma-separated arguments, in this order:
///
/// 1. base identifier (e.g. `u64`, `date32`)
/// 2. `<base>_chunk` identifier
/// 3. `<base>_chunk_with_rids` identifier
/// 4. `<base>_run` identifier
/// 5. `<base>_run_with_rids` identifier
/// 6. Arrow array type (e.g. `arrow::array::UInt64Array`)
/// 7. Arrow type marker (e.g. `arrow::datatypes::UInt64Type`)
/// 8. `DataType` expression (e.g. `arrow::datatypes::DataType::UInt64`)
/// 9. native Rust value type (`i64` for `date64`, `i32` for `date32`)
/// 10. closure expression converting a native value to `f64`
#[macro_export]
#[rustfmt::skip]
macro_rules! llkv_for_each_arrow_numeric {
    ($macro:ident) => {
        // Unsigned integers.
        $macro!(
            u64, u64_chunk, u64_chunk_with_rids, u64_run, u64_run_with_rids,
            arrow::array::UInt64Array, arrow::datatypes::UInt64Type,
            arrow::datatypes::DataType::UInt64, u64, |v: u64| v as f64
        );
        $macro!(
            u32, u32_chunk, u32_chunk_with_rids, u32_run, u32_run_with_rids,
            arrow::array::UInt32Array, arrow::datatypes::UInt32Type,
            arrow::datatypes::DataType::UInt32, u32, |v: u32| v as f64
        );
        $macro!(
            u16, u16_chunk, u16_chunk_with_rids, u16_run, u16_run_with_rids,
            arrow::array::UInt16Array, arrow::datatypes::UInt16Type,
            arrow::datatypes::DataType::UInt16, u16, |v: u16| v as f64
        );
        $macro!(
            u8, u8_chunk, u8_chunk_with_rids, u8_run, u8_run_with_rids,
            arrow::array::UInt8Array, arrow::datatypes::UInt8Type,
            arrow::datatypes::DataType::UInt8, u8, |v: u8| v as f64
        );

        // Signed integers.
        $macro!(
            i64, i64_chunk, i64_chunk_with_rids, i64_run, i64_run_with_rids,
            arrow::array::Int64Array, arrow::datatypes::Int64Type,
            arrow::datatypes::DataType::Int64, i64, |v: i64| v as f64
        );
        $macro!(
            i32, i32_chunk, i32_chunk_with_rids, i32_run, i32_run_with_rids,
            arrow::array::Int32Array, arrow::datatypes::Int32Type,
            arrow::datatypes::DataType::Int32, i32, |v: i32| v as f64
        );
        $macro!(
            i16, i16_chunk, i16_chunk_with_rids, i16_run, i16_run_with_rids,
            arrow::array::Int16Array, arrow::datatypes::Int16Type,
            arrow::datatypes::DataType::Int16, i16, |v: i16| v as f64
        );
        $macro!(
            i8, i8_chunk, i8_chunk_with_rids, i8_run, i8_run_with_rids,
            arrow::array::Int8Array, arrow::datatypes::Int8Type,
            arrow::datatypes::DataType::Int8, i8, |v: i8| v as f64
        );

        // Floating point.
        $macro!(
            f64, f64_chunk, f64_chunk_with_rids, f64_run, f64_run_with_rids,
            arrow::array::Float64Array, arrow::datatypes::Float64Type,
            arrow::datatypes::DataType::Float64, f64, |v: f64| v
        );
        $macro!(
            f32, f32_chunk, f32_chunk_with_rids, f32_run, f32_run_with_rids,
            arrow::array::Float32Array, arrow::datatypes::Float32Type,
            arrow::datatypes::DataType::Float32, f32, |v: f32| v as f64
        );

        // Dates, carried as their native integer representation.
        $macro!(
            date64, date64_chunk, date64_chunk_with_rids, date64_run, date64_run_with_rids,
            arrow::array::Date64Array, arrow::datatypes::Date64Type,
            arrow::datatypes::DataType::Date64, i64, |v: i64| v as f64
        );
        $macro!(
            date32, date32_chunk, date32_chunk_with_rids, date32_run, date32_run_with_rids,
            arrow::array::Date32Array, arrow::datatypes::Date32Type,
            arrow::datatypes::DataType::Date32, i32, |v: i32| v as f64
        );
    };
}
197
/// Invokes `$macro` once with the metadata for the Arrow boolean type.
///
/// The callback receives the same ten positional arguments that
/// `llkv_for_each_arrow_numeric!` passes: the base identifier, the `_chunk`,
/// `_chunk_with_rids`, `_run` and `_run_with_rids` identifiers, the Arrow array type, the
/// Arrow type marker, the `DataType` expression, the native Rust type, and a closure
/// expression converting a native value to `f64` (`true` becomes `1.0`, `false` becomes
/// `0.0`).
#[macro_export]
#[rustfmt::skip]
macro_rules! llkv_for_each_arrow_boolean {
    ($macro:ident) => {
        $macro!(
            bool, bool_chunk, bool_chunk_with_rids, bool_run, bool_run_with_rids,
            arrow::array::BooleanArray, arrow::datatypes::BooleanType,
            arrow::datatypes::DataType::Boolean, bool, |v: bool| if v { 1.0 } else { 0.0 }
        );
    };
}
216
217pub fn is_supported_arrow_type(dtype: &arrow::datatypes::DataType) -> bool {
218    use arrow::datatypes::DataType;
219
220    if matches!(dtype, DataType::Utf8 | DataType::LargeUtf8) {
221        return true;
222    }
223
224    let mut matched = false;
225
226    macro_rules! __llkv_match_dtype {
227        (
228            $base:ident,
229            $chunk_fn:ident,
230            $chunk_with_rids_fn:ident,
231            $run_fn:ident,
232            $run_with_rids_fn:ident,
233            $array_ty:ty,
234            $physical_ty:ty,
235            $dtype_expr:expr,
236            $native_ty:ty,
237            $cast_expr:expr
238        ) => {
239            if dtype == &$dtype_expr {
240                matched = true;
241            }
242        };
243    }
244
245    llkv_for_each_arrow_numeric!(__llkv_match_dtype);
246    llkv_for_each_arrow_boolean!(__llkv_match_dtype);
247
248    matched
249}
250
251pub fn supported_arrow_types() -> Vec<arrow::datatypes::DataType> {
252    use arrow::datatypes::DataType;
253
254    let mut types = vec![DataType::Utf8, DataType::LargeUtf8];
255
256    macro_rules! __llkv_push_dtype {
257        (
258            $base:ident,
259            $chunk_fn:ident,
260            $chunk_with_rids_fn:ident,
261            $run_fn:ident,
262            $run_with_rids_fn:ident,
263            $array_ty:ty,
264            $physical_ty:ty,
265            $dtype_expr:expr,
266            $native_ty:ty,
267            $cast_expr:expr
268        ) => {
269            types.push($dtype_expr.clone());
270        };
271    }
272
273    llkv_for_each_arrow_numeric!(__llkv_push_dtype);
274    llkv_for_each_arrow_boolean!(__llkv_push_dtype);
275
276    types
277}
278
279pub fn ensure_supported_arrow_type(dtype: &arrow::datatypes::DataType) -> Result<()> {
280    if is_supported_arrow_type(dtype) {
281        return Ok(());
282    }
283
284    let mut supported = supported_arrow_types()
285        .into_iter()
286        .map(|dtype| format!("{dtype:?}"))
287        .collect::<Vec<_>>();
288    supported.sort();
289    supported.dedup();
290
291    Err(Error::InvalidArgumentError(format!(
292        "unsupported Arrow type {dtype:?}; supported types are {}",
293        supported.join(", ")
294    )))
295}
296
297pub mod parallel;
298pub mod store;
299pub mod types;
300
301pub use llkv_result::{Error, Result};
302pub use store::{
303    ColumnStore, IndexKind, ROW_ID_COLUMN_NAME,
304    scan::{self, ScanBuilder},
305};
306
307pub mod debug {
308    pub use super::store::debug::*;
309}