Skip to main content

kyu_executor/
value_vector.rs

//! ValueVector — columnar value container that preserves flat byte buffers.
//!
//! Instead of converting ColumnChunk's flat bytes into `Vec<TypedValue>` (~32 bytes
//! per tagged enum), ValueVector keeps the native representation: flat byte buffers
//! for fixed-size types, packed bits for booleans, and SmolStr vecs for strings.
//! Values are extracted on demand via `get_value()`, avoiding upfront materialization.
7
8use kyu_storage::{BoolChunkData, ColumnChunkData, NullMask, StringChunkData};
9use kyu_types::{LogicalType, TypedValue};
10use smol_str::SmolStr;
11
12/// A column of values: either backed by a flat byte buffer (from storage),
13/// packed bits (booleans), SmolStr vec (strings), or owned TypedValues
14/// (computed by operators).
15#[derive(Clone, Debug)]
16pub enum ValueVector {
17    /// Fixed-size values in a flat byte buffer + NullMask.
18    Flat(FlatVector),
19    /// Packed 1-bit booleans + NullMask.
20    Bool(BoolVector),
21    /// Variable-length strings.
22    String(StringVector),
23    /// Operator-computed values (fallback for expressions, aggregates, etc.).
24    Owned(Vec<TypedValue>),
25}
26
27/// Fixed-size values stored in a flat byte buffer with a separate null mask.
28/// For i64 columns: 8 bytes/value vs 32 bytes for TypedValue::Int64.
29#[derive(Clone, Debug)]
30pub struct FlatVector {
31    data: Vec<u8>,
32    null_mask: NullMask,
33    logical_type: LogicalType,
34    num_values: usize,
35    stride: usize,
36}
37
38/// Packed 1-bit boolean values + separate null mask.
39#[derive(Clone, Debug)]
40pub struct BoolVector {
41    values: NullMask,
42    null_mask: NullMask,
43    num_values: usize,
44}
45
46/// Variable-length string values.
47#[derive(Clone, Debug)]
48pub struct StringVector {
49    data: Vec<Option<SmolStr>>,
50    num_values: usize,
51}
52
/// Logical selection over a ValueVector / DataChunk.
///
/// `indices == None` is the identity selection `[0, 1, 2, ..., N-1]`; otherwise
/// `indices[logical]` is the physical row for each logical position.
#[derive(Clone, Debug)]
pub struct SelectionVector {
    /// Explicit physical row indices, or `None` for the identity mapping.
    indices: Option<Vec<u32>>,
    /// Number of selected rows.
    count: usize,
}

// ---------------------------------------------------------------------------
// SelectionVector
// ---------------------------------------------------------------------------

impl SelectionVector {
    /// All rows selected in order: logical index `i` maps to physical index `i`.
    pub fn identity(count: usize) -> Self {
        Self {
            indices: None,
            count,
        }
    }

    /// Explicit subset of physical row indices; the selection length is
    /// `indices.len()`.
    pub fn from_indices(indices: Vec<u32>) -> Self {
        let count = indices.len();
        Self {
            indices: Some(indices),
            count,
        }
    }

    /// Map a logical index to a physical index.
    ///
    /// # Panics
    /// Panics if `logical` is out of range for an explicit selection. For an
    /// identity selection the range is only checked in debug builds.
    #[inline]
    pub fn get(&self, logical: usize) -> usize {
        // An identity selection has no backing array to bounds-check against, so
        // check the logical range explicitly (debug builds only, to keep the
        // release hot path unchanged).
        debug_assert!(
            logical < self.count,
            "logical index {} out of range for selection of length {}",
            logical,
            self.count
        );
        match &self.indices {
            None => logical,
            Some(idx) => idx[logical] as usize,
        }
    }

    /// Number of selected rows.
    #[inline]
    pub fn len(&self) -> usize {
        self.count
    }

    /// Returns `true` when no rows are selected.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.count == 0
    }

    /// Returns `true` when this is an identity selection (no indirection).
    #[inline]
    pub fn is_identity(&self) -> bool {
        self.indices.is_none()
    }

    /// Raw pointer to the indices array, or null for identity selection (for JIT).
    ///
    /// The pointer borrows from `self` and must not be used after the selection
    /// is dropped.
    #[inline]
    pub fn indices_ptr(&self) -> *const u32 {
        match &self.indices {
            Some(v) => v.as_ptr(),
            None => std::ptr::null(),
        }
    }
}
117
118// ---------------------------------------------------------------------------
119// FlatVector
120// ---------------------------------------------------------------------------
121
122impl FlatVector {
123    /// Construct from a ColumnChunkData by copying its byte buffer and NullMask.
124    pub fn from_column_chunk(c: &ColumnChunkData, num_rows: usize) -> Self {
125        let stride = c.num_bytes_per_value();
126        let byte_count = num_rows * stride;
127        Self {
128            data: c.buffer()[..byte_count].to_vec(),
129            null_mask: c.null_mask().clone(),
130            logical_type: c.data_type().clone(),
131            num_values: num_rows,
132            stride,
133        }
134    }
135
136    /// Construct from raw components (used by JIT projection output).
137    pub fn from_raw(
138        data: Vec<u8>,
139        null_mask: NullMask,
140        logical_type: LogicalType,
141        num_values: usize,
142        stride: usize,
143    ) -> Self {
144        Self {
145            data,
146            null_mask,
147            logical_type,
148            num_values,
149            stride,
150        }
151    }
152
153    /// Extract a single TypedValue at a physical index.
154    pub fn get_value(&self, idx: usize) -> TypedValue {
155        if self.null_mask.is_null(idx as u64) {
156            return TypedValue::Null;
157        }
158        let offset = idx * self.stride;
159        let bytes = &self.data[offset..offset + self.stride];
160        match &self.logical_type {
161            LogicalType::Int8 => {
162                TypedValue::Int8(i8::from_ne_bytes(bytes[..1].try_into().unwrap()))
163            }
164            LogicalType::Int16 => {
165                TypedValue::Int16(i16::from_ne_bytes(bytes[..2].try_into().unwrap()))
166            }
167            LogicalType::Int32 => {
168                TypedValue::Int32(i32::from_ne_bytes(bytes[..4].try_into().unwrap()))
169            }
170            LogicalType::Int64 | LogicalType::Serial => {
171                TypedValue::Int64(i64::from_ne_bytes(bytes[..8].try_into().unwrap()))
172            }
173            LogicalType::Float => {
174                TypedValue::Float(f32::from_ne_bytes(bytes[..4].try_into().unwrap()))
175            }
176            LogicalType::Double => {
177                TypedValue::Double(f64::from_ne_bytes(bytes[..8].try_into().unwrap()))
178            }
179            _ => TypedValue::Null,
180        }
181    }
182
183    #[inline]
184    pub fn is_null(&self, idx: usize) -> bool {
185        self.null_mask.is_null(idx as u64)
186    }
187
188    #[inline]
189    pub fn len(&self) -> usize {
190        self.num_values
191    }
192
193    #[inline]
194    pub fn is_empty(&self) -> bool {
195        self.num_values == 0
196    }
197
198    /// Direct access to the null mask for batch evaluation.
199    #[inline]
200    pub fn null_mask(&self) -> &NullMask {
201        &self.null_mask
202    }
203
204    /// The logical type of values in this vector.
205    #[inline]
206    pub fn logical_type(&self) -> &LogicalType {
207        &self.logical_type
208    }
209
210    /// Raw pointer to the flat byte buffer (for JIT compiled code).
211    #[inline]
212    pub fn data_ptr(&self) -> *const u8 {
213        self.data.as_ptr()
214    }
215
216    /// Value stride in bytes.
217    #[inline]
218    pub fn stride(&self) -> usize {
219        self.stride
220    }
221
222    /// Reinterpret the flat byte buffer as a typed i64 slice.
223    /// Caller must ensure `logical_type` is Int64 or Serial.
224    pub fn data_as_i64_slice(&self) -> &[i64] {
225        debug_assert_eq!(self.stride, 8);
226        let ptr = self.data.as_ptr() as *const i64;
227        unsafe { std::slice::from_raw_parts(ptr, self.num_values) }
228    }
229
230    /// Reinterpret the flat byte buffer as a typed i32 slice.
231    pub fn data_as_i32_slice(&self) -> &[i32] {
232        debug_assert_eq!(self.stride, 4);
233        let ptr = self.data.as_ptr() as *const i32;
234        unsafe { std::slice::from_raw_parts(ptr, self.num_values) }
235    }
236
237    /// Reinterpret the flat byte buffer as a typed f64 slice.
238    pub fn data_as_f64_slice(&self) -> &[f64] {
239        debug_assert_eq!(self.stride, 8);
240        let ptr = self.data.as_ptr() as *const f64;
241        unsafe { std::slice::from_raw_parts(ptr, self.num_values) }
242    }
243
244    /// Reinterpret the flat byte buffer as a typed f32 slice.
245    pub fn data_as_f32_slice(&self) -> &[f32] {
246        debug_assert_eq!(self.stride, 4);
247        let ptr = self.data.as_ptr() as *const f32;
248        unsafe { std::slice::from_raw_parts(ptr, self.num_values) }
249    }
250}
251
252// ---------------------------------------------------------------------------
253// BoolVector
254// ---------------------------------------------------------------------------
255
256impl BoolVector {
257    /// Construct from a BoolChunkData by cloning both NullMasks.
258    pub fn from_bool_chunk(c: &BoolChunkData, num_rows: usize) -> Self {
259        Self {
260            values: c.values_mask().clone(),
261            null_mask: c.null_mask().clone(),
262            num_values: num_rows,
263        }
264    }
265
266    pub fn get_value(&self, idx: usize) -> TypedValue {
267        if self.null_mask.is_null(idx as u64) {
268            TypedValue::Null
269        } else {
270            TypedValue::Bool(self.values.is_null(idx as u64))
271        }
272    }
273
274    pub fn is_null(&self, idx: usize) -> bool {
275        self.null_mask.is_null(idx as u64)
276    }
277
278    pub fn len(&self) -> usize {
279        self.num_values
280    }
281
282    pub fn is_empty(&self) -> bool {
283        self.num_values == 0
284    }
285}
286
287// ---------------------------------------------------------------------------
288// StringVector
289// ---------------------------------------------------------------------------
290
291impl StringVector {
292    /// Construct from a StringChunkData by copying the data slice.
293    pub fn from_string_chunk(c: &StringChunkData, num_rows: usize) -> Self {
294        Self {
295            data: c.data_slice()[..num_rows].to_vec(),
296            num_values: num_rows,
297        }
298    }
299
300    pub fn get_value(&self, idx: usize) -> TypedValue {
301        match &self.data[idx] {
302            Some(s) => TypedValue::String(s.clone()),
303            None => TypedValue::Null,
304        }
305    }
306
307    pub fn is_null(&self, idx: usize) -> bool {
308        self.data[idx].is_none()
309    }
310
311    pub fn len(&self) -> usize {
312        self.num_values
313    }
314
315    pub fn is_empty(&self) -> bool {
316        self.num_values == 0
317    }
318
319    /// Direct access to the underlying string data for batch evaluation.
320    pub fn data(&self) -> &[Option<SmolStr>] {
321        &self.data
322    }
323}
324
325// ---------------------------------------------------------------------------
326// ValueVector dispatch
327// ---------------------------------------------------------------------------
328
329impl ValueVector {
330    /// Extract a TypedValue at a physical index.
331    pub fn get_value(&self, idx: usize) -> TypedValue {
332        match self {
333            Self::Flat(v) => v.get_value(idx),
334            Self::Bool(v) => v.get_value(idx),
335            Self::String(v) => v.get_value(idx),
336            Self::Owned(v) => v[idx].clone(),
337        }
338    }
339
340    /// Check if the value at a physical index is null.
341    pub fn is_null(&self, idx: usize) -> bool {
342        match self {
343            Self::Flat(v) => v.is_null(idx),
344            Self::Bool(v) => v.is_null(idx),
345            Self::String(v) => v.is_null(idx),
346            Self::Owned(v) => v[idx].is_null(),
347        }
348    }
349
350    /// Number of physical values stored.
351    pub fn len(&self) -> usize {
352        match self {
353            Self::Flat(v) => v.len(),
354            Self::Bool(v) => v.len(),
355            Self::String(v) => v.len(),
356            Self::Owned(v) => v.len(),
357        }
358    }
359
360    pub fn is_empty(&self) -> bool {
361        self.len() == 0
362    }
363
364    /// Push a value onto an Owned vector. Panics on non-Owned variants.
365    pub fn push(&mut self, val: TypedValue) {
366        match self {
367            Self::Owned(v) => v.push(val),
368            _ => panic!("ValueVector::push only supported on Owned variant"),
369        }
370    }
371}
372
#[cfg(test)]
mod tests {
    use super::*;

    /// Int64 values and a trailing null survive the chunk -> FlatVector copy.
    #[test]
    fn flat_vector_int64_roundtrip() {
        let mut chunk = ColumnChunkData::new(LogicalType::Int64, 8);
        chunk.append_value::<i64>(42);
        chunk.append_value::<i64>(-7);
        chunk.append_null();

        let vec = FlatVector::from_column_chunk(&chunk, 3);
        assert_eq!(vec.len(), 3);
        assert_eq!(vec.get_value(0), TypedValue::Int64(42));
        assert_eq!(vec.get_value(1), TypedValue::Int64(-7));
        assert_eq!(vec.get_value(2), TypedValue::Null);
        assert!(!vec.is_null(0));
        assert!(vec.is_null(2));
    }

    /// Double values survive the chunk -> FlatVector copy bit-for-bit.
    #[test]
    fn flat_vector_double_roundtrip() {
        // 3.25 is used rather than 3.14: the latter trips the deny-by-default
        // `clippy::approx_constant` lint (it looks like a truncated PI).
        let mut chunk = ColumnChunkData::new(LogicalType::Double, 4);
        chunk.append_value::<f64>(3.25);
        chunk.append_value::<f64>(-2.5);

        let vec = FlatVector::from_column_chunk(&chunk, 2);
        assert_eq!(vec.get_value(0), TypedValue::Double(3.25));
        assert_eq!(vec.get_value(1), TypedValue::Double(-2.5));
    }

    /// The typed slice view exposes the same values as `get_value`.
    #[test]
    fn flat_vector_i64_slice_view() {
        let mut chunk = ColumnChunkData::new(LogicalType::Int64, 4);
        chunk.append_value::<i64>(10);
        chunk.append_value::<i64>(-20);

        let vec = FlatVector::from_column_chunk(&chunk, 2);
        assert_eq!(vec.data_as_i64_slice(), &[10, -20]);
    }

    /// Packed booleans and a trailing null survive the chunk -> BoolVector copy.
    #[test]
    fn bool_vector_roundtrip() {
        let mut chunk = BoolChunkData::new(8);
        chunk.append_bool(true);
        chunk.append_bool(false);
        chunk.append_null();

        let vec = BoolVector::from_bool_chunk(&chunk, 3);
        assert_eq!(vec.len(), 3);
        assert_eq!(vec.get_value(0), TypedValue::Bool(true));
        assert_eq!(vec.get_value(1), TypedValue::Bool(false));
        assert_eq!(vec.get_value(2), TypedValue::Null);
    }

    /// Strings and an interleaved null survive the chunk -> StringVector copy.
    #[test]
    fn string_vector_roundtrip() {
        let mut chunk = StringChunkData::new(8);
        chunk.append_string(SmolStr::new("hello"));
        chunk.append_null();
        chunk.append_string(SmolStr::new("world"));

        let vec = StringVector::from_string_chunk(&chunk, 3);
        assert_eq!(vec.len(), 3);
        assert_eq!(vec.get_value(0), TypedValue::String(SmolStr::new("hello")));
        assert_eq!(vec.get_value(1), TypedValue::Null);
        assert_eq!(vec.get_value(2), TypedValue::String(SmolStr::new("world")));
    }

    /// `push` appends to an Owned vector and values read back in order.
    #[test]
    fn owned_push_and_get() {
        let mut vec = ValueVector::Owned(Vec::new());
        vec.push(TypedValue::Int64(1));
        vec.push(TypedValue::Int64(2));
        assert_eq!(vec.len(), 2);
        assert_eq!(vec.get_value(0), TypedValue::Int64(1));
        assert_eq!(vec.get_value(1), TypedValue::Int64(2));
    }

    /// The enum forwards accessors to the wrapped FlatVector.
    #[test]
    fn value_vector_dispatch() {
        let mut chunk = ColumnChunkData::new(LogicalType::Int64, 4);
        chunk.append_value::<i64>(99);
        let vv = ValueVector::Flat(FlatVector::from_column_chunk(&chunk, 1));
        assert_eq!(vv.get_value(0), TypedValue::Int64(99));
        assert!(!vv.is_null(0));
        assert_eq!(vv.len(), 1);
    }

    /// Identity selections map every logical index to itself.
    #[test]
    fn selection_vector_identity() {
        let sel = SelectionVector::identity(5);
        assert_eq!(sel.len(), 5);
        assert!(sel.is_identity());
        assert!(sel.indices_ptr().is_null());
        assert_eq!(sel.get(0), 0);
        assert_eq!(sel.get(4), 4);
    }

    /// Explicit selections map logical positions through the index list.
    #[test]
    fn selection_vector_explicit() {
        let sel = SelectionVector::from_indices(vec![2, 5, 7]);
        assert_eq!(sel.len(), 3);
        assert!(!sel.is_identity());
        assert!(!sel.indices_ptr().is_null());
        assert_eq!(sel.get(0), 2);
        assert_eq!(sel.get(1), 5);
        assert_eq!(sel.get(2), 7);
    }

    /// An explicit selection built from no indices is empty.
    #[test]
    fn selection_vector_empty() {
        let sel = SelectionVector::from_indices(vec![]);
        assert!(sel.is_empty());
        assert_eq!(sel.len(), 0);
    }
}