1use kyu_storage::{BoolChunkData, ColumnChunkData, NullMask, StringChunkData};
9use kyu_types::{LogicalType, TypedValue};
10use smol_str::SmolStr;
11
12#[derive(Clone, Debug)]
16pub enum ValueVector {
17 Flat(FlatVector),
19 Bool(BoolVector),
21 String(StringVector),
23 Owned(Vec<TypedValue>),
25}
26
27#[derive(Clone, Debug)]
30pub struct FlatVector {
31 data: Vec<u8>,
32 null_mask: NullMask,
33 logical_type: LogicalType,
34 num_values: usize,
35 stride: usize,
36}
37
38#[derive(Clone, Debug)]
40pub struct BoolVector {
41 values: NullMask,
42 null_mask: NullMask,
43 num_values: usize,
44}
45
46#[derive(Clone, Debug)]
48pub struct StringVector {
49 data: Vec<Option<SmolStr>>,
50 num_values: usize,
51}
52
/// Maps logical row positions to physical row positions.
///
/// Without an explicit index list the mapping is the identity (`i -> i`);
/// with one, logical position `i` maps to `indices[i]`.
#[derive(Debug, Clone)]
pub struct SelectionVector {
    /// Explicit physical positions, or `None` for the identity mapping.
    indices: Option<Vec<u32>>,
    /// Number of selected rows.
    count: usize,
}

impl SelectionVector {
    /// Creates an identity selection covering `count` rows.
    pub fn identity(count: usize) -> Self {
        Self {
            count,
            indices: None,
        }
    }

    /// Creates a selection from explicit physical positions.
    ///
    /// The resulting length equals `indices.len()`.
    pub fn from_indices(indices: Vec<u32>) -> Self {
        Self {
            count: indices.len(),
            indices: Some(indices),
        }
    }

    /// Returns the physical position for logical position `logical`.
    ///
    /// # Panics
    ///
    /// Panics if an explicit index list is present and `logical` is outside
    /// it. The identity mapping performs no bounds check.
    #[inline]
    pub fn get(&self, logical: usize) -> usize {
        self.indices
            .as_ref()
            .map_or(logical, |positions| positions[logical] as usize)
    }

    /// Returns the number of selected rows.
    #[inline]
    pub fn len(&self) -> usize {
        self.count
    }

    /// Returns `true` when no rows are selected.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` when this selection is the identity mapping.
    #[inline]
    pub fn is_identity(&self) -> bool {
        self.indices.is_none()
    }

    /// Returns a raw pointer to the explicit index list, or a null pointer
    /// for the identity mapping.
    #[inline]
    pub fn indices_ptr(&self) -> *const u32 {
        if let Some(positions) = self.indices.as_ref() {
            positions.as_ptr()
        } else {
            std::ptr::null()
        }
    }
}
117
118impl FlatVector {
123 pub fn from_column_chunk(c: &ColumnChunkData, num_rows: usize) -> Self {
125 let stride = c.num_bytes_per_value();
126 let byte_count = num_rows * stride;
127 Self {
128 data: c.buffer()[..byte_count].to_vec(),
129 null_mask: c.null_mask().clone(),
130 logical_type: c.data_type().clone(),
131 num_values: num_rows,
132 stride,
133 }
134 }
135
136 pub fn from_raw(
138 data: Vec<u8>,
139 null_mask: NullMask,
140 logical_type: LogicalType,
141 num_values: usize,
142 stride: usize,
143 ) -> Self {
144 Self {
145 data,
146 null_mask,
147 logical_type,
148 num_values,
149 stride,
150 }
151 }
152
153 pub fn get_value(&self, idx: usize) -> TypedValue {
155 if self.null_mask.is_null(idx as u64) {
156 return TypedValue::Null;
157 }
158 let offset = idx * self.stride;
159 let bytes = &self.data[offset..offset + self.stride];
160 match &self.logical_type {
161 LogicalType::Int8 => {
162 TypedValue::Int8(i8::from_ne_bytes(bytes[..1].try_into().unwrap()))
163 }
164 LogicalType::Int16 => {
165 TypedValue::Int16(i16::from_ne_bytes(bytes[..2].try_into().unwrap()))
166 }
167 LogicalType::Int32 => {
168 TypedValue::Int32(i32::from_ne_bytes(bytes[..4].try_into().unwrap()))
169 }
170 LogicalType::Int64 | LogicalType::Serial => {
171 TypedValue::Int64(i64::from_ne_bytes(bytes[..8].try_into().unwrap()))
172 }
173 LogicalType::Float => {
174 TypedValue::Float(f32::from_ne_bytes(bytes[..4].try_into().unwrap()))
175 }
176 LogicalType::Double => {
177 TypedValue::Double(f64::from_ne_bytes(bytes[..8].try_into().unwrap()))
178 }
179 _ => TypedValue::Null,
180 }
181 }
182
183 #[inline]
184 pub fn is_null(&self, idx: usize) -> bool {
185 self.null_mask.is_null(idx as u64)
186 }
187
188 #[inline]
189 pub fn len(&self) -> usize {
190 self.num_values
191 }
192
193 #[inline]
194 pub fn is_empty(&self) -> bool {
195 self.num_values == 0
196 }
197
198 #[inline]
200 pub fn null_mask(&self) -> &NullMask {
201 &self.null_mask
202 }
203
204 #[inline]
206 pub fn logical_type(&self) -> &LogicalType {
207 &self.logical_type
208 }
209
210 #[inline]
212 pub fn data_ptr(&self) -> *const u8 {
213 self.data.as_ptr()
214 }
215
216 #[inline]
218 pub fn stride(&self) -> usize {
219 self.stride
220 }
221
222 pub fn data_as_i64_slice(&self) -> &[i64] {
225 debug_assert_eq!(self.stride, 8);
226 let ptr = self.data.as_ptr() as *const i64;
227 unsafe { std::slice::from_raw_parts(ptr, self.num_values) }
228 }
229
230 pub fn data_as_i32_slice(&self) -> &[i32] {
232 debug_assert_eq!(self.stride, 4);
233 let ptr = self.data.as_ptr() as *const i32;
234 unsafe { std::slice::from_raw_parts(ptr, self.num_values) }
235 }
236
237 pub fn data_as_f64_slice(&self) -> &[f64] {
239 debug_assert_eq!(self.stride, 8);
240 let ptr = self.data.as_ptr() as *const f64;
241 unsafe { std::slice::from_raw_parts(ptr, self.num_values) }
242 }
243
244 pub fn data_as_f32_slice(&self) -> &[f32] {
246 debug_assert_eq!(self.stride, 4);
247 let ptr = self.data.as_ptr() as *const f32;
248 unsafe { std::slice::from_raw_parts(ptr, self.num_values) }
249 }
250}
251
252impl BoolVector {
257 pub fn from_bool_chunk(c: &BoolChunkData, num_rows: usize) -> Self {
259 Self {
260 values: c.values_mask().clone(),
261 null_mask: c.null_mask().clone(),
262 num_values: num_rows,
263 }
264 }
265
266 pub fn get_value(&self, idx: usize) -> TypedValue {
267 if self.null_mask.is_null(idx as u64) {
268 TypedValue::Null
269 } else {
270 TypedValue::Bool(self.values.is_null(idx as u64))
271 }
272 }
273
274 pub fn is_null(&self, idx: usize) -> bool {
275 self.null_mask.is_null(idx as u64)
276 }
277
278 pub fn len(&self) -> usize {
279 self.num_values
280 }
281
282 pub fn is_empty(&self) -> bool {
283 self.num_values == 0
284 }
285}
286
287impl StringVector {
292 pub fn from_string_chunk(c: &StringChunkData, num_rows: usize) -> Self {
294 Self {
295 data: c.data_slice()[..num_rows].to_vec(),
296 num_values: num_rows,
297 }
298 }
299
300 pub fn get_value(&self, idx: usize) -> TypedValue {
301 match &self.data[idx] {
302 Some(s) => TypedValue::String(s.clone()),
303 None => TypedValue::Null,
304 }
305 }
306
307 pub fn is_null(&self, idx: usize) -> bool {
308 self.data[idx].is_none()
309 }
310
311 pub fn len(&self) -> usize {
312 self.num_values
313 }
314
315 pub fn is_empty(&self) -> bool {
316 self.num_values == 0
317 }
318
319 pub fn data(&self) -> &[Option<SmolStr>] {
321 &self.data
322 }
323}
324
325impl ValueVector {
330 pub fn get_value(&self, idx: usize) -> TypedValue {
332 match self {
333 Self::Flat(v) => v.get_value(idx),
334 Self::Bool(v) => v.get_value(idx),
335 Self::String(v) => v.get_value(idx),
336 Self::Owned(v) => v[idx].clone(),
337 }
338 }
339
340 pub fn is_null(&self, idx: usize) -> bool {
342 match self {
343 Self::Flat(v) => v.is_null(idx),
344 Self::Bool(v) => v.is_null(idx),
345 Self::String(v) => v.is_null(idx),
346 Self::Owned(v) => v[idx].is_null(),
347 }
348 }
349
350 pub fn len(&self) -> usize {
352 match self {
353 Self::Flat(v) => v.len(),
354 Self::Bool(v) => v.len(),
355 Self::String(v) => v.len(),
356 Self::Owned(v) => v.len(),
357 }
358 }
359
360 pub fn is_empty(&self) -> bool {
361 self.len() == 0
362 }
363
364 pub fn push(&mut self, val: TypedValue) {
366 match self {
367 Self::Owned(v) => v.push(val),
368 _ => panic!("ValueVector::push only supported on Owned variant"),
369 }
370 }
371}
372
#[cfg(test)]
mod tests {
    //! Round-trip and dispatch tests for the vector types in this module.

    use super::*;

    /// Int64 values and a trailing null survive the chunk -> vector copy.
    #[test]
    fn flat_vector_int64_roundtrip() {
        let mut chunk = ColumnChunkData::new(LogicalType::Int64, 8);
        chunk.append_value::<i64>(42);
        chunk.append_value::<i64>(-7);
        chunk.append_null();

        let vec = FlatVector::from_column_chunk(&chunk, 3);
        assert_eq!(vec.len(), 3);
        assert_eq!(vec.get_value(0), TypedValue::Int64(42));
        assert_eq!(vec.get_value(1), TypedValue::Int64(-7));
        assert_eq!(vec.get_value(2), TypedValue::Null);
        assert!(!vec.is_null(0));
        assert!(vec.is_null(2));
    }

    /// Double values survive the chunk -> vector copy bit for bit.
    #[test]
    fn flat_vector_double_roundtrip() {
        // Values are exactly representable in binary and deliberately not
        // close to any `std::f64::consts` constant, so Clippy's
        // `approx_constant` lint stays quiet when tests are linted.
        let mut chunk = ColumnChunkData::new(LogicalType::Double, 4);
        chunk.append_value::<f64>(2.75);
        chunk.append_value::<f64>(-2.5);

        let vec = FlatVector::from_column_chunk(&chunk, 2);
        assert_eq!(vec.get_value(0), TypedValue::Double(2.75));
        assert_eq!(vec.get_value(1), TypedValue::Double(-2.5));
    }

    /// `true`, `false` and null are all distinguishable after the copy.
    #[test]
    fn bool_vector_roundtrip() {
        let mut chunk = BoolChunkData::new(8);
        chunk.append_bool(true);
        chunk.append_bool(false);
        chunk.append_null();

        let vec = BoolVector::from_bool_chunk(&chunk, 3);
        assert_eq!(vec.len(), 3);
        assert_eq!(vec.get_value(0), TypedValue::Bool(true));
        assert_eq!(vec.get_value(1), TypedValue::Bool(false));
        assert_eq!(vec.get_value(2), TypedValue::Null);
    }

    /// Strings and an interleaved null survive the chunk -> vector copy.
    #[test]
    fn string_vector_roundtrip() {
        let mut chunk = StringChunkData::new(8);
        chunk.append_string(SmolStr::new("hello"));
        chunk.append_null();
        chunk.append_string(SmolStr::new("world"));

        let vec = StringVector::from_string_chunk(&chunk, 3);
        assert_eq!(vec.len(), 3);
        assert_eq!(vec.get_value(0), TypedValue::String(SmolStr::new("hello")));
        assert_eq!(vec.get_value(1), TypedValue::Null);
        assert_eq!(vec.get_value(2), TypedValue::String(SmolStr::new("world")));
    }

    /// Values pushed onto an `Owned` vector are read back in order.
    #[test]
    fn owned_push_and_get() {
        let mut vec = ValueVector::Owned(Vec::new());
        vec.push(TypedValue::Int64(1));
        vec.push(TypedValue::Int64(2));
        assert_eq!(vec.len(), 2);
        assert_eq!(vec.get_value(0), TypedValue::Int64(1));
        assert_eq!(vec.get_value(1), TypedValue::Int64(2));
    }

    /// The enum forwards `get_value`, `is_null` and `len` to the wrapped vector.
    #[test]
    fn value_vector_dispatch() {
        let mut chunk = ColumnChunkData::new(LogicalType::Int64, 4);
        chunk.append_value::<i64>(99);
        let vv = ValueVector::Flat(FlatVector::from_column_chunk(&chunk, 1));
        assert_eq!(vv.get_value(0), TypedValue::Int64(99));
        assert!(!vv.is_null(0));
        assert_eq!(vv.len(), 1);
    }

    /// The identity selection maps every position to itself.
    #[test]
    fn selection_vector_identity() {
        let sel = SelectionVector::identity(5);
        assert_eq!(sel.len(), 5);
        assert_eq!(sel.get(0), 0);
        assert_eq!(sel.get(4), 4);
    }

    /// An explicit selection maps positions through its index list.
    #[test]
    fn selection_vector_explicit() {
        let sel = SelectionVector::from_indices(vec![2, 5, 7]);
        assert_eq!(sel.len(), 3);
        assert_eq!(sel.get(0), 2);
        assert_eq!(sel.get(1), 5);
        assert_eq!(sel.get(2), 7);
    }

    /// An empty index list yields an empty selection.
    #[test]
    fn selection_vector_empty() {
        let sel = SelectionVector::from_indices(vec![]);
        assert!(sel.is_empty());
        assert_eq!(sel.len(), 0);
    }
}