Skip to main content

grafeo_core/execution/
vector.rs

1//! ValueVector for columnar data storage.
2
3use arcstr::ArcStr;
4
5use grafeo_common::types::{EdgeId, LogicalType, NodeId, Value};
6
/// Number of value slots reserved when a vector is created without an
/// explicit capacity.
pub const DEFAULT_VECTOR_CAPACITY: usize = 2048;
9
10/// A columnar vector of values.
11///
12/// ValueVector stores data in columnar format for efficient SIMD processing
13/// and cache utilization during query execution.
14#[derive(Debug, Clone)]
15pub struct ValueVector {
16    /// The logical type of values in this vector.
17    data_type: LogicalType,
18    /// The actual data storage.
19    data: VectorData,
20    /// Number of valid entries.
21    len: usize,
22    /// Validity bitmap (true = valid, false = null).
23    validity: Option<Vec<bool>>,
24}
25
26/// Internal storage for vector data.
27#[derive(Debug, Clone)]
28enum VectorData {
29    /// Boolean values.
30    Bool(Vec<bool>),
31    /// 64-bit integers.
32    Int64(Vec<i64>),
33    /// 64-bit floats.
34    Float64(Vec<f64>),
35    /// Strings (stored as ArcStr for cheap cloning).
36    String(Vec<ArcStr>),
37    /// Node IDs.
38    NodeId(Vec<NodeId>),
39    /// Edge IDs.
40    EdgeId(Vec<EdgeId>),
41    /// Generic values (fallback for complex types).
42    Generic(Vec<Value>),
43}
44
45impl ValueVector {
46    /// Creates a new empty generic vector.
47    #[must_use]
48    pub fn new() -> Self {
49        Self::with_capacity(LogicalType::Any, DEFAULT_VECTOR_CAPACITY)
50    }
51
52    /// Creates a new empty vector with the given type.
53    #[must_use]
54    pub fn with_type(data_type: LogicalType) -> Self {
55        Self::with_capacity(data_type, DEFAULT_VECTOR_CAPACITY)
56    }
57
58    /// Creates a vector from a slice of values.
59    pub fn from_values(values: &[Value]) -> Self {
60        let mut vec = Self::new();
61        for value in values {
62            vec.push_value(value.clone());
63        }
64        vec
65    }
66
67    /// Creates a new vector with the given capacity.
68    #[must_use]
69    pub fn with_capacity(data_type: LogicalType, capacity: usize) -> Self {
70        let data = match &data_type {
71            LogicalType::Bool => VectorData::Bool(Vec::with_capacity(capacity)),
72            LogicalType::Int8 | LogicalType::Int16 | LogicalType::Int32 | LogicalType::Int64 => {
73                VectorData::Int64(Vec::with_capacity(capacity))
74            }
75            LogicalType::Float32 | LogicalType::Float64 => {
76                VectorData::Float64(Vec::with_capacity(capacity))
77            }
78            LogicalType::String => VectorData::String(Vec::with_capacity(capacity)),
79            LogicalType::Node => VectorData::NodeId(Vec::with_capacity(capacity)),
80            LogicalType::Edge => VectorData::EdgeId(Vec::with_capacity(capacity)),
81            _ => VectorData::Generic(Vec::with_capacity(capacity)),
82        };
83
84        Self {
85            data_type,
86            data,
87            len: 0,
88            validity: None,
89        }
90    }
91
92    /// Returns the data type of this vector.
93    #[must_use]
94    pub fn data_type(&self) -> &LogicalType {
95        &self.data_type
96    }
97
98    /// Returns the number of entries in this vector.
99    #[must_use]
100    pub fn len(&self) -> usize {
101        self.len
102    }
103
104    /// Returns true if this vector is empty.
105    #[must_use]
106    pub fn is_empty(&self) -> bool {
107        self.len == 0
108    }
109
110    /// Returns true if the value at index is null.
111    #[must_use]
112    pub fn is_null(&self, index: usize) -> bool {
113        self.validity
114            .as_ref()
115            .map_or(false, |v| !v.get(index).copied().unwrap_or(true))
116    }
117
118    /// Sets the value at index to null.
119    pub fn set_null(&mut self, index: usize) {
120        if self.validity.is_none() {
121            self.validity = Some(vec![true; index + 1]);
122        }
123        if let Some(validity) = &mut self.validity {
124            if validity.len() <= index {
125                validity.resize(index + 1, true);
126            }
127            validity[index] = false;
128        }
129    }
130
131    /// Pushes a boolean value.
132    pub fn push_bool(&mut self, value: bool) {
133        match &mut self.data {
134            VectorData::Bool(vec) => {
135                vec.push(value);
136                self.len += 1;
137            }
138            VectorData::Generic(vec) => {
139                vec.push(Value::Bool(value));
140                self.len += 1;
141            }
142            _ => {}
143        }
144    }
145
146    /// Pushes an integer value.
147    pub fn push_int64(&mut self, value: i64) {
148        match &mut self.data {
149            VectorData::Int64(vec) => {
150                vec.push(value);
151                self.len += 1;
152            }
153            VectorData::Generic(vec) => {
154                vec.push(Value::Int64(value));
155                self.len += 1;
156            }
157            _ => {}
158        }
159    }
160
161    /// Pushes a float value.
162    pub fn push_float64(&mut self, value: f64) {
163        match &mut self.data {
164            VectorData::Float64(vec) => {
165                vec.push(value);
166                self.len += 1;
167            }
168            VectorData::Generic(vec) => {
169                vec.push(Value::Float64(value));
170                self.len += 1;
171            }
172            _ => {}
173        }
174    }
175
176    /// Pushes a string value.
177    pub fn push_string(&mut self, value: impl Into<ArcStr>) {
178        match &mut self.data {
179            VectorData::String(vec) => {
180                vec.push(value.into());
181                self.len += 1;
182            }
183            VectorData::Generic(vec) => {
184                vec.push(Value::String(value.into()));
185                self.len += 1;
186            }
187            _ => {}
188        }
189    }
190
191    /// Pushes a node ID.
192    pub fn push_node_id(&mut self, value: NodeId) {
193        match &mut self.data {
194            VectorData::NodeId(vec) => {
195                vec.push(value);
196                self.len += 1;
197            }
198            VectorData::Generic(vec) => {
199                // reason: entity IDs stored as i64, standard encoding
200                #[allow(clippy::cast_possible_wrap)]
201                vec.push(Value::Int64(value.as_u64() as i64));
202                self.len += 1;
203            }
204            _ => {}
205        }
206    }
207
208    /// Pushes an edge ID.
209    pub fn push_edge_id(&mut self, value: EdgeId) {
210        match &mut self.data {
211            VectorData::EdgeId(vec) => {
212                vec.push(value);
213                self.len += 1;
214            }
215            VectorData::Generic(vec) => {
216                // reason: entity IDs stored as i64, standard encoding
217                #[allow(clippy::cast_possible_wrap)]
218                vec.push(Value::Int64(value.as_u64() as i64));
219                self.len += 1;
220            }
221            _ => {}
222        }
223    }
224
225    /// Pushes a generic value.
226    pub fn push_value(&mut self, value: Value) {
227        // Handle null values specially - push a default and mark as null
228        if matches!(value, Value::Null) {
229            match &mut self.data {
230                VectorData::Bool(vec) => vec.push(false),
231                VectorData::Int64(vec) => vec.push(0),
232                VectorData::Float64(vec) => vec.push(0.0),
233                VectorData::String(vec) => vec.push("".into()),
234                VectorData::NodeId(vec) => vec.push(NodeId::new(0)),
235                VectorData::EdgeId(vec) => vec.push(EdgeId::new(0)),
236                VectorData::Generic(vec) => vec.push(Value::Null),
237            }
238            self.len += 1;
239            self.set_null(self.len - 1);
240            return;
241        }
242
243        match (&mut self.data, &value) {
244            (VectorData::Bool(vec), Value::Bool(b)) => vec.push(*b),
245            (VectorData::Int64(vec), Value::Int64(i)) => vec.push(*i),
246            (VectorData::Float64(vec), Value::Float64(f)) => vec.push(*f),
247            (VectorData::String(vec), Value::String(s)) => vec.push(s.clone()),
248            // Handle Int64 -> NodeId conversion (from get_value roundtrip)
249            // reason: ID encoding: i64 <-> u64 round-trip
250            #[allow(clippy::cast_sign_loss)]
251            (VectorData::NodeId(vec), Value::Int64(i)) => vec.push(NodeId::new(*i as u64)),
252            // Handle Int64 -> EdgeId conversion (from get_value roundtrip)
253            // reason: ID encoding: i64 <-> u64 round-trip
254            #[allow(clippy::cast_sign_loss)]
255            (VectorData::EdgeId(vec), Value::Int64(i)) => vec.push(EdgeId::new(*i as u64)),
256            (VectorData::Generic(vec), _) => vec.push(value),
257            _ => {
258                // Type mismatch - push a default value to maintain vector alignment
259                match &mut self.data {
260                    VectorData::Bool(vec) => vec.push(false),
261                    VectorData::Int64(vec) => vec.push(0),
262                    VectorData::Float64(vec) => vec.push(0.0),
263                    VectorData::String(vec) => vec.push("".into()),
264                    VectorData::NodeId(vec) => vec.push(NodeId::new(0)),
265                    VectorData::EdgeId(vec) => vec.push(EdgeId::new(0)),
266                    VectorData::Generic(vec) => vec.push(value),
267                }
268            }
269        }
270        self.len += 1;
271    }
272
273    /// Gets a boolean value at index.
274    #[must_use]
275    pub fn get_bool(&self, index: usize) -> Option<bool> {
276        if self.is_null(index) {
277            return None;
278        }
279        if let VectorData::Bool(vec) = &self.data {
280            vec.get(index).copied()
281        } else {
282            None
283        }
284    }
285
286    /// Gets an integer value at index.
287    #[must_use]
288    pub fn get_int64(&self, index: usize) -> Option<i64> {
289        if self.is_null(index) {
290            return None;
291        }
292        if let VectorData::Int64(vec) = &self.data {
293            vec.get(index).copied()
294        } else {
295            None
296        }
297    }
298
299    /// Gets a float value at index.
300    #[must_use]
301    pub fn get_float64(&self, index: usize) -> Option<f64> {
302        if self.is_null(index) {
303            return None;
304        }
305        if let VectorData::Float64(vec) = &self.data {
306            vec.get(index).copied()
307        } else {
308            None
309        }
310    }
311
312    /// Gets a string value at index.
313    #[must_use]
314    pub fn get_string(&self, index: usize) -> Option<&str> {
315        if self.is_null(index) {
316            return None;
317        }
318        if let VectorData::String(vec) = &self.data {
319            vec.get(index).map(|s| s.as_ref())
320        } else {
321            None
322        }
323    }
324
325    /// Gets a node ID at index.
326    #[must_use]
327    pub fn get_node_id(&self, index: usize) -> Option<NodeId> {
328        if self.is_null(index) {
329            return None;
330        }
331        match &self.data {
332            VectorData::NodeId(vec) => vec.get(index).copied(),
333            // Handle Generic vectors that contain node IDs stored as Int64
334            VectorData::Generic(vec) => match vec.get(index) {
335                // reason: ID encoding: i64 <-> u64 round-trip
336                #[allow(clippy::cast_sign_loss)]
337                Some(Value::Int64(i)) => Some(NodeId::new(*i as u64)),
338                _ => None,
339            },
340            _ => None,
341        }
342    }
343
344    /// Gets an edge ID at index.
345    #[must_use]
346    pub fn get_edge_id(&self, index: usize) -> Option<EdgeId> {
347        if self.is_null(index) {
348            return None;
349        }
350        match &self.data {
351            VectorData::EdgeId(vec) => vec.get(index).copied(),
352            // Handle Generic vectors that contain edge IDs stored as Int64
353            VectorData::Generic(vec) => match vec.get(index) {
354                // reason: ID encoding: i64 <-> u64 round-trip
355                #[allow(clippy::cast_sign_loss)]
356                Some(Value::Int64(i)) => Some(EdgeId::new(*i as u64)),
357                _ => None,
358            },
359            _ => None,
360        }
361    }
362
363    /// Gets a value at index as a generic Value.
364    #[must_use]
365    pub fn get_value(&self, index: usize) -> Option<Value> {
366        if self.is_null(index) {
367            return Some(Value::Null);
368        }
369
370        match &self.data {
371            VectorData::Bool(vec) => vec.get(index).map(|&v| Value::Bool(v)),
372            VectorData::Int64(vec) => vec.get(index).map(|&v| Value::Int64(v)),
373            VectorData::Float64(vec) => vec.get(index).map(|&v| Value::Float64(v)),
374            VectorData::String(vec) => vec.get(index).map(|v| Value::String(v.clone())),
375            // reason: entity IDs stored as i64, standard encoding
376            VectorData::NodeId(vec) => vec.get(index).map(|&v| {
377                // reason: entity IDs are sequential counters, well within i64::MAX
378                #[allow(clippy::cast_possible_wrap)]
379                let val = Value::Int64(v.as_u64() as i64);
380                val
381            }),
382            // reason: entity IDs stored as i64, standard encoding
383            // reason: entity IDs are sequential counters, well within i64::MAX
384            VectorData::EdgeId(vec) => vec.get(index).map(|&v| {
385                // reason: entity IDs are sequential counters, well within i64::MAX
386                #[allow(clippy::cast_possible_wrap)]
387                let val = Value::Int64(v.as_u64() as i64);
388                val
389            }),
390            VectorData::Generic(vec) => vec.get(index).cloned(),
391        }
392    }
393
394    /// Alias for get_value.
395    #[must_use]
396    pub fn get(&self, index: usize) -> Option<Value> {
397        self.get_value(index)
398    }
399
400    /// Alias for push_value.
401    pub fn push(&mut self, value: Value) {
402        self.push_value(value);
403    }
404
405    /// Returns a slice of the underlying boolean data.
406    #[must_use]
407    pub fn as_bool_slice(&self) -> Option<&[bool]> {
408        if let VectorData::Bool(vec) = &self.data {
409            Some(vec)
410        } else {
411            None
412        }
413    }
414
415    /// Returns a slice of the underlying integer data.
416    #[must_use]
417    pub fn as_int64_slice(&self) -> Option<&[i64]> {
418        if let VectorData::Int64(vec) = &self.data {
419            Some(vec)
420        } else {
421            None
422        }
423    }
424
425    /// Returns a slice of the underlying float data.
426    #[must_use]
427    pub fn as_float64_slice(&self) -> Option<&[f64]> {
428        if let VectorData::Float64(vec) = &self.data {
429            Some(vec)
430        } else {
431            None
432        }
433    }
434
435    /// Returns a slice of the underlying node ID data.
436    #[must_use]
437    pub fn as_node_id_slice(&self) -> Option<&[NodeId]> {
438        if let VectorData::NodeId(vec) = &self.data {
439            Some(vec)
440        } else {
441            None
442        }
443    }
444
445    /// Returns a slice of the underlying edge ID data.
446    #[must_use]
447    pub fn as_edge_id_slice(&self) -> Option<&[EdgeId]> {
448        if let VectorData::EdgeId(vec) = &self.data {
449            Some(vec)
450        } else {
451            None
452        }
453    }
454
455    /// Returns the logical type of this vector.
456    #[must_use]
457    pub fn logical_type(&self) -> LogicalType {
458        self.data_type.clone()
459    }
460
461    /// Copies a row from this vector to the destination vector.
462    ///
463    /// The destination vector should have a compatible type. The value at `row`
464    /// is read from this vector and pushed to the destination vector.
465    pub fn copy_row_to(&self, row: usize, dest: &mut ValueVector) {
466        if self.is_null(row) {
467            dest.push_value(Value::Null);
468            return;
469        }
470
471        match &self.data {
472            VectorData::Bool(vec) => {
473                if let Some(&v) = vec.get(row) {
474                    dest.push_bool(v);
475                }
476            }
477            VectorData::Int64(vec) => {
478                if let Some(&v) = vec.get(row) {
479                    dest.push_int64(v);
480                }
481            }
482            VectorData::Float64(vec) => {
483                if let Some(&v) = vec.get(row) {
484                    dest.push_float64(v);
485                }
486            }
487            VectorData::String(vec) => {
488                if let Some(v) = vec.get(row) {
489                    dest.push_string(v.clone());
490                }
491            }
492            VectorData::NodeId(vec) => {
493                if let Some(&v) = vec.get(row) {
494                    dest.push_node_id(v);
495                }
496            }
497            VectorData::EdgeId(vec) => {
498                if let Some(&v) = vec.get(row) {
499                    dest.push_edge_id(v);
500                }
501            }
502            VectorData::Generic(vec) => {
503                if let Some(v) = vec.get(row) {
504                    dest.push_value(v.clone());
505                }
506            }
507        }
508    }
509
510    /// Clears all data from this vector.
511    pub fn clear(&mut self) {
512        match &mut self.data {
513            VectorData::Bool(vec) => vec.clear(),
514            VectorData::Int64(vec) => vec.clear(),
515            VectorData::Float64(vec) => vec.clear(),
516            VectorData::String(vec) => vec.clear(),
517            VectorData::NodeId(vec) => vec.clear(),
518            VectorData::EdgeId(vec) => vec.clear(),
519            VectorData::Generic(vec) => vec.clear(),
520        }
521        self.len = 0;
522        self.validity = None;
523    }
524}
525
526impl Default for ValueVector {
527    fn default() -> Self {
528        Self::new()
529    }
530}
531
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `Int64`-typed vector holding `items` in order.
    fn int64_vector(items: &[i64]) -> ValueVector {
        let mut column = ValueVector::with_type(LogicalType::Int64);
        for &item in items {
            column.push_int64(item);
        }
        column
    }

    /// Builds an empty vector typed `Any`, which uses generic storage.
    fn any_vector() -> ValueVector {
        ValueVector::with_type(LogicalType::Any)
    }

    /// Asserts that `column` yields exactly `expected` through `get_value`.
    fn assert_values(column: &ValueVector, expected: &[Value]) {
        assert_eq!(column.len(), expected.len());
        for (row, want) in expected.iter().enumerate() {
            assert_eq!(column.get_value(row), Some(want.clone()));
        }
    }

    #[test]
    fn test_int64_vector() {
        let items = [1_i64, 2, 3];
        let column = int64_vector(&items);

        assert_eq!(column.len(), 3);
        for (row, &want) in items.iter().enumerate() {
            assert_eq!(column.get_int64(row), Some(want));
        }
    }

    #[test]
    fn test_string_vector() {
        let words = ["hello", "world"];
        let mut column = ValueVector::with_type(LogicalType::String);
        for &word in words.iter() {
            column.push_string(word);
        }

        assert_eq!(column.len(), 2);
        for (row, &want) in words.iter().enumerate() {
            assert_eq!(column.get_string(row), Some(want));
        }
    }

    #[test]
    fn test_null_values() {
        let mut column = int64_vector(&[1, 2, 3]);

        assert!(!column.is_null(1));
        column.set_null(1);
        assert!(column.is_null(1));

        // Only the middle row was nulled; its neighbours stay readable.
        assert_eq!(column.get_int64(0), Some(1));
        assert_eq!(column.get_int64(1), None);
        assert_eq!(column.get_int64(2), Some(3));
    }

    #[test]
    fn test_get_value() {
        let column = int64_vector(&[42]);

        assert_eq!(column.get_value(0), Some(Value::Int64(42)));
    }

    #[test]
    fn test_slice_access() {
        let column = int64_vector(&[1, 2, 3]);

        assert_eq!(column.as_int64_slice(), Some(&[1_i64, 2, 3][..]));
    }

    /// A vector created with `LogicalType::Any` uses generic storage, and the
    /// typed push methods store into it wrapped in the matching `Value`
    /// variant instead of discarding the input.
    #[test]
    fn test_generic_fallback_push_int64() {
        let mut column = any_vector();
        column.push_int64(42);
        column.push_int64(-7);

        assert_values(&column, &[Value::Int64(42), Value::Int64(-7)]);
    }

    #[test]
    fn test_generic_fallback_push_bool() {
        let mut column = any_vector();
        column.push_bool(true);
        column.push_bool(false);

        assert_values(&column, &[Value::Bool(true), Value::Bool(false)]);
    }

    #[test]
    fn test_generic_fallback_push_float64() {
        let mut column = any_vector();
        column.push_float64(1.23);
        column.push_float64(-0.5);

        assert_values(&column, &[Value::Float64(1.23), Value::Float64(-0.5)]);
    }

    #[test]
    fn test_generic_fallback_push_string() {
        let mut column = any_vector();
        column.push_string("hello");
        column.push_string("world");

        assert_values(
            &column,
            &[Value::String("hello".into()), Value::String("world".into())],
        );
    }

    /// Typed pushes of different kinds into one generic vector each keep
    /// their own variant.
    #[test]
    fn test_generic_fallback_mixed_types() {
        let mut column = any_vector();
        column.push_int64(1);
        column.push_string("two");
        column.push_bool(true);
        column.push_float64(99.5);

        assert_values(
            &column,
            &[
                Value::Int64(1),
                Value::String("two".into()),
                Value::Bool(true),
                Value::Float64(99.5),
            ],
        );
    }

    /// A typed push into a typed vector of a different kind leaves the vector
    /// unchanged.
    #[test]
    fn test_type_mismatch_noop() {
        let mut ints = ValueVector::with_type(LogicalType::Int64);
        ints.push_string("wrong type");
        assert_eq!(ints.len(), 0);

        let mut strings = ValueVector::with_type(LogicalType::String);
        strings.push_int64(42);
        assert_eq!(strings.len(), 0);
    }

    #[test]
    fn test_clear() {
        let mut column = int64_vector(&[1, 2]);

        column.clear();

        assert!(column.is_empty());
        assert_eq!(column.len(), 0);
    }
}