Skip to main content

grafeo_core/execution/
vector.rs

1//! ValueVector for columnar data storage.
2
3use arcstr::ArcStr;
4
5use grafeo_common::types::{EdgeId, LogicalType, NodeId, Value};
6
/// Number of slots reserved by constructors that take no explicit capacity
/// (`ValueVector::new` and `ValueVector::with_type`).
pub const DEFAULT_VECTOR_CAPACITY: usize = 2_048;
9
10/// A columnar vector of values.
11///
12/// ValueVector stores data in columnar format for efficient SIMD processing
13/// and cache utilization during query execution.
14#[derive(Debug, Clone)]
15pub struct ValueVector {
16    /// The logical type of values in this vector.
17    data_type: LogicalType,
18    /// The actual data storage.
19    data: VectorData,
20    /// Number of valid entries.
21    len: usize,
22    /// Validity bitmap (true = valid, false = null).
23    validity: Option<Vec<bool>>,
24}
25
26/// Internal storage for vector data.
27#[derive(Debug, Clone)]
28enum VectorData {
29    /// Boolean values.
30    Bool(Vec<bool>),
31    /// 64-bit integers.
32    Int64(Vec<i64>),
33    /// 64-bit floats.
34    Float64(Vec<f64>),
35    /// Strings (stored as ArcStr for cheap cloning).
36    String(Vec<ArcStr>),
37    /// Node IDs.
38    NodeId(Vec<NodeId>),
39    /// Edge IDs.
40    EdgeId(Vec<EdgeId>),
41    /// Generic values (fallback for complex types).
42    Generic(Vec<Value>),
43}
44
45impl ValueVector {
46    /// Creates a new empty generic vector.
47    #[must_use]
48    pub fn new() -> Self {
49        Self::with_capacity(LogicalType::Any, DEFAULT_VECTOR_CAPACITY)
50    }
51
52    /// Creates a new empty vector with the given type.
53    #[must_use]
54    pub fn with_type(data_type: LogicalType) -> Self {
55        Self::with_capacity(data_type, DEFAULT_VECTOR_CAPACITY)
56    }
57
58    /// Creates a vector from a slice of values.
59    pub fn from_values(values: &[Value]) -> Self {
60        let mut vec = Self::new();
61        for value in values {
62            vec.push_value(value.clone());
63        }
64        vec
65    }
66
67    /// Creates a new vector with the given capacity.
68    #[must_use]
69    pub fn with_capacity(data_type: LogicalType, capacity: usize) -> Self {
70        let data = match &data_type {
71            LogicalType::Bool => VectorData::Bool(Vec::with_capacity(capacity)),
72            LogicalType::Int8 | LogicalType::Int16 | LogicalType::Int32 | LogicalType::Int64 => {
73                VectorData::Int64(Vec::with_capacity(capacity))
74            }
75            LogicalType::Float32 | LogicalType::Float64 => {
76                VectorData::Float64(Vec::with_capacity(capacity))
77            }
78            LogicalType::String => VectorData::String(Vec::with_capacity(capacity)),
79            LogicalType::Node => VectorData::NodeId(Vec::with_capacity(capacity)),
80            LogicalType::Edge => VectorData::EdgeId(Vec::with_capacity(capacity)),
81            _ => VectorData::Generic(Vec::with_capacity(capacity)),
82        };
83
84        Self {
85            data_type,
86            data,
87            len: 0,
88            validity: None,
89        }
90    }
91
92    /// Returns the data type of this vector.
93    #[must_use]
94    pub fn data_type(&self) -> &LogicalType {
95        &self.data_type
96    }
97
98    /// Returns the number of entries in this vector.
99    #[must_use]
100    pub fn len(&self) -> usize {
101        self.len
102    }
103
104    /// Returns true if this vector is empty.
105    #[must_use]
106    pub fn is_empty(&self) -> bool {
107        self.len == 0
108    }
109
110    /// Returns true if the value at index is null.
111    #[must_use]
112    pub fn is_null(&self, index: usize) -> bool {
113        self.validity
114            .as_ref()
115            .map_or(false, |v| !v.get(index).copied().unwrap_or(true))
116    }
117
118    /// Sets the value at index to null.
119    pub fn set_null(&mut self, index: usize) {
120        if self.validity.is_none() {
121            self.validity = Some(vec![true; self.len]);
122        }
123        if let Some(validity) = &mut self.validity
124            && index < validity.len()
125        {
126            validity[index] = false;
127        }
128    }
129
130    /// Pushes a boolean value.
131    pub fn push_bool(&mut self, value: bool) {
132        if let VectorData::Bool(vec) = &mut self.data {
133            vec.push(value);
134            self.len += 1;
135        }
136    }
137
138    /// Pushes an integer value.
139    pub fn push_int64(&mut self, value: i64) {
140        if let VectorData::Int64(vec) = &mut self.data {
141            vec.push(value);
142            self.len += 1;
143        }
144    }
145
146    /// Pushes a float value.
147    pub fn push_float64(&mut self, value: f64) {
148        if let VectorData::Float64(vec) = &mut self.data {
149            vec.push(value);
150            self.len += 1;
151        }
152    }
153
154    /// Pushes a string value.
155    pub fn push_string(&mut self, value: impl Into<ArcStr>) {
156        if let VectorData::String(vec) = &mut self.data {
157            vec.push(value.into());
158            self.len += 1;
159        }
160    }
161
162    /// Pushes a node ID.
163    pub fn push_node_id(&mut self, value: NodeId) {
164        match &mut self.data {
165            VectorData::NodeId(vec) => {
166                vec.push(value);
167                self.len += 1;
168            }
169            VectorData::Generic(vec) => {
170                vec.push(Value::Int64(value.as_u64() as i64));
171                self.len += 1;
172            }
173            _ => {}
174        }
175    }
176
177    /// Pushes an edge ID.
178    pub fn push_edge_id(&mut self, value: EdgeId) {
179        match &mut self.data {
180            VectorData::EdgeId(vec) => {
181                vec.push(value);
182                self.len += 1;
183            }
184            VectorData::Generic(vec) => {
185                vec.push(Value::Int64(value.as_u64() as i64));
186                self.len += 1;
187            }
188            _ => {}
189        }
190    }
191
192    /// Pushes a generic value.
193    pub fn push_value(&mut self, value: Value) {
194        // Handle null values specially - push a default and mark as null
195        if matches!(value, Value::Null) {
196            match &mut self.data {
197                VectorData::Bool(vec) => vec.push(false),
198                VectorData::Int64(vec) => vec.push(0),
199                VectorData::Float64(vec) => vec.push(0.0),
200                VectorData::String(vec) => vec.push("".into()),
201                VectorData::NodeId(vec) => vec.push(NodeId::new(0)),
202                VectorData::EdgeId(vec) => vec.push(EdgeId::new(0)),
203                VectorData::Generic(vec) => vec.push(Value::Null),
204            }
205            self.len += 1;
206            self.set_null(self.len - 1);
207            return;
208        }
209
210        match (&mut self.data, &value) {
211            (VectorData::Bool(vec), Value::Bool(b)) => vec.push(*b),
212            (VectorData::Int64(vec), Value::Int64(i)) => vec.push(*i),
213            (VectorData::Float64(vec), Value::Float64(f)) => vec.push(*f),
214            (VectorData::String(vec), Value::String(s)) => vec.push(s.clone()),
215            // Handle Int64 -> NodeId conversion (from get_value roundtrip)
216            (VectorData::NodeId(vec), Value::Int64(i)) => vec.push(NodeId::new(*i as u64)),
217            // Handle Int64 -> EdgeId conversion (from get_value roundtrip)
218            (VectorData::EdgeId(vec), Value::Int64(i)) => vec.push(EdgeId::new(*i as u64)),
219            (VectorData::Generic(vec), _) => vec.push(value),
220            _ => {
221                // Type mismatch - push a default value to maintain vector alignment
222                match &mut self.data {
223                    VectorData::Bool(vec) => vec.push(false),
224                    VectorData::Int64(vec) => vec.push(0),
225                    VectorData::Float64(vec) => vec.push(0.0),
226                    VectorData::String(vec) => vec.push("".into()),
227                    VectorData::NodeId(vec) => vec.push(NodeId::new(0)),
228                    VectorData::EdgeId(vec) => vec.push(EdgeId::new(0)),
229                    VectorData::Generic(vec) => vec.push(value),
230                }
231            }
232        }
233        self.len += 1;
234    }
235
236    /// Gets a boolean value at index.
237    #[must_use]
238    pub fn get_bool(&self, index: usize) -> Option<bool> {
239        if self.is_null(index) {
240            return None;
241        }
242        if let VectorData::Bool(vec) = &self.data {
243            vec.get(index).copied()
244        } else {
245            None
246        }
247    }
248
249    /// Gets an integer value at index.
250    #[must_use]
251    pub fn get_int64(&self, index: usize) -> Option<i64> {
252        if self.is_null(index) {
253            return None;
254        }
255        if let VectorData::Int64(vec) = &self.data {
256            vec.get(index).copied()
257        } else {
258            None
259        }
260    }
261
262    /// Gets a float value at index.
263    #[must_use]
264    pub fn get_float64(&self, index: usize) -> Option<f64> {
265        if self.is_null(index) {
266            return None;
267        }
268        if let VectorData::Float64(vec) = &self.data {
269            vec.get(index).copied()
270        } else {
271            None
272        }
273    }
274
275    /// Gets a string value at index.
276    #[must_use]
277    pub fn get_string(&self, index: usize) -> Option<&str> {
278        if self.is_null(index) {
279            return None;
280        }
281        if let VectorData::String(vec) = &self.data {
282            vec.get(index).map(|s| s.as_ref())
283        } else {
284            None
285        }
286    }
287
288    /// Gets a node ID at index.
289    #[must_use]
290    pub fn get_node_id(&self, index: usize) -> Option<NodeId> {
291        if self.is_null(index) {
292            return None;
293        }
294        match &self.data {
295            VectorData::NodeId(vec) => vec.get(index).copied(),
296            // Handle Generic vectors that contain node IDs stored as Int64
297            VectorData::Generic(vec) => match vec.get(index) {
298                Some(Value::Int64(i)) => Some(NodeId::new(*i as u64)),
299                _ => None,
300            },
301            _ => None,
302        }
303    }
304
305    /// Gets an edge ID at index.
306    #[must_use]
307    pub fn get_edge_id(&self, index: usize) -> Option<EdgeId> {
308        if self.is_null(index) {
309            return None;
310        }
311        match &self.data {
312            VectorData::EdgeId(vec) => vec.get(index).copied(),
313            // Handle Generic vectors that contain edge IDs stored as Int64
314            VectorData::Generic(vec) => match vec.get(index) {
315                Some(Value::Int64(i)) => Some(EdgeId::new(*i as u64)),
316                _ => None,
317            },
318            _ => None,
319        }
320    }
321
322    /// Gets a value at index as a generic Value.
323    #[must_use]
324    pub fn get_value(&self, index: usize) -> Option<Value> {
325        if self.is_null(index) {
326            return Some(Value::Null);
327        }
328
329        match &self.data {
330            VectorData::Bool(vec) => vec.get(index).map(|&v| Value::Bool(v)),
331            VectorData::Int64(vec) => vec.get(index).map(|&v| Value::Int64(v)),
332            VectorData::Float64(vec) => vec.get(index).map(|&v| Value::Float64(v)),
333            VectorData::String(vec) => vec.get(index).map(|v| Value::String(v.clone())),
334            VectorData::NodeId(vec) => vec.get(index).map(|&v| Value::Int64(v.as_u64() as i64)),
335            VectorData::EdgeId(vec) => vec.get(index).map(|&v| Value::Int64(v.as_u64() as i64)),
336            VectorData::Generic(vec) => vec.get(index).cloned(),
337        }
338    }
339
340    /// Alias for get_value.
341    #[must_use]
342    pub fn get(&self, index: usize) -> Option<Value> {
343        self.get_value(index)
344    }
345
346    /// Alias for push_value.
347    pub fn push(&mut self, value: Value) {
348        self.push_value(value);
349    }
350
351    /// Returns a slice of the underlying boolean data.
352    #[must_use]
353    pub fn as_bool_slice(&self) -> Option<&[bool]> {
354        if let VectorData::Bool(vec) = &self.data {
355            Some(vec)
356        } else {
357            None
358        }
359    }
360
361    /// Returns a slice of the underlying integer data.
362    #[must_use]
363    pub fn as_int64_slice(&self) -> Option<&[i64]> {
364        if let VectorData::Int64(vec) = &self.data {
365            Some(vec)
366        } else {
367            None
368        }
369    }
370
371    /// Returns a slice of the underlying float data.
372    #[must_use]
373    pub fn as_float64_slice(&self) -> Option<&[f64]> {
374        if let VectorData::Float64(vec) = &self.data {
375            Some(vec)
376        } else {
377            None
378        }
379    }
380
381    /// Returns a slice of the underlying node ID data.
382    #[must_use]
383    pub fn as_node_id_slice(&self) -> Option<&[NodeId]> {
384        if let VectorData::NodeId(vec) = &self.data {
385            Some(vec)
386        } else {
387            None
388        }
389    }
390
391    /// Returns a slice of the underlying edge ID data.
392    #[must_use]
393    pub fn as_edge_id_slice(&self) -> Option<&[EdgeId]> {
394        if let VectorData::EdgeId(vec) = &self.data {
395            Some(vec)
396        } else {
397            None
398        }
399    }
400
401    /// Returns the logical type of this vector.
402    #[must_use]
403    pub fn logical_type(&self) -> LogicalType {
404        self.data_type.clone()
405    }
406
407    /// Copies a row from this vector to the destination vector.
408    ///
409    /// The destination vector should have a compatible type. The value at `row`
410    /// is read from this vector and pushed to the destination vector.
411    pub fn copy_row_to(&self, row: usize, dest: &mut ValueVector) {
412        if self.is_null(row) {
413            dest.push_value(Value::Null);
414            return;
415        }
416
417        match &self.data {
418            VectorData::Bool(vec) => {
419                if let Some(&v) = vec.get(row) {
420                    dest.push_bool(v);
421                }
422            }
423            VectorData::Int64(vec) => {
424                if let Some(&v) = vec.get(row) {
425                    dest.push_int64(v);
426                }
427            }
428            VectorData::Float64(vec) => {
429                if let Some(&v) = vec.get(row) {
430                    dest.push_float64(v);
431                }
432            }
433            VectorData::String(vec) => {
434                if let Some(v) = vec.get(row) {
435                    dest.push_string(v.clone());
436                }
437            }
438            VectorData::NodeId(vec) => {
439                if let Some(&v) = vec.get(row) {
440                    dest.push_node_id(v);
441                }
442            }
443            VectorData::EdgeId(vec) => {
444                if let Some(&v) = vec.get(row) {
445                    dest.push_edge_id(v);
446                }
447            }
448            VectorData::Generic(vec) => {
449                if let Some(v) = vec.get(row) {
450                    dest.push_value(v.clone());
451                }
452            }
453        }
454    }
455
456    /// Clears all data from this vector.
457    pub fn clear(&mut self) {
458        match &mut self.data {
459            VectorData::Bool(vec) => vec.clear(),
460            VectorData::Int64(vec) => vec.clear(),
461            VectorData::Float64(vec) => vec.clear(),
462            VectorData::String(vec) => vec.clear(),
463            VectorData::NodeId(vec) => vec.clear(),
464            VectorData::EdgeId(vec) => vec.clear(),
465            VectorData::Generic(vec) => vec.clear(),
466        }
467        self.len = 0;
468        self.validity = None;
469    }
470}
471
472impl Default for ValueVector {
473    fn default() -> Self {
474        Self::new()
475    }
476}
477
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `Int64` vector containing `values` in order.
    fn int64_column(values: &[i64]) -> ValueVector {
        let mut column = ValueVector::with_type(LogicalType::Int64);
        for &v in values {
            column.push_int64(v);
        }
        column
    }

    /// Pushed integers are counted and read back in insertion order.
    #[test]
    fn test_int64_vector() {
        let column = int64_column(&[1, 2, 3]);

        assert_eq!(column.len(), 3);
        for (index, expected) in [1_i64, 2, 3].into_iter().enumerate() {
            assert_eq!(column.get_int64(index), Some(expected));
        }
    }

    /// Pushed strings are counted and read back in insertion order.
    #[test]
    fn test_string_vector() {
        let mut column = ValueVector::with_type(LogicalType::String);
        for word in ["hello", "world"] {
            column.push_string(word);
        }

        assert_eq!(column.len(), 2);
        assert_eq!(column.get_string(0), Some("hello"));
        assert_eq!(column.get_string(1), Some("world"));
    }

    /// Marking one entry null hides only that entry from the typed getter.
    #[test]
    fn test_null_values() {
        let mut column = int64_column(&[1, 2, 3]);

        assert!(!column.is_null(1));
        column.set_null(1);
        assert!(column.is_null(1));

        let read_back: Vec<Option<i64>> = (0..3).map(|i| column.get_int64(i)).collect();
        assert_eq!(read_back, vec![Some(1), None, Some(3)]);
    }

    /// The generic getter wraps a typed entry in the matching `Value`.
    #[test]
    fn test_get_value() {
        let column = int64_column(&[42]);

        assert_eq!(column.get_value(0), Some(Value::Int64(42)));
    }

    /// The raw slice exposes the integer buffer as pushed.
    #[test]
    fn test_slice_access() {
        let column = int64_column(&[1, 2, 3]);

        let raw = column.as_int64_slice().unwrap();
        assert_eq!(raw, &[1, 2, 3]);
    }

    /// Clearing removes every entry.
    #[test]
    fn test_clear() {
        let mut column = int64_column(&[1, 2]);

        column.clear();

        assert!(column.is_empty());
        assert_eq!(column.len(), 0);
    }
}