// grafeo_core/execution/vector.rs

//! `ValueVector` for columnar data storage.
2
3use arcstr::ArcStr;
4
5use grafeo_common::types::{EdgeId, LogicalType, NodeId, Value};
6
/// Number of tuples a vector is pre-sized for when no explicit capacity is given.
pub const DEFAULT_VECTOR_CAPACITY: usize = 2_048;
9
10/// A columnar vector of values.
11///
12/// ValueVector stores data in columnar format for efficient SIMD processing
13/// and cache utilization during query execution.
14#[derive(Debug, Clone)]
15pub struct ValueVector {
16    /// The logical type of values in this vector.
17    data_type: LogicalType,
18    /// The actual data storage.
19    data: VectorData,
20    /// Number of valid entries.
21    len: usize,
22    /// Validity bitmap (true = valid, false = null).
23    validity: Option<Vec<bool>>,
24}
25
26/// Internal storage for vector data.
27#[derive(Debug, Clone)]
28enum VectorData {
29    /// Boolean values.
30    Bool(Vec<bool>),
31    /// 64-bit integers.
32    Int64(Vec<i64>),
33    /// 64-bit floats.
34    Float64(Vec<f64>),
35    /// Strings (stored as ArcStr for cheap cloning).
36    String(Vec<ArcStr>),
37    /// Node IDs.
38    NodeId(Vec<NodeId>),
39    /// Edge IDs.
40    EdgeId(Vec<EdgeId>),
41    /// Generic values (fallback for complex types).
42    Generic(Vec<Value>),
43}
44
45impl ValueVector {
46    /// Creates a new empty generic vector.
47    #[must_use]
48    pub fn new() -> Self {
49        Self::with_capacity(LogicalType::Any, DEFAULT_VECTOR_CAPACITY)
50    }
51
52    /// Creates a new empty vector with the given type.
53    #[must_use]
54    pub fn with_type(data_type: LogicalType) -> Self {
55        Self::with_capacity(data_type, DEFAULT_VECTOR_CAPACITY)
56    }
57
58    /// Creates a vector from a slice of values.
59    pub fn from_values(values: &[Value]) -> Self {
60        let mut vec = Self::new();
61        for value in values {
62            vec.push_value(value.clone());
63        }
64        vec
65    }
66
67    /// Creates a new vector with the given capacity.
68    #[must_use]
69    pub fn with_capacity(data_type: LogicalType, capacity: usize) -> Self {
70        let data = match &data_type {
71            LogicalType::Bool => VectorData::Bool(Vec::with_capacity(capacity)),
72            LogicalType::Int8 | LogicalType::Int16 | LogicalType::Int32 | LogicalType::Int64 => {
73                VectorData::Int64(Vec::with_capacity(capacity))
74            }
75            LogicalType::Float32 | LogicalType::Float64 => {
76                VectorData::Float64(Vec::with_capacity(capacity))
77            }
78            LogicalType::String => VectorData::String(Vec::with_capacity(capacity)),
79            LogicalType::Node => VectorData::NodeId(Vec::with_capacity(capacity)),
80            LogicalType::Edge => VectorData::EdgeId(Vec::with_capacity(capacity)),
81            _ => VectorData::Generic(Vec::with_capacity(capacity)),
82        };
83
84        Self {
85            data_type,
86            data,
87            len: 0,
88            validity: None,
89        }
90    }
91
92    /// Returns the data type of this vector.
93    #[must_use]
94    pub fn data_type(&self) -> &LogicalType {
95        &self.data_type
96    }
97
98    /// Returns the number of entries in this vector.
99    #[must_use]
100    pub fn len(&self) -> usize {
101        self.len
102    }
103
104    /// Returns true if this vector is empty.
105    #[must_use]
106    pub fn is_empty(&self) -> bool {
107        self.len == 0
108    }
109
110    /// Returns true if the value at index is null.
111    #[must_use]
112    pub fn is_null(&self, index: usize) -> bool {
113        self.validity
114            .as_ref()
115            .map_or(false, |v| !v.get(index).copied().unwrap_or(true))
116    }
117
118    /// Sets the value at index to null.
119    pub fn set_null(&mut self, index: usize) {
120        if self.validity.is_none() {
121            self.validity = Some(vec![true; index + 1]);
122        }
123        if let Some(validity) = &mut self.validity {
124            if validity.len() <= index {
125                validity.resize(index + 1, true);
126            }
127            validity[index] = false;
128        }
129    }
130
131    /// Pushes a boolean value.
132    pub fn push_bool(&mut self, value: bool) {
133        if let VectorData::Bool(vec) = &mut self.data {
134            vec.push(value);
135            self.len += 1;
136        }
137    }
138
139    /// Pushes an integer value.
140    pub fn push_int64(&mut self, value: i64) {
141        if let VectorData::Int64(vec) = &mut self.data {
142            vec.push(value);
143            self.len += 1;
144        }
145    }
146
147    /// Pushes a float value.
148    pub fn push_float64(&mut self, value: f64) {
149        if let VectorData::Float64(vec) = &mut self.data {
150            vec.push(value);
151            self.len += 1;
152        }
153    }
154
155    /// Pushes a string value.
156    pub fn push_string(&mut self, value: impl Into<ArcStr>) {
157        if let VectorData::String(vec) = &mut self.data {
158            vec.push(value.into());
159            self.len += 1;
160        }
161    }
162
163    /// Pushes a node ID.
164    pub fn push_node_id(&mut self, value: NodeId) {
165        match &mut self.data {
166            VectorData::NodeId(vec) => {
167                vec.push(value);
168                self.len += 1;
169            }
170            VectorData::Generic(vec) => {
171                vec.push(Value::Int64(value.as_u64() as i64));
172                self.len += 1;
173            }
174            _ => {}
175        }
176    }
177
178    /// Pushes an edge ID.
179    pub fn push_edge_id(&mut self, value: EdgeId) {
180        match &mut self.data {
181            VectorData::EdgeId(vec) => {
182                vec.push(value);
183                self.len += 1;
184            }
185            VectorData::Generic(vec) => {
186                vec.push(Value::Int64(value.as_u64() as i64));
187                self.len += 1;
188            }
189            _ => {}
190        }
191    }
192
193    /// Pushes a generic value.
194    pub fn push_value(&mut self, value: Value) {
195        // Handle null values specially - push a default and mark as null
196        if matches!(value, Value::Null) {
197            match &mut self.data {
198                VectorData::Bool(vec) => vec.push(false),
199                VectorData::Int64(vec) => vec.push(0),
200                VectorData::Float64(vec) => vec.push(0.0),
201                VectorData::String(vec) => vec.push("".into()),
202                VectorData::NodeId(vec) => vec.push(NodeId::new(0)),
203                VectorData::EdgeId(vec) => vec.push(EdgeId::new(0)),
204                VectorData::Generic(vec) => vec.push(Value::Null),
205            }
206            self.len += 1;
207            self.set_null(self.len - 1);
208            return;
209        }
210
211        match (&mut self.data, &value) {
212            (VectorData::Bool(vec), Value::Bool(b)) => vec.push(*b),
213            (VectorData::Int64(vec), Value::Int64(i)) => vec.push(*i),
214            (VectorData::Float64(vec), Value::Float64(f)) => vec.push(*f),
215            (VectorData::String(vec), Value::String(s)) => vec.push(s.clone()),
216            // Handle Int64 -> NodeId conversion (from get_value roundtrip)
217            (VectorData::NodeId(vec), Value::Int64(i)) => vec.push(NodeId::new(*i as u64)),
218            // Handle Int64 -> EdgeId conversion (from get_value roundtrip)
219            (VectorData::EdgeId(vec), Value::Int64(i)) => vec.push(EdgeId::new(*i as u64)),
220            (VectorData::Generic(vec), _) => vec.push(value),
221            _ => {
222                // Type mismatch - push a default value to maintain vector alignment
223                match &mut self.data {
224                    VectorData::Bool(vec) => vec.push(false),
225                    VectorData::Int64(vec) => vec.push(0),
226                    VectorData::Float64(vec) => vec.push(0.0),
227                    VectorData::String(vec) => vec.push("".into()),
228                    VectorData::NodeId(vec) => vec.push(NodeId::new(0)),
229                    VectorData::EdgeId(vec) => vec.push(EdgeId::new(0)),
230                    VectorData::Generic(vec) => vec.push(value),
231                }
232            }
233        }
234        self.len += 1;
235    }
236
237    /// Gets a boolean value at index.
238    #[must_use]
239    pub fn get_bool(&self, index: usize) -> Option<bool> {
240        if self.is_null(index) {
241            return None;
242        }
243        if let VectorData::Bool(vec) = &self.data {
244            vec.get(index).copied()
245        } else {
246            None
247        }
248    }
249
250    /// Gets an integer value at index.
251    #[must_use]
252    pub fn get_int64(&self, index: usize) -> Option<i64> {
253        if self.is_null(index) {
254            return None;
255        }
256        if let VectorData::Int64(vec) = &self.data {
257            vec.get(index).copied()
258        } else {
259            None
260        }
261    }
262
263    /// Gets a float value at index.
264    #[must_use]
265    pub fn get_float64(&self, index: usize) -> Option<f64> {
266        if self.is_null(index) {
267            return None;
268        }
269        if let VectorData::Float64(vec) = &self.data {
270            vec.get(index).copied()
271        } else {
272            None
273        }
274    }
275
276    /// Gets a string value at index.
277    #[must_use]
278    pub fn get_string(&self, index: usize) -> Option<&str> {
279        if self.is_null(index) {
280            return None;
281        }
282        if let VectorData::String(vec) = &self.data {
283            vec.get(index).map(|s| s.as_ref())
284        } else {
285            None
286        }
287    }
288
289    /// Gets a node ID at index.
290    #[must_use]
291    pub fn get_node_id(&self, index: usize) -> Option<NodeId> {
292        if self.is_null(index) {
293            return None;
294        }
295        match &self.data {
296            VectorData::NodeId(vec) => vec.get(index).copied(),
297            // Handle Generic vectors that contain node IDs stored as Int64
298            VectorData::Generic(vec) => match vec.get(index) {
299                Some(Value::Int64(i)) => Some(NodeId::new(*i as u64)),
300                _ => None,
301            },
302            _ => None,
303        }
304    }
305
306    /// Gets an edge ID at index.
307    #[must_use]
308    pub fn get_edge_id(&self, index: usize) -> Option<EdgeId> {
309        if self.is_null(index) {
310            return None;
311        }
312        match &self.data {
313            VectorData::EdgeId(vec) => vec.get(index).copied(),
314            // Handle Generic vectors that contain edge IDs stored as Int64
315            VectorData::Generic(vec) => match vec.get(index) {
316                Some(Value::Int64(i)) => Some(EdgeId::new(*i as u64)),
317                _ => None,
318            },
319            _ => None,
320        }
321    }
322
323    /// Gets a value at index as a generic Value.
324    #[must_use]
325    pub fn get_value(&self, index: usize) -> Option<Value> {
326        if self.is_null(index) {
327            return Some(Value::Null);
328        }
329
330        match &self.data {
331            VectorData::Bool(vec) => vec.get(index).map(|&v| Value::Bool(v)),
332            VectorData::Int64(vec) => vec.get(index).map(|&v| Value::Int64(v)),
333            VectorData::Float64(vec) => vec.get(index).map(|&v| Value::Float64(v)),
334            VectorData::String(vec) => vec.get(index).map(|v| Value::String(v.clone())),
335            VectorData::NodeId(vec) => vec.get(index).map(|&v| Value::Int64(v.as_u64() as i64)),
336            VectorData::EdgeId(vec) => vec.get(index).map(|&v| Value::Int64(v.as_u64() as i64)),
337            VectorData::Generic(vec) => vec.get(index).cloned(),
338        }
339    }
340
341    /// Alias for get_value.
342    #[must_use]
343    pub fn get(&self, index: usize) -> Option<Value> {
344        self.get_value(index)
345    }
346
347    /// Alias for push_value.
348    pub fn push(&mut self, value: Value) {
349        self.push_value(value);
350    }
351
352    /// Returns a slice of the underlying boolean data.
353    #[must_use]
354    pub fn as_bool_slice(&self) -> Option<&[bool]> {
355        if let VectorData::Bool(vec) = &self.data {
356            Some(vec)
357        } else {
358            None
359        }
360    }
361
362    /// Returns a slice of the underlying integer data.
363    #[must_use]
364    pub fn as_int64_slice(&self) -> Option<&[i64]> {
365        if let VectorData::Int64(vec) = &self.data {
366            Some(vec)
367        } else {
368            None
369        }
370    }
371
372    /// Returns a slice of the underlying float data.
373    #[must_use]
374    pub fn as_float64_slice(&self) -> Option<&[f64]> {
375        if let VectorData::Float64(vec) = &self.data {
376            Some(vec)
377        } else {
378            None
379        }
380    }
381
382    /// Returns a slice of the underlying node ID data.
383    #[must_use]
384    pub fn as_node_id_slice(&self) -> Option<&[NodeId]> {
385        if let VectorData::NodeId(vec) = &self.data {
386            Some(vec)
387        } else {
388            None
389        }
390    }
391
392    /// Returns a slice of the underlying edge ID data.
393    #[must_use]
394    pub fn as_edge_id_slice(&self) -> Option<&[EdgeId]> {
395        if let VectorData::EdgeId(vec) = &self.data {
396            Some(vec)
397        } else {
398            None
399        }
400    }
401
402    /// Returns the logical type of this vector.
403    #[must_use]
404    pub fn logical_type(&self) -> LogicalType {
405        self.data_type.clone()
406    }
407
408    /// Copies a row from this vector to the destination vector.
409    ///
410    /// The destination vector should have a compatible type. The value at `row`
411    /// is read from this vector and pushed to the destination vector.
412    pub fn copy_row_to(&self, row: usize, dest: &mut ValueVector) {
413        if self.is_null(row) {
414            dest.push_value(Value::Null);
415            return;
416        }
417
418        match &self.data {
419            VectorData::Bool(vec) => {
420                if let Some(&v) = vec.get(row) {
421                    dest.push_bool(v);
422                }
423            }
424            VectorData::Int64(vec) => {
425                if let Some(&v) = vec.get(row) {
426                    dest.push_int64(v);
427                }
428            }
429            VectorData::Float64(vec) => {
430                if let Some(&v) = vec.get(row) {
431                    dest.push_float64(v);
432                }
433            }
434            VectorData::String(vec) => {
435                if let Some(v) = vec.get(row) {
436                    dest.push_string(v.clone());
437                }
438            }
439            VectorData::NodeId(vec) => {
440                if let Some(&v) = vec.get(row) {
441                    dest.push_node_id(v);
442                }
443            }
444            VectorData::EdgeId(vec) => {
445                if let Some(&v) = vec.get(row) {
446                    dest.push_edge_id(v);
447                }
448            }
449            VectorData::Generic(vec) => {
450                if let Some(v) = vec.get(row) {
451                    dest.push_value(v.clone());
452                }
453            }
454        }
455    }
456
457    /// Clears all data from this vector.
458    pub fn clear(&mut self) {
459        match &mut self.data {
460            VectorData::Bool(vec) => vec.clear(),
461            VectorData::Int64(vec) => vec.clear(),
462            VectorData::Float64(vec) => vec.clear(),
463            VectorData::String(vec) => vec.clear(),
464            VectorData::NodeId(vec) => vec.clear(),
465            VectorData::EdgeId(vec) => vec.clear(),
466            VectorData::Generic(vec) => vec.clear(),
467        }
468        self.len = 0;
469        self.validity = None;
470    }
471}
472
473impl Default for ValueVector {
474    fn default() -> Self {
475        Self::new()
476    }
477}
478
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an Int64 vector holding `values` in order.
    fn int64_vector(values: &[i64]) -> ValueVector {
        let mut vec = ValueVector::with_type(LogicalType::Int64);
        for &v in values {
            vec.push_int64(v);
        }
        vec
    }

    #[test]
    fn test_int64_vector() {
        let expected = [1_i64, 2, 3];
        let vec = int64_vector(&expected);

        assert_eq!(vec.len(), expected.len());
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(vec.get_int64(index), Some(*want));
        }
    }

    #[test]
    fn test_string_vector() {
        let words = ["hello", "world"];
        let mut vec = ValueVector::with_type(LogicalType::String);
        for word in words {
            vec.push_string(word);
        }

        assert_eq!(vec.len(), 2);
        assert_eq!(vec.get_string(0), Some("hello"));
        assert_eq!(vec.get_string(1), Some("world"));
    }

    #[test]
    fn test_null_values() {
        let mut vec = int64_vector(&[1, 2, 3]);

        // The middle entry starts out valid and becomes null on request.
        assert!(!vec.is_null(1));
        vec.set_null(1);
        assert!(vec.is_null(1));

        // Only the nulled entry reads back as `None`; its neighbours are untouched.
        assert_eq!(vec.get_int64(0), Some(1));
        assert_eq!(vec.get_int64(1), None);
        assert_eq!(vec.get_int64(2), Some(3));
    }

    #[test]
    fn test_get_value() {
        let vec = int64_vector(&[42]);

        assert_eq!(vec.get_value(0), Some(Value::Int64(42)));
    }

    #[test]
    fn test_slice_access() {
        let vec = int64_vector(&[1, 2, 3]);

        let slice = vec.as_int64_slice().unwrap();
        assert_eq!(slice, &[1, 2, 3]);
    }

    #[test]
    fn test_clear() {
        let mut vec = int64_vector(&[1, 2]);

        vec.clear();

        assert!(vec.is_empty());
        assert_eq!(vec.len(), 0);
    }
}