Skip to main content

graphos_core/execution/
vector.rs

1//! ValueVector for columnar data storage.
2
3use graphos_common::types::{LogicalType, NodeId, Value};
4use std::sync::Arc;
5
/// Number of entries a [`ValueVector`] reserves room for when it is created
/// through [`ValueVector::new`].
pub const DEFAULT_VECTOR_CAPACITY: usize = 2_048;
8
9/// A columnar vector of values.
10///
11/// ValueVector stores data in columnar format for efficient SIMD processing
12/// and cache utilization during query execution.
13#[derive(Debug, Clone)]
14pub struct ValueVector {
15    /// The logical type of values in this vector.
16    data_type: LogicalType,
17    /// The actual data storage.
18    data: VectorData,
19    /// Number of valid entries.
20    len: usize,
21    /// Validity bitmap (true = valid, false = null).
22    validity: Option<Vec<bool>>,
23}
24
25/// Internal storage for vector data.
26#[derive(Debug, Clone)]
27enum VectorData {
28    /// Boolean values.
29    Bool(Vec<bool>),
30    /// 64-bit integers.
31    Int64(Vec<i64>),
32    /// 64-bit floats.
33    Float64(Vec<f64>),
34    /// Strings (stored as Arc for cheap cloning).
35    String(Vec<Arc<str>>),
36    /// Node IDs.
37    NodeId(Vec<NodeId>),
38    /// Generic values (fallback for complex types).
39    Generic(Vec<Value>),
40}
41
42impl ValueVector {
43    /// Creates a new empty vector with the given type.
44    #[must_use]
45    pub fn new(data_type: LogicalType) -> Self {
46        Self::with_capacity(data_type, DEFAULT_VECTOR_CAPACITY)
47    }
48
49    /// Creates a new vector with the given capacity.
50    #[must_use]
51    pub fn with_capacity(data_type: LogicalType, capacity: usize) -> Self {
52        let data = match &data_type {
53            LogicalType::Bool => VectorData::Bool(Vec::with_capacity(capacity)),
54            LogicalType::Int8 | LogicalType::Int16 | LogicalType::Int32 | LogicalType::Int64 => {
55                VectorData::Int64(Vec::with_capacity(capacity))
56            }
57            LogicalType::Float32 | LogicalType::Float64 => {
58                VectorData::Float64(Vec::with_capacity(capacity))
59            }
60            LogicalType::String => VectorData::String(Vec::with_capacity(capacity)),
61            LogicalType::Node => VectorData::NodeId(Vec::with_capacity(capacity)),
62            _ => VectorData::Generic(Vec::with_capacity(capacity)),
63        };
64
65        Self {
66            data_type,
67            data,
68            len: 0,
69            validity: None,
70        }
71    }
72
73    /// Returns the data type of this vector.
74    #[must_use]
75    pub fn data_type(&self) -> &LogicalType {
76        &self.data_type
77    }
78
79    /// Returns the number of entries in this vector.
80    #[must_use]
81    pub fn len(&self) -> usize {
82        self.len
83    }
84
85    /// Returns true if this vector is empty.
86    #[must_use]
87    pub fn is_empty(&self) -> bool {
88        self.len == 0
89    }
90
91    /// Returns true if the value at index is null.
92    #[must_use]
93    pub fn is_null(&self, index: usize) -> bool {
94        self.validity
95            .as_ref()
96            .map_or(false, |v| !v.get(index).copied().unwrap_or(true))
97    }
98
99    /// Sets the value at index to null.
100    pub fn set_null(&mut self, index: usize) {
101        if self.validity.is_none() {
102            self.validity = Some(vec![true; self.len]);
103        }
104        if let Some(validity) = &mut self.validity {
105            if index < validity.len() {
106                validity[index] = false;
107            }
108        }
109    }
110
111    /// Pushes a boolean value.
112    pub fn push_bool(&mut self, value: bool) {
113        if let VectorData::Bool(vec) = &mut self.data {
114            vec.push(value);
115            self.len += 1;
116        }
117    }
118
119    /// Pushes an integer value.
120    pub fn push_int64(&mut self, value: i64) {
121        if let VectorData::Int64(vec) = &mut self.data {
122            vec.push(value);
123            self.len += 1;
124        }
125    }
126
127    /// Pushes a float value.
128    pub fn push_float64(&mut self, value: f64) {
129        if let VectorData::Float64(vec) = &mut self.data {
130            vec.push(value);
131            self.len += 1;
132        }
133    }
134
135    /// Pushes a string value.
136    pub fn push_string(&mut self, value: impl Into<Arc<str>>) {
137        if let VectorData::String(vec) = &mut self.data {
138            vec.push(value.into());
139            self.len += 1;
140        }
141    }
142
143    /// Pushes a node ID.
144    pub fn push_node_id(&mut self, value: NodeId) {
145        if let VectorData::NodeId(vec) = &mut self.data {
146            vec.push(value);
147            self.len += 1;
148        }
149    }
150
151    /// Pushes a generic value.
152    pub fn push_value(&mut self, value: Value) {
153        match (&mut self.data, &value) {
154            (VectorData::Bool(vec), Value::Bool(b)) => vec.push(*b),
155            (VectorData::Int64(vec), Value::Int64(i)) => vec.push(*i),
156            (VectorData::Float64(vec), Value::Float64(f)) => vec.push(*f),
157            (VectorData::String(vec), Value::String(s)) => vec.push(s.clone()),
158            (VectorData::Generic(vec), _) => vec.push(value),
159            _ => {
160                // Type mismatch - convert to generic
161                // This shouldn't happen in well-typed execution
162            }
163        }
164        self.len += 1;
165    }
166
167    /// Gets a boolean value at index.
168    #[must_use]
169    pub fn get_bool(&self, index: usize) -> Option<bool> {
170        if self.is_null(index) {
171            return None;
172        }
173        if let VectorData::Bool(vec) = &self.data {
174            vec.get(index).copied()
175        } else {
176            None
177        }
178    }
179
180    /// Gets an integer value at index.
181    #[must_use]
182    pub fn get_int64(&self, index: usize) -> Option<i64> {
183        if self.is_null(index) {
184            return None;
185        }
186        if let VectorData::Int64(vec) = &self.data {
187            vec.get(index).copied()
188        } else {
189            None
190        }
191    }
192
193    /// Gets a float value at index.
194    #[must_use]
195    pub fn get_float64(&self, index: usize) -> Option<f64> {
196        if self.is_null(index) {
197            return None;
198        }
199        if let VectorData::Float64(vec) = &self.data {
200            vec.get(index).copied()
201        } else {
202            None
203        }
204    }
205
206    /// Gets a string value at index.
207    #[must_use]
208    pub fn get_string(&self, index: usize) -> Option<&str> {
209        if self.is_null(index) {
210            return None;
211        }
212        if let VectorData::String(vec) = &self.data {
213            vec.get(index).map(|s| s.as_ref())
214        } else {
215            None
216        }
217    }
218
219    /// Gets a node ID at index.
220    #[must_use]
221    pub fn get_node_id(&self, index: usize) -> Option<NodeId> {
222        if self.is_null(index) {
223            return None;
224        }
225        if let VectorData::NodeId(vec) = &self.data {
226            vec.get(index).copied()
227        } else {
228            None
229        }
230    }
231
232    /// Gets a value at index as a generic Value.
233    #[must_use]
234    pub fn get_value(&self, index: usize) -> Option<Value> {
235        if self.is_null(index) {
236            return Some(Value::Null);
237        }
238
239        match &self.data {
240            VectorData::Bool(vec) => vec.get(index).map(|&v| Value::Bool(v)),
241            VectorData::Int64(vec) => vec.get(index).map(|&v| Value::Int64(v)),
242            VectorData::Float64(vec) => vec.get(index).map(|&v| Value::Float64(v)),
243            VectorData::String(vec) => vec.get(index).map(|v| Value::String(v.clone())),
244            VectorData::NodeId(vec) => vec.get(index).map(|&v| Value::Int64(v.as_u64() as i64)),
245            VectorData::Generic(vec) => vec.get(index).cloned(),
246        }
247    }
248
249    /// Returns a slice of the underlying boolean data.
250    #[must_use]
251    pub fn as_bool_slice(&self) -> Option<&[bool]> {
252        if let VectorData::Bool(vec) = &self.data {
253            Some(vec)
254        } else {
255            None
256        }
257    }
258
259    /// Returns a slice of the underlying integer data.
260    #[must_use]
261    pub fn as_int64_slice(&self) -> Option<&[i64]> {
262        if let VectorData::Int64(vec) = &self.data {
263            Some(vec)
264        } else {
265            None
266        }
267    }
268
269    /// Returns a slice of the underlying float data.
270    #[must_use]
271    pub fn as_float64_slice(&self) -> Option<&[f64]> {
272        if let VectorData::Float64(vec) = &self.data {
273            Some(vec)
274        } else {
275            None
276        }
277    }
278
279    /// Returns a slice of the underlying node ID data.
280    #[must_use]
281    pub fn as_node_id_slice(&self) -> Option<&[NodeId]> {
282        if let VectorData::NodeId(vec) = &self.data {
283            Some(vec)
284        } else {
285            None
286        }
287    }
288
289    /// Clears all data from this vector.
290    pub fn clear(&mut self) {
291        match &mut self.data {
292            VectorData::Bool(vec) => vec.clear(),
293            VectorData::Int64(vec) => vec.clear(),
294            VectorData::Float64(vec) => vec.clear(),
295            VectorData::String(vec) => vec.clear(),
296            VectorData::NodeId(vec) => vec.clear(),
297            VectorData::Generic(vec) => vec.clear(),
298        }
299        self.len = 0;
300        self.validity = None;
301    }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an Int64 vector holding `values` in the given order.
    fn int64_vector(values: &[i64]) -> ValueVector {
        let mut vector = ValueVector::new(LogicalType::Int64);
        for &value in values {
            vector.push_int64(value);
        }
        vector
    }

    /// Pushed integers are counted and can be read back by index.
    #[test]
    fn test_int64_vector() {
        let vector = int64_vector(&[1, 2, 3]);

        assert_eq!(vector.len(), 3);
        for (index, expected) in [1_i64, 2, 3].into_iter().enumerate() {
            assert_eq!(vector.get_int64(index), Some(expected));
        }
    }

    /// Pushed strings are counted and can be read back by index.
    #[test]
    fn test_string_vector() {
        let mut vector = ValueVector::new(LogicalType::String);
        for word in ["hello", "world"] {
            vector.push_string(word);
        }

        assert_eq!(vector.len(), 2);
        assert_eq!(vector.get_string(0), Some("hello"));
        assert_eq!(vector.get_string(1), Some("world"));
    }

    /// Marking an entry null hides only that entry from the typed getter.
    #[test]
    fn test_null_values() {
        let mut vector = int64_vector(&[1, 2, 3]);

        assert!(!vector.is_null(1));
        vector.set_null(1);
        assert!(vector.is_null(1));

        assert_eq!(vector.get_int64(0), Some(1));
        assert_eq!(vector.get_int64(1), None); // Null
        assert_eq!(vector.get_int64(2), Some(3));
    }

    /// The generic getter wraps an integer entry in `Value::Int64`.
    #[test]
    fn test_get_value() {
        let vector = int64_vector(&[42]);

        assert_eq!(vector.get_value(0), Some(Value::Int64(42)));
    }

    /// The raw slice exposes the entries in push order.
    #[test]
    fn test_slice_access() {
        let vector = int64_vector(&[1, 2, 3]);

        let slice = vector.as_int64_slice().unwrap();
        assert_eq!(slice, &[1, 2, 3]);
    }

    /// Clearing leaves an empty vector behind.
    #[test]
    fn test_clear() {
        let mut vector = int64_vector(&[1, 2]);

        vector.clear();

        assert!(vector.is_empty());
        assert_eq!(vector.len(), 0);
    }
}