Skip to main content

sparrowdb_execution/
types.rs

//! Factorized execution core types.
//!
//! See spec Section 13 for semantics.
4
5use std::collections::HashMap;
6
7use sparrowdb_common::{EdgeId, NodeId};
8
9/// A typed column vector (one column of data in a group).
10#[derive(Debug, Clone)]
11pub enum TypedVector {
12    Int64(Vec<i64>),
13    Float64(Vec<f64>),
14    Bool(Vec<bool>),
15    String(Vec<String>),
16    NodeRef(Vec<NodeId>),
17    EdgeRef(Vec<EdgeId>),
18}
19
20impl TypedVector {
21    pub fn len(&self) -> usize {
22        match self {
23            TypedVector::Int64(v) => v.len(),
24            TypedVector::Float64(v) => v.len(),
25            TypedVector::Bool(v) => v.len(),
26            TypedVector::String(v) => v.len(),
27            TypedVector::NodeRef(v) => v.len(),
28            TypedVector::EdgeRef(v) => v.len(),
29        }
30    }
31
32    pub fn is_empty(&self) -> bool {
33        self.len() == 0
34    }
35
36    /// Get value at index as a `Value`.
37    pub fn get(&self, idx: usize) -> Value {
38        match self {
39            TypedVector::Int64(v) => Value::Int64(v[idx]),
40            TypedVector::Float64(v) => Value::Float64(v[idx]),
41            TypedVector::Bool(v) => Value::Bool(v[idx]),
42            TypedVector::String(v) => Value::String(v[idx].clone()),
43            TypedVector::NodeRef(v) => Value::NodeRef(v[idx]),
44            TypedVector::EdgeRef(v) => Value::EdgeRef(v[idx]),
45        }
46    }
47}
48
49/// A scalar value (materialized from TypedVector for output).
50#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
51pub enum Value {
52    Null,
53    Int64(i64),
54    Float64(f64),
55    Bool(bool),
56    String(String),
57    NodeRef(NodeId),
58    EdgeRef(EdgeId),
59    /// A list of values, produced by `collect()` aggregation.
60    List(Vec<Value>),
61    /// A property map, returned when a bare node variable is projected (SPA-213).
62    /// Keys are `"col_{id}"` strings; values are the decoded property values.
63    Map(Vec<(String, Value)>),
64}
65
66impl Value {
67    /// Evaluate `CONTAINS` predicate.
68    pub fn contains(&self, other: &Value) -> bool {
69        match (self, other) {
70            (Value::String(s), Value::String(p)) => s.contains(p.as_str()),
71            _ => false,
72        }
73    }
74}
75
76impl std::fmt::Display for Value {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        match self {
79            Value::Null => write!(f, "null"),
80            Value::Int64(v) => write!(f, "{v}"),
81            Value::Float64(v) => write!(f, "{v}"),
82            Value::Bool(v) => write!(f, "{v}"),
83            Value::String(v) => write!(f, "{v}"),
84            Value::NodeRef(n) => write!(f, "node({})", n.0),
85            Value::EdgeRef(e) => write!(f, "edge({})", e.0),
86            Value::List(items) => {
87                write!(f, "[")?;
88                for (i, item) in items.iter().enumerate() {
89                    if i > 0 {
90                        write!(f, ", ")?;
91                    }
92                    write!(f, "{item}")?;
93                }
94                write!(f, "]")
95            }
96            Value::Map(entries) => {
97                write!(f, "{{")?;
98                let mut sorted: Vec<&(String, Value)> = entries.iter().collect();
99                sorted.sort_by(|a, b| a.0.cmp(&b.0));
100                for (i, (k, v)) in sorted.iter().enumerate() {
101                    if i > 0 {
102                        write!(f, ", ")?;
103                    }
104                    write!(f, "{k}: {v}")?;
105                }
106                write!(f, "}}")
107            }
108        }
109    }
110}
111
112/// A vector group: one row-set with named typed columns and a multiplicity.
113///
114/// In factorized execution, `multiplicity` represents the number of implicit
115/// copies of this group without materializing them.
116#[derive(Debug, Clone)]
117pub struct VectorGroup {
118    /// Named column vectors.  All vectors must have the same length.
119    pub columns: HashMap<String, TypedVector>,
120    /// Logical multiplicity — how many times this group is counted.
121    pub multiplicity: u64,
122}
123
124impl VectorGroup {
125    pub fn new(multiplicity: u64) -> Self {
126        VectorGroup {
127            columns: HashMap::new(),
128            multiplicity,
129        }
130    }
131
132    pub fn add_column(&mut self, name: String, vec: TypedVector) {
133        self.columns.insert(name, vec);
134    }
135
136    /// Number of rows in this group (length of any column; 0 if empty).
137    pub fn len(&self) -> usize {
138        self.columns.values().next().map(|v| v.len()).unwrap_or(0)
139    }
140
141    pub fn is_empty(&self) -> bool {
142        self.len() == 0
143    }
144
145    pub fn has_column(&self, name: &str) -> bool {
146        self.columns.contains_key(name)
147    }
148
149    /// Get value at row index from column `name`.
150    pub fn get_value(&self, col: &str, row: usize) -> Option<Value> {
151        self.columns.get(col).and_then(|v| {
152            if row < v.len() {
153                Some(v.get(row))
154            } else {
155                None
156            }
157        })
158    }
159
160    /// Logical row count (len * multiplicity).
161    pub fn logical_row_count(&self) -> u64 {
162        self.len() as u64 * self.multiplicity
163    }
164}
165
166/// A factorized chunk: a batch of vector groups.
167#[derive(Debug, Clone)]
168pub struct FactorizedChunk {
169    pub groups: Vec<VectorGroup>,
170}
171
172impl FactorizedChunk {
173    pub fn new() -> Self {
174        FactorizedChunk { groups: Vec::new() }
175    }
176
177    pub fn push_group(&mut self, group: VectorGroup) {
178        self.groups.push(group);
179    }
180
181    pub fn is_empty(&self) -> bool {
182        self.groups.is_empty()
183    }
184
185    /// Total logical row count across all groups.
186    pub fn logical_row_count(&self) -> u64 {
187        self.groups.iter().map(|g| g.logical_row_count()).sum()
188    }
189}
190
191impl Default for FactorizedChunk {
192    fn default() -> Self {
193        Self::new()
194    }
195}
196
197/// Final materialized query result.
198#[derive(Debug, Clone)]
199pub struct QueryResult {
200    /// Named column headers, in the same order as values within each row.
201    ///
202    /// For `RETURN` queries these are the projected aliases (or expression
203    /// text when no alias is given).  For `CALL` procedures these are the
204    /// output column names declared by the procedure (e.g. `["type", "name",
205    /// "properties"]` for `CALL db.schema()`).
206    pub columns: Vec<String>,
207    pub rows: Vec<Vec<Value>>,
208}
209
210impl QueryResult {
211    pub fn empty(columns: Vec<String>) -> Self {
212        QueryResult {
213            columns,
214            rows: Vec::new(),
215        }
216    }
217
218    /// Return row `idx` as a `HashMap<column_name, Value>`.
219    ///
220    /// Returns `None` if `idx` is out of bounds.  Column names come from
221    /// `self.columns`; if the columns list is shorter than the row, extra
222    /// values are dropped.  If the columns list is longer than the row,
223    /// missing values are absent from the map (they are never `Null`-padded).
224    pub fn row_as_map(&self, idx: usize) -> Option<HashMap<String, Value>> {
225        let row = self.rows.get(idx)?;
226        let mut out: HashMap<String, Value> = HashMap::new();
227        let mut seen: HashMap<String, usize> = HashMap::new();
228        for (col, val) in self.columns.iter().zip(row.iter()) {
229            let count = seen.entry(col.clone()).or_insert(0);
230            *count += 1;
231            let key = if *count == 1 {
232                col.clone()
233            } else {
234                format!("{col}#{count}")
235            };
236            out.insert(key, val.clone());
237        }
238        Some(out)
239    }
240}
241
#[cfg(test)]
mod tests {
    use super::*;

    /// A group with one three-element column reports three rows.
    #[test]
    fn vector_group_len_matches_column_len() {
        let mut group = VectorGroup::new(1);
        group.add_column(String::from("x"), TypedVector::Int64(vec![1, 2, 3]));
        assert_eq!(group.len(), 3);
    }

    /// The chunk total is the sum of each group's `len * multiplicity`.
    #[test]
    fn factorized_chunk_logical_row_count() {
        let mut doubled = VectorGroup::new(2);
        doubled.add_column(String::from("a"), TypedVector::Int64(vec![1, 2]));

        let mut tripled = VectorGroup::new(3);
        tripled.add_column(String::from("a"), TypedVector::Int64(vec![10]));

        let mut chunk = FactorizedChunk::new();
        chunk.push_group(doubled);
        chunk.push_group(tripled);

        // doubled: 2 rows * 2 = 4; tripled: 1 row * 3 = 3; total = 7
        assert_eq!(chunk.logical_row_count(), 7);
    }
}