Skip to main content

virtual_frame/
column.rs

1//! Columnar storage — typed vectors, one per column.
2//!
3//! Each column is a contiguous typed buffer. Aggregation scans a single
4//! `Vec<f64>` — no pointer chasing, no skipping over unrelated fields.
5//! This is the same storage model used by Apache Arrow and DuckDB.
6
7use std::cmp::Ordering;
8
/// A single typed column of data.
///
/// Every variant owns one homogeneously typed `Vec`; the number of rows in
/// the column is the length of that vector.
#[derive(Debug, Clone)]
pub enum Column {
    /// 64-bit signed integer column.
    Int(Vec<i64>),
    /// 64-bit floating-point column.
    Float(Vec<f64>),
    /// UTF-8 string column.
    Str(Vec<String>),
    /// Boolean column.
    Bool(Vec<bool>),
}

impl Column {
    /// Number of rows.
    pub fn len(&self) -> usize {
        match self {
            Column::Int(v) => v.len(),
            Column::Float(v) => v.len(),
            Column::Str(v) => v.len(),
            Column::Bool(v) => v.len(),
        }
    }

    /// Whether the column is empty.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Human-readable type name (`"Int"`, `"Float"`, `"Str"` or `"Bool"`).
    pub fn type_name(&self) -> &'static str {
        match self {
            Column::Int(_) => "Int",
            Column::Float(_) => "Float",
            Column::Str(_) => "Str",
            Column::Bool(_) => "Bool",
        }
    }

    /// Get a display-friendly string value at index.
    ///
    /// Numbers and booleans are rendered with their `Display` impl; strings
    /// are returned as an owned copy.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds.
    pub fn get_display(&self, idx: usize) -> String {
        match self {
            Column::Int(v) => v[idx].to_string(),
            Column::Float(v) => v[idx].to_string(),
            Column::Str(v) => v[idx].clone(),
            Column::Bool(v) => v[idx].to_string(),
        }
    }

    /// Get value as f64 (for numeric aggregation). Returns None for non-numeric.
    ///
    /// Booleans map to `1.0` / `0.0`. Integers are converted with `as`, so
    /// magnitudes above 2^53 are rounded to the nearest representable `f64`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for an `Int`, `Float` or `Bool`
    /// column. A `Str` column never indexes and always returns `None`.
    pub fn get_f64(&self, idx: usize) -> Option<f64> {
        match self {
            Column::Int(v) => Some(v[idx] as f64),
            Column::Float(v) => Some(v[idx]),
            Column::Bool(v) => Some(if v[idx] { 1.0 } else { 0.0 }),
            Column::Str(_) => None,
        }
    }

    /// Compare two rows within this column. Used by sort.
    ///
    /// Integers, strings and booleans use their natural `Ord`. Floats use the
    /// usual numeric comparison (so `-0.0 == 0.0`), with every NaN ordered
    /// after all non-NaN values and equal to every other NaN.
    ///
    /// # Panics
    ///
    /// Panics if `a` or `b` is out of bounds.
    pub fn compare_rows(&self, a: usize, b: usize) -> Ordering {
        match self {
            Column::Int(v) => v[a].cmp(&v[b]),
            Column::Float(v) => {
                let (lhs, rhs) = (v[a], v[b]);
                // `partial_cmp` is `None` only when at least one side is NaN.
                // Treating that case as `Equal` would make NaN equal to every
                // value, which is not a total order and breaks sorting; rank
                // NaN above non-NaN instead (`false < true`).
                lhs.partial_cmp(&rhs)
                    .unwrap_or_else(|| lhs.is_nan().cmp(&rhs.is_nan()))
            }
            Column::Str(v) => v[a].cmp(&v[b]),
            Column::Bool(v) => v[a].cmp(&v[b]),
        }
    }

    /// Gather rows by index — create a new column from selected rows.
    ///
    /// The result has the same variant as `self` and one row per entry of
    /// `indices`, in the order given; an index may appear more than once.
    ///
    /// # Panics
    ///
    /// Panics if any index is out of bounds.
    pub fn gather(&self, indices: &[usize]) -> Column {
        match self {
            Column::Int(v) => Column::Int(indices.iter().map(|&i| v[i]).collect()),
            Column::Float(v) => Column::Float(indices.iter().map(|&i| v[i]).collect()),
            Column::Str(v) => Column::Str(indices.iter().map(|&i| v[i].clone()).collect()),
            Column::Bool(v) => Column::Bool(indices.iter().map(|&i| v[i]).collect()),
        }
    }
}
88
/// Wrapper for f64 that provides total ordering (for BTreeMap keys).
///
/// Non-NaN values are ordered by IEEE 754 totalOrder, so `-0.0` sorts before
/// `+0.0` and the two are distinct keys. Every NaN — whatever its sign bit or
/// payload — is treated as one and the same key and sorts after all non-NaN
/// values.
#[derive(Debug, Clone, Copy)]
pub struct FloatKey(pub f64);

impl PartialEq for FloatKey {
    /// Two keys are equal exactly when [`Ord::cmp`] reports `Equal`, which
    /// keeps `Eq` and `Ord` consistent as ordered maps require.
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for FloatKey {}

impl PartialOrd for FloatKey {
    /// Always `Some`: the ordering is total.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for FloatKey {
    /// Total order with all NaNs collapsed into a single greatest element.
    fn cmp(&self, other: &Self) -> Ordering {
        match (self.0.is_nan(), other.0.is_nan()) {
            (false, false) => self.0.total_cmp(&other.0),
            // Plain `total_cmp` would place sign-bit-set NaNs before every
            // other value and keep NaNs with different bits apart. Comparing
            // the flags (`false < true`) puts any NaN last and makes two NaNs
            // equal.
            (lhs_nan, rhs_nan) => lhs_nan.cmp(&rhs_nan),
        }
    }
}
113
114/// Owned group key — used for storing unique group values.
115#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
116pub enum GroupKey {
117    Int(i64),
118    Float(FloatKey),
119    Str(String),
120    Bool(bool),
121}
122
123impl GroupKey {
124    /// Construct from a column at a given row.
125    pub fn from_column(col: &Column, row: usize) -> Self {
126        match col {
127            Column::Int(v) => GroupKey::Int(v[row]),
128            Column::Float(v) => GroupKey::Float(FloatKey(v[row])),
129            Column::Str(v) => GroupKey::Str(v[row].clone()),
130            Column::Bool(v) => GroupKey::Bool(v[row]),
131        }
132    }
133
134    /// Display as string.
135    pub fn to_display(&self) -> String {
136        match self {
137            GroupKey::Int(v) => format!("{}", v),
138            GroupKey::Float(FloatKey(v)) => format!("{}", v),
139            GroupKey::Str(s) => s.clone(),
140            GroupKey::Bool(b) => format!("{}", b),
141        }
142    }
143}
144
145/// Borrowed group key — zero-copy reference into column data.
146/// Used for BTreeMap lookups without per-row allocation.
147#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
148pub enum ColumnKeyRef<'a> {
149    Int(i64),
150    Float(FloatKey),
151    Str(&'a str),
152    Bool(bool),
153}
154
155impl<'a> ColumnKeyRef<'a> {
156    /// Borrow a key from a column at a given row — zero allocation.
157    #[inline]
158    pub fn from_column(col: &'a Column, row: usize) -> Self {
159        match col {
160            Column::Int(v) => ColumnKeyRef::Int(v[row]),
161            Column::Float(v) => ColumnKeyRef::Float(FloatKey(v[row])),
162            Column::Str(v) => ColumnKeyRef::Str(&v[row]),
163            Column::Bool(v) => ColumnKeyRef::Bool(v[row]),
164        }
165    }
166
167    /// Convert to owned GroupKey (clones string if Str variant).
168    pub fn to_owned_key(&self) -> GroupKey {
169        match self {
170            ColumnKeyRef::Int(v) => GroupKey::Int(*v),
171            ColumnKeyRef::Float(v) => GroupKey::Float(*v),
172            ColumnKeyRef::Str(s) => GroupKey::Str((*s).to_string()),
173            ColumnKeyRef::Bool(v) => GroupKey::Bool(*v),
174        }
175    }
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// `len` and `is_empty` reflect the number of stored rows.
    #[test]
    fn test_column_len() {
        let col = Column::Int(vec![1, 2, 3]);
        assert_eq!(col.len(), 3);
        assert!(!col.is_empty());
    }

    /// `gather` keeps the variant and returns the selected rows in order.
    #[test]
    fn test_column_gather() {
        let col = Column::Str(vec!["a".into(), "b".into(), "c".into(), "d".into()]);
        let gathered = col.gather(&[0, 2, 3]);
        if let Column::Str(v) = gathered {
            assert_eq!(v, vec!["a", "c", "d"]);
        } else {
            panic!("wrong type");
        }
    }

    /// NaN must have a definite, antisymmetric position relative to ordinary
    /// values — not merely avoid panicking.
    #[test]
    fn test_float_key_nan_ordering() {
        // `copysign(1.0)` pins the sign bit so the NaN under test is the
        // positive one, independent of how `f64::NAN` is encoded.
        let a = FloatKey(f64::NAN.copysign(1.0));
        let b = FloatKey(1.0);
        assert_eq!(a.cmp(&b), Ordering::Greater);
        assert_eq!(b.cmp(&a), Ordering::Less);
        assert_eq!(a.cmp(&a), Ordering::Equal);
        assert_eq!(a, a);
    }

    /// A borrowed key compares equal to the expected `&str` and converts to
    /// the matching owned key.
    #[test]
    fn test_column_key_ref_zero_copy() {
        let col = Column::Str(vec!["hello".into(), "world".into()]);
        let key = ColumnKeyRef::from_column(&col, 0);
        // key borrows from col — no allocation
        assert_eq!(key, ColumnKeyRef::Str("hello"));
        assert_eq!(key.to_owned_key(), GroupKey::Str("hello".to_string()));
    }
}