vibesql_executor/select/columnar/batch/storage.rs
1//! Storage layer conversion
2//!
3//! This module contains methods for converting between the storage layer's
4//! ColumnarTable and the executor's ColumnarBatch.
5
6use std::sync::Arc;
7
8use vibesql_types::SqlValue;
9
10use super::types::{ColumnArray, ColumnarBatch};
11use crate::errors::ExecutorError;
12
13impl ColumnarBatch {
14 /// Convert from storage layer ColumnarTable to executor ColumnarBatch
15 ///
16 /// This method provides **true zero-copy** conversion from the storage layer's
17 /// columnar format to the executor's columnar format. This is the key integration
18 /// point for native columnar table scans.
19 ///
20 /// # Performance
21 ///
22 /// - **O(1) for numeric/string columns**: Arc::clone is just a reference count bump
23 /// - **< 1 microsecond** for millions of rows (vs O(n) with data copy)
24 /// - Directly shares storage ColumnData with executor ColumnArray
25 /// - Critical path for TPC-H Q6 and other analytical queries
26 ///
27 /// # Zero-Copy Design
28 ///
29 /// Both `vibesql_storage::ColumnData` and executor `ColumnArray` use `Arc<Vec<T>>`
30 /// for column data. Calling `Arc::clone()` only increments a reference count,
31 /// avoiding any data copying:
32 ///
33 /// ```text
34 /// Storage: Arc<Vec<i64>> ─┬─> [1, 2, 3, 4, ...] (shared memory)
35 /// │
36 /// Executor: Arc<Vec<i64>> ┘
37 /// ```
38 ///
39 /// # Arguments
40 ///
41 /// * `storage_columnar` - ColumnarTable from storage layer (vibesql-storage)
42 ///
43 /// # Returns
44 ///
45 /// * `Ok(ColumnarBatch)` - Executor-ready columnar batch with shared Arc references
46 /// * `Err(ExecutorError)` - If type conversion fails
47 pub fn from_storage_columnar(
48 storage_columnar: &vibesql_storage::ColumnarTable,
49 ) -> Result<Self, ExecutorError> {
50 use vibesql_storage::ColumnData;
51
52 let column_names = storage_columnar.column_names().to_vec();
53 let row_count = storage_columnar.row_count();
54
55 // Handle empty tables: return an empty batch with column names but no data
56 // This happens when ColumnarTable::from_rows is called with empty rows -
57 // the column_names are preserved but the columns HashMap is empty
58 if row_count == 0 {
59 return Ok(Self {
60 row_count: 0,
61 columns: Vec::new(),
62 column_names: Some(column_names),
63 });
64 }
65
66 let mut columns = Vec::with_capacity(column_names.len());
67
68 for col_name in column_names.iter() {
69 let storage_col = storage_columnar.get_column(col_name).ok_or_else(|| {
70 ExecutorError::ColumnarColumnNotFoundByName { column_name: col_name.to_string() }
71 })?;
72
73 let column_array =
74 match storage_col {
75 ColumnData::Int64 { values, nulls } => {
76 // Zero-copy: Arc::clone is O(1) - just bumps reference count
77 let null_bitmap =
78 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
79 ColumnArray::Int64(Arc::clone(values), null_bitmap)
80 }
81 ColumnData::Float64 { values, nulls } => {
82 // Zero-copy: Arc::clone is O(1)
83 let null_bitmap =
84 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
85 ColumnArray::Float64(Arc::clone(values), null_bitmap)
86 }
87 ColumnData::String { values, nulls } => {
88 // Zero-copy: Arc::clone is O(1)
89 let null_bitmap =
90 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
91 ColumnArray::String(Arc::clone(values), null_bitmap)
92 }
93 ColumnData::Bool { values, nulls } => {
94 // Convert bool to u8 for SIMD compatibility (requires iteration)
95 let u8_values: Vec<u8> =
96 values.iter().map(|&b| if b { 1 } else { 0 }).collect();
97 let null_bitmap =
98 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
99 ColumnArray::Boolean(Arc::new(u8_values), null_bitmap)
100 }
101 ColumnData::Date { values, nulls } => {
102 // Convert Date to i32 (days since Unix epoch 1970-01-01)
103 // Must use the same formula as simd_filter.rs:date_to_days_since_epoch
104 // for predicate evaluation to work correctly
105 let i32_values: Vec<i32> =
106 values.iter().map(date_to_days_since_epoch).collect();
107 let null_bitmap =
108 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
109 ColumnArray::Date(Arc::new(i32_values), null_bitmap)
110 }
111 ColumnData::Timestamp { values, nulls } => {
112 // Convert Timestamp to Mixed (fallback - no direct i64 conversion)
113 let sql_values: Vec<SqlValue> = values
114 .iter()
115 .zip(nulls.iter())
116 .map(
117 |(t, &is_null)| {
118 if is_null {
119 SqlValue::Null
120 } else {
121 SqlValue::Timestamp(*t)
122 }
123 },
124 )
125 .collect();
126 ColumnArray::Mixed(Arc::new(sql_values))
127 }
128 ColumnData::Time { values, nulls } => {
129 // Convert Time to Mixed (fallback - Time doesn't have direct i64 conversion)
130 let sql_values: Vec<SqlValue> = values
131 .iter()
132 .zip(nulls.iter())
133 .map(
134 |(t, &is_null)| {
135 if is_null {
136 SqlValue::Null
137 } else {
138 SqlValue::Time(*t)
139 }
140 },
141 )
142 .collect();
143 ColumnArray::Mixed(Arc::new(sql_values))
144 }
145 ColumnData::Interval { values, nulls } => {
146 // Convert Interval to Mixed (fallback)
147 let sql_values: Vec<SqlValue> = values
148 .iter()
149 .zip(nulls.iter())
150 .map(|(i, &is_null)| {
151 if is_null {
152 SqlValue::Null
153 } else {
154 SqlValue::Interval(i.clone())
155 }
156 })
157 .collect();
158 ColumnArray::Mixed(Arc::new(sql_values))
159 }
160 ColumnData::Vector { values, nulls } => {
161 // Convert Vector to Mixed (fallback)
162 let sql_values: Vec<SqlValue> =
163 values
164 .iter()
165 .zip(nulls.iter())
166 .map(|(v, &is_null)| {
167 if is_null {
168 SqlValue::Null
169 } else {
170 SqlValue::Vector(v.clone())
171 }
172 })
173 .collect();
174 ColumnArray::Mixed(Arc::new(sql_values))
175 }
176 ColumnData::Blob { values, nulls } => {
177 // Convert Blob to Mixed (fallback)
178 let sql_values: Vec<SqlValue> =
179 values
180 .iter()
181 .zip(nulls.iter())
182 .map(|(b, &is_null)| {
183 if is_null {
184 SqlValue::Null
185 } else {
186 SqlValue::Blob(b.clone())
187 }
188 })
189 .collect();
190 ColumnArray::Mixed(Arc::new(sql_values))
191 }
192 };
193
194 columns.push(column_array);
195 }
196
197 Ok(Self { row_count, columns, column_names: Some(column_names) })
198 }
199}
200
201/// Convert Date to days since Unix epoch (1970-01-01)
202///
203/// This function MUST be kept in sync with simd_filter.rs::date_to_days_since_epoch()
204/// to ensure predicates compare dates correctly.
205fn date_to_days_since_epoch(date: &vibesql_types::Date) -> i32 {
206 // Accurate days since Unix epoch calculation with leap year handling
207 let year_days = (date.year - 1970) * 365;
208 let leap_years =
209 ((date.year - 1969) / 4) - ((date.year - 1901) / 100) + ((date.year - 1601) / 400);
210 let month_days: i32 = match date.month {
211 1 => 0,
212 2 => 31,
213 3 => 59,
214 4 => 90,
215 5 => 120,
216 6 => 151,
217 7 => 181,
218 8 => 212,
219 9 => 243,
220 10 => 273,
221 11 => 304,
222 12 => 334,
223 _ => 0,
224 };
225
226 // Add leap day if after February in a leap year
227 let is_leap = date.year % 4 == 0 && (date.year % 100 != 0 || date.year % 400 == 0);
228 let leap_adjustment = if is_leap && date.month > 2 { 1 } else { 0 };
229
230 year_days + leap_years + month_days + date.day as i32 - 1 + leap_adjustment
231}
232
#[cfg(test)]
mod tests {
    use vibesql_storage::Row;

    use super::*;

    /// Turn string literals into the owned column-name list `from_rows` expects.
    fn owned_names(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    /// Null-free integer, double and varchar columns convert to typed arrays
    /// without a null bitmap.
    #[test]
    fn test_from_storage_columnar() {
        // Three rows of (id, value, name) in the storage layer.
        let rows: Vec<Row> = [(1, 10.5, "Alice"), (2, 20.5, "Bob"), (3, 30.5, "Charlie")]
            .iter()
            .map(|&(id, value, name)| {
                Row::new(vec![
                    SqlValue::Integer(id),
                    SqlValue::Double(value),
                    SqlValue::Varchar(arcstr::ArcStr::from(name)),
                ])
            })
            .collect();
        let column_names = owned_names(&["id", "value", "name"]);
        let storage_columnar =
            vibesql_storage::ColumnarTable::from_rows(&rows, &column_names).unwrap();

        let batch = ColumnarBatch::from_storage_columnar(&storage_columnar).unwrap();

        // Shape and names survive the conversion.
        assert_eq!(batch.row_count(), 3);
        assert_eq!(batch.column_count(), 3);
        let names = batch.column_names().unwrap();
        assert_eq!(names, &["id", "value", "name"]);

        // id -> Int64
        let id_col = batch.column(0).unwrap();
        match id_col.as_i64() {
            Some((values, nulls)) => {
                assert_eq!(values, &[1, 2, 3]);
                assert!(nulls.is_none());
            }
            None => panic!("Expected i64 column"),
        }

        // value -> Float64
        let value_col = batch.column(1).unwrap();
        match value_col.as_f64() {
            Some((values, nulls)) => {
                assert_eq!(values, &[10.5, 20.5, 30.5]);
                assert!(nulls.is_none());
            }
            None => panic!("Expected f64 column"),
        }

        // name -> String
        let name_col = batch.column(2).unwrap();
        match name_col {
            ColumnArray::String(values, nulls) => {
                let str_refs: Vec<&str> = values.iter().map(|s| s.as_ref()).collect();
                assert_eq!(str_refs, vec!["Alice", "Bob", "Charlie"]);
                assert!(nulls.is_none());
            }
            _ => panic!("Expected String column"),
        }
    }

    /// Columns containing NULLs keep a null bitmap that marks the NULL rows.
    #[test]
    fn test_from_storage_columnar_with_nulls() {
        // One NULL in each column, on different rows.
        let rows = vec![
            Row::new(vec![SqlValue::Integer(1), SqlValue::Double(10.0)]),
            Row::new(vec![SqlValue::Null, SqlValue::Double(20.0)]),
            Row::new(vec![SqlValue::Integer(3), SqlValue::Null]),
        ];
        let column_names = owned_names(&["id", "value"]);
        let storage_columnar =
            vibesql_storage::ColumnarTable::from_rows(&rows, &column_names).unwrap();

        let batch = ColumnarBatch::from_storage_columnar(&storage_columnar).unwrap();

        // id: NULL on the second row.
        let id_col = batch.column(0).unwrap();
        match id_col.as_i64() {
            Some((values, Some(nulls))) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls, &[false, true, false]);
            }
            _ => panic!("Expected i64 column with nulls"),
        }

        // value: NULL on the third row.
        let value_col = batch.column(1).unwrap();
        match value_col.as_f64() {
            Some((values, Some(nulls))) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls, &[false, false, true]);
            }
            _ => panic!("Expected f64 column with nulls"),
        }
    }
}