vibesql_executor/select/columnar/batch/storage.rs
1//! Storage layer conversion
2//!
3//! This module contains methods for converting between the storage layer's
4//! ColumnarTable and the executor's ColumnarBatch.
5
6use std::sync::Arc;
7
8use crate::errors::ExecutorError;
9use vibesql_types::SqlValue;
10
11use super::types::{ColumnArray, ColumnarBatch};
12
13impl ColumnarBatch {
14 /// Convert from storage layer ColumnarTable to executor ColumnarBatch
15 ///
16 /// This method provides **true zero-copy** conversion from the storage layer's
17 /// columnar format to the executor's columnar format. This is the key integration
18 /// point for native columnar table scans.
19 ///
20 /// # Performance
21 ///
22 /// - **O(1) for numeric/string columns**: Arc::clone is just a reference count bump
23 /// - **< 1 microsecond** for millions of rows (vs O(n) with data copy)
24 /// - Directly shares storage ColumnData with executor ColumnArray
25 /// - Critical path for TPC-H Q6 and other analytical queries
26 ///
27 /// # Zero-Copy Design
28 ///
29 /// Both `vibesql_storage::ColumnData` and executor `ColumnArray` use `Arc<Vec<T>>`
30 /// for column data. Calling `Arc::clone()` only increments a reference count,
31 /// avoiding any data copying:
32 ///
33 /// ```text
34 /// Storage: Arc<Vec<i64>> ─┬─> [1, 2, 3, 4, ...] (shared memory)
35 /// │
36 /// Executor: Arc<Vec<i64>> ┘
37 /// ```
38 ///
39 /// # Arguments
40 ///
41 /// * `storage_columnar` - ColumnarTable from storage layer (vibesql-storage)
42 ///
43 /// # Returns
44 ///
45 /// * `Ok(ColumnarBatch)` - Executor-ready columnar batch with shared Arc references
46 /// * `Err(ExecutorError)` - If type conversion fails
47 pub fn from_storage_columnar(
48 storage_columnar: &vibesql_storage::ColumnarTable,
49 ) -> Result<Self, ExecutorError> {
50 use vibesql_storage::ColumnData;
51
52 let column_names = storage_columnar.column_names().to_vec();
53 let row_count = storage_columnar.row_count();
54
55 // Handle empty tables: return an empty batch with column names but no data
56 // This happens when ColumnarTable::from_rows is called with empty rows -
57 // the column_names are preserved but the columns HashMap is empty
58 if row_count == 0 {
59 return Ok(Self {
60 row_count: 0,
61 columns: Vec::new(),
62 column_names: Some(column_names),
63 });
64 }
65
66 let mut columns = Vec::with_capacity(column_names.len());
67
68 for col_name in column_names.iter() {
69 let storage_col = storage_columnar.get_column(col_name).ok_or_else(|| {
70 ExecutorError::ColumnarColumnNotFoundByName { column_name: col_name.clone() }
71 })?;
72
73 let column_array =
74 match storage_col {
75 ColumnData::Int64 { values, nulls } => {
76 // Zero-copy: Arc::clone is O(1) - just bumps reference count
77 let null_bitmap =
78 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
79 ColumnArray::Int64(Arc::clone(values), null_bitmap)
80 }
81 ColumnData::Float64 { values, nulls } => {
82 // Zero-copy: Arc::clone is O(1)
83 let null_bitmap =
84 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
85 ColumnArray::Float64(Arc::clone(values), null_bitmap)
86 }
87 ColumnData::String { values, nulls } => {
88 // Zero-copy: Arc::clone is O(1)
89 let null_bitmap =
90 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
91 ColumnArray::String(Arc::clone(values), null_bitmap)
92 }
93 ColumnData::Bool { values, nulls } => {
94 // Convert bool to u8 for SIMD compatibility (requires iteration)
95 let u8_values: Vec<u8> =
96 values.iter().map(|&b| if b { 1 } else { 0 }).collect();
97 let null_bitmap =
98 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
99 ColumnArray::Boolean(Arc::new(u8_values), null_bitmap)
100 }
101 ColumnData::Date { values, nulls } => {
102 // Convert Date to i32 (days since Unix epoch 1970-01-01)
103 // Must use the same formula as simd_filter.rs:date_to_days_since_epoch
104 // for predicate evaluation to work correctly
105 let i32_values: Vec<i32> =
106 values.iter().map(date_to_days_since_epoch).collect();
107 let null_bitmap =
108 if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
109 ColumnArray::Date(Arc::new(i32_values), null_bitmap)
110 }
111 ColumnData::Timestamp { values, nulls } => {
112 // Convert Timestamp to Mixed (fallback - no direct i64 conversion)
113 let sql_values: Vec<SqlValue> = values
114 .iter()
115 .zip(nulls.iter())
116 .map(
117 |(t, &is_null)| {
118 if is_null {
119 SqlValue::Null
120 } else {
121 SqlValue::Timestamp(*t)
122 }
123 },
124 )
125 .collect();
126 ColumnArray::Mixed(Arc::new(sql_values))
127 }
128 ColumnData::Time { values, nulls } => {
129 // Convert Time to Mixed (fallback - Time doesn't have direct i64 conversion)
130 let sql_values: Vec<SqlValue> = values
131 .iter()
132 .zip(nulls.iter())
133 .map(
134 |(t, &is_null)| {
135 if is_null {
136 SqlValue::Null
137 } else {
138 SqlValue::Time(*t)
139 }
140 },
141 )
142 .collect();
143 ColumnArray::Mixed(Arc::new(sql_values))
144 }
145 ColumnData::Interval { values, nulls } => {
146 // Convert Interval to Mixed (fallback)
147 let sql_values: Vec<SqlValue> = values
148 .iter()
149 .zip(nulls.iter())
150 .map(|(i, &is_null)| {
151 if is_null {
152 SqlValue::Null
153 } else {
154 SqlValue::Interval(i.clone())
155 }
156 })
157 .collect();
158 ColumnArray::Mixed(Arc::new(sql_values))
159 }
160 ColumnData::Vector { values, nulls } => {
161 // Convert Vector to Mixed (fallback)
162 let sql_values: Vec<SqlValue> =
163 values
164 .iter()
165 .zip(nulls.iter())
166 .map(|(v, &is_null)| {
167 if is_null {
168 SqlValue::Null
169 } else {
170 SqlValue::Vector(v.clone())
171 }
172 })
173 .collect();
174 ColumnArray::Mixed(Arc::new(sql_values))
175 }
176 };
177
178 columns.push(column_array);
179 }
180
181 Ok(Self { row_count, columns, column_names: Some(column_names) })
182 }
183}
184
185/// Convert Date to days since Unix epoch (1970-01-01)
186///
187/// This function MUST be kept in sync with simd_filter.rs::date_to_days_since_epoch()
188/// to ensure predicates compare dates correctly.
189fn date_to_days_since_epoch(date: &vibesql_types::Date) -> i32 {
190 // Accurate days since Unix epoch calculation with leap year handling
191 let year_days = (date.year - 1970) * 365;
192 let leap_years =
193 ((date.year - 1969) / 4) - ((date.year - 1901) / 100) + ((date.year - 1601) / 400);
194 let month_days: i32 = match date.month {
195 1 => 0,
196 2 => 31,
197 3 => 59,
198 4 => 90,
199 5 => 120,
200 6 => 151,
201 7 => 181,
202 8 => 212,
203 9 => 243,
204 10 => 273,
205 11 => 304,
206 12 => 334,
207 _ => 0,
208 };
209
210 // Add leap day if after February in a leap year
211 let is_leap = date.year % 4 == 0 && (date.year % 100 != 0 || date.year % 400 == 0);
212 let leap_adjustment = if is_leap && date.month > 2 { 1 } else { 0 };
213
214 year_days + leap_years + month_days + date.day as i32 - 1 + leap_adjustment
215}
216
#[cfg(test)]
mod tests {
    use super::*;
    use vibesql_storage::Row;

    /// Integer, double and varchar columns without NULLs convert to Int64,
    /// Float64 and String arrays with their values intact and no null bitmap.
    #[test]
    fn test_from_storage_columnar() {
        // Three-row storage table: (id, value, name)
        let make_row = |id, value, name: &str| {
            Row::new(vec![
                SqlValue::Integer(id),
                SqlValue::Double(value),
                SqlValue::Varchar(name.to_string()),
            ])
        };
        let rows =
            vec![make_row(1, 10.5, "Alice"), make_row(2, 20.5, "Bob"), make_row(3, 30.5, "Charlie")];
        let column_names = vec!["id".to_string(), "value".to_string(), "name".to_string()];
        let storage_table =
            vibesql_storage::ColumnarTable::from_rows(&rows, &column_names).unwrap();

        // Storage -> executor conversion under test
        let batch = ColumnarBatch::from_storage_columnar(&storage_table).unwrap();

        // Shape
        assert_eq!(batch.row_count(), 3);
        assert_eq!(batch.column_count(), 3);

        // Names are carried over in order
        let names = batch.column_names().unwrap();
        assert_eq!(names, &["id", "value", "name"]);

        // Column 0: Int64
        match batch.column(0).unwrap().as_i64() {
            Some((values, nulls)) => {
                assert_eq!(values, &[1, 2, 3]);
                assert!(nulls.is_none());
            }
            None => panic!("Expected i64 column"),
        }

        // Column 1: Float64
        match batch.column(1).unwrap().as_f64() {
            Some((values, nulls)) => {
                assert_eq!(values, &[10.5, 20.5, 30.5]);
                assert!(nulls.is_none());
            }
            None => panic!("Expected f64 column"),
        }

        // Column 2: String
        match batch.column(2).unwrap() {
            ColumnArray::String(values, nulls) => {
                assert_eq!(values.as_slice(), &["Alice", "Bob", "Charlie"]);
                assert!(nulls.is_none());
            }
            _ => panic!("Expected String column"),
        }
    }

    /// Columns containing NULLs keep a null bitmap that marks exactly the NULL rows.
    #[test]
    fn test_from_storage_columnar_with_nulls() {
        // One NULL per column, in different rows
        let rows = vec![
            Row::new(vec![SqlValue::Integer(1), SqlValue::Double(10.0)]),
            Row::new(vec![SqlValue::Null, SqlValue::Double(20.0)]),
            Row::new(vec![SqlValue::Integer(3), SqlValue::Null]),
        ];
        let column_names = vec!["id".to_string(), "value".to_string()];
        let storage_table =
            vibesql_storage::ColumnarTable::from_rows(&rows, &column_names).unwrap();

        // Storage -> executor conversion under test
        let batch = ColumnarBatch::from_storage_columnar(&storage_table).unwrap();

        // Int64 column: NULL in the second row
        match batch.column(0).unwrap().as_i64() {
            Some((values, Some(nulls))) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls, &[false, true, false]);
            }
            _ => panic!("Expected i64 column with nulls"),
        }

        // Float64 column: NULL in the third row
        match batch.column(1).unwrap().as_f64() {
            Some((values, Some(nulls))) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls, &[false, false, true]);
            }
            _ => panic!("Expected f64 column with nulls"),
        }
    }
}