vibesql_executor/select/columnar/batch/
storage.rs

1//! Storage layer conversion
2//!
3//! This module contains methods for converting between the storage layer's
4//! ColumnarTable and the executor's ColumnarBatch.
5
6use std::sync::Arc;
7
8use crate::errors::ExecutorError;
9use vibesql_types::SqlValue;
10
11use super::types::{ColumnArray, ColumnarBatch};
12
13impl ColumnarBatch {
14    /// Convert from storage layer ColumnarTable to executor ColumnarBatch
15    ///
16    /// This method provides **true zero-copy** conversion from the storage layer's
17    /// columnar format to the executor's columnar format. This is the key integration
18    /// point for native columnar table scans.
19    ///
20    /// # Performance
21    ///
22    /// - **O(1) for numeric/string columns**: Arc::clone is just a reference count bump
23    /// - **< 1 microsecond** for millions of rows (vs O(n) with data copy)
24    /// - Directly shares storage ColumnData with executor ColumnArray
25    /// - Critical path for TPC-H Q6 and other analytical queries
26    ///
27    /// # Zero-Copy Design
28    ///
29    /// Both `vibesql_storage::ColumnData` and executor `ColumnArray` use `Arc<Vec<T>>`
30    /// for column data. Calling `Arc::clone()` only increments a reference count,
31    /// avoiding any data copying:
32    ///
33    /// ```text
34    /// Storage: Arc<Vec<i64>> ─┬─> [1, 2, 3, 4, ...]  (shared memory)
35    ///                         │
36    /// Executor: Arc<Vec<i64>> ┘
37    /// ```
38    ///
39    /// # Arguments
40    ///
41    /// * `storage_columnar` - ColumnarTable from storage layer (vibesql-storage)
42    ///
43    /// # Returns
44    ///
45    /// * `Ok(ColumnarBatch)` - Executor-ready columnar batch with shared Arc references
46    /// * `Err(ExecutorError)` - If type conversion fails
47    pub fn from_storage_columnar(
48        storage_columnar: &vibesql_storage::ColumnarTable,
49    ) -> Result<Self, ExecutorError> {
50        use vibesql_storage::ColumnData;
51
52        let column_names = storage_columnar.column_names().to_vec();
53        let row_count = storage_columnar.row_count();
54
55        // Handle empty tables: return an empty batch with column names but no data
56        // This happens when ColumnarTable::from_rows is called with empty rows -
57        // the column_names are preserved but the columns HashMap is empty
58        if row_count == 0 {
59            return Ok(Self {
60                row_count: 0,
61                columns: Vec::new(),
62                column_names: Some(column_names),
63            });
64        }
65
66        let mut columns = Vec::with_capacity(column_names.len());
67
68        for col_name in column_names.iter() {
69            let storage_col = storage_columnar.get_column(col_name).ok_or_else(|| {
70                ExecutorError::ColumnarColumnNotFoundByName {
71                    column_name: col_name.clone(),
72                }
73            })?;
74
75            let column_array = match storage_col {
76                ColumnData::Int64 { values, nulls } => {
77                    // Zero-copy: Arc::clone is O(1) - just bumps reference count
78                    let null_bitmap = if nulls.iter().any(|&n| n) {
79                        Some(Arc::clone(nulls))
80                    } else {
81                        None
82                    };
83                    ColumnArray::Int64(Arc::clone(values), null_bitmap)
84                }
85                ColumnData::Float64 { values, nulls } => {
86                    // Zero-copy: Arc::clone is O(1)
87                    let null_bitmap = if nulls.iter().any(|&n| n) {
88                        Some(Arc::clone(nulls))
89                    } else {
90                        None
91                    };
92                    ColumnArray::Float64(Arc::clone(values), null_bitmap)
93                }
94                ColumnData::String { values, nulls } => {
95                    // Zero-copy: Arc::clone is O(1)
96                    let null_bitmap = if nulls.iter().any(|&n| n) {
97                        Some(Arc::clone(nulls))
98                    } else {
99                        None
100                    };
101                    ColumnArray::String(Arc::clone(values), null_bitmap)
102                }
103                ColumnData::Bool { values, nulls } => {
104                    // Convert bool to u8 for SIMD compatibility (requires iteration)
105                    let u8_values: Vec<u8> = values.iter().map(|&b| if b { 1 } else { 0 }).collect();
106                    let null_bitmap = if nulls.iter().any(|&n| n) {
107                        Some(Arc::clone(nulls))
108                    } else {
109                        None
110                    };
111                    ColumnArray::Boolean(Arc::new(u8_values), null_bitmap)
112                }
113                ColumnData::Date { values, nulls } => {
114                    // Convert Date to i32 (days since Unix epoch 1970-01-01)
115                    // Must use the same formula as simd_filter.rs:date_to_days_since_epoch
116                    // for predicate evaluation to work correctly
117                    let i32_values: Vec<i32> =
118                        values.iter().map(date_to_days_since_epoch).collect();
119                    let null_bitmap = if nulls.iter().any(|&n| n) {
120                        Some(Arc::clone(nulls))
121                    } else {
122                        None
123                    };
124                    ColumnArray::Date(Arc::new(i32_values), null_bitmap)
125                }
126                ColumnData::Timestamp { values, nulls } => {
127                    // Convert Timestamp to Mixed (fallback - no direct i64 conversion)
128                    let sql_values: Vec<SqlValue> = values
129                        .iter()
130                        .zip(nulls.iter())
131                        .map(|(t, &is_null)| {
132                            if is_null {
133                                SqlValue::Null
134                            } else {
135                                SqlValue::Timestamp(*t)
136                            }
137                        })
138                        .collect();
139                    ColumnArray::Mixed(Arc::new(sql_values))
140                }
141                ColumnData::Time { values, nulls } => {
142                    // Convert Time to Mixed (fallback - Time doesn't have direct i64 conversion)
143                    let sql_values: Vec<SqlValue> = values
144                        .iter()
145                        .zip(nulls.iter())
146                        .map(|(t, &is_null)| {
147                            if is_null {
148                                SqlValue::Null
149                            } else {
150                                SqlValue::Time(*t)
151                            }
152                        })
153                        .collect();
154                    ColumnArray::Mixed(Arc::new(sql_values))
155                }
156                ColumnData::Interval { values, nulls } => {
157                    // Convert Interval to Mixed (fallback)
158                    let sql_values: Vec<SqlValue> = values
159                        .iter()
160                        .zip(nulls.iter())
161                        .map(|(i, &is_null)| {
162                            if is_null {
163                                SqlValue::Null
164                            } else {
165                                SqlValue::Interval(i.clone())
166                            }
167                        })
168                        .collect();
169                    ColumnArray::Mixed(Arc::new(sql_values))
170                }
171                ColumnData::Vector { values, nulls } => {
172                    // Convert Vector to Mixed (fallback)
173                    let sql_values: Vec<SqlValue> = values
174                        .iter()
175                        .zip(nulls.iter())
176                        .map(|(v, &is_null)| {
177                            if is_null {
178                                SqlValue::Null
179                            } else {
180                                SqlValue::Vector(v.clone())
181                            }
182                        })
183                        .collect();
184                    ColumnArray::Mixed(Arc::new(sql_values))
185                }
186            };
187
188            columns.push(column_array);
189        }
190
191        Ok(Self {
192            row_count,
193            columns,
194            column_names: Some(column_names),
195        })
196    }
197}
198
199/// Convert Date to days since Unix epoch (1970-01-01)
200///
201/// This function MUST be kept in sync with simd_filter.rs::date_to_days_since_epoch()
202/// to ensure predicates compare dates correctly.
203fn date_to_days_since_epoch(date: &vibesql_types::Date) -> i32 {
204    // Accurate days since Unix epoch calculation with leap year handling
205    let year_days = (date.year - 1970) * 365;
206    let leap_years =
207        ((date.year - 1969) / 4) - ((date.year - 1901) / 100) + ((date.year - 1601) / 400);
208    let month_days: i32 = match date.month {
209        1 => 0,
210        2 => 31,
211        3 => 59,
212        4 => 90,
213        5 => 120,
214        6 => 151,
215        7 => 181,
216        8 => 212,
217        9 => 243,
218        10 => 273,
219        11 => 304,
220        12 => 334,
221        _ => 0,
222    };
223
224    // Add leap day if after February in a leap year
225    let is_leap = date.year % 4 == 0 && (date.year % 100 != 0 || date.year % 400 == 0);
226    let leap_adjustment = if is_leap && date.month > 2 { 1 } else { 0 };
227
228    year_days + leap_years + month_days + date.day as i32 - 1 + leap_adjustment
229}
230
#[cfg(test)]
mod tests {
    use super::*;
    use vibesql_storage::Row;

    // Builds a storage-layer ColumnarTable from `rows` under the given column
    // names and converts it into an executor ColumnarBatch.
    fn convert(rows: Vec<Row>, names: &[&str]) -> ColumnarBatch {
        let column_names: Vec<String> = names.iter().map(|&name| name.to_owned()).collect();
        let storage_columnar =
            vibesql_storage::ColumnarTable::from_rows(&rows, &column_names).unwrap();
        ColumnarBatch::from_storage_columnar(&storage_columnar).unwrap()
    }

    /// A NULL-free table converts to typed columns without null bitmaps.
    #[test]
    fn test_from_storage_columnar() {
        let batch = convert(
            vec![
                Row::new(vec![
                    SqlValue::Integer(1),
                    SqlValue::Double(10.5),
                    SqlValue::Varchar("Alice".to_string()),
                ]),
                Row::new(vec![
                    SqlValue::Integer(2),
                    SqlValue::Double(20.5),
                    SqlValue::Varchar("Bob".to_string()),
                ]),
                Row::new(vec![
                    SqlValue::Integer(3),
                    SqlValue::Double(30.5),
                    SqlValue::Varchar("Charlie".to_string()),
                ]),
            ],
            &["id", "value", "name"],
        );

        // Shape and column names survive the conversion.
        assert_eq!(batch.row_count(), 3);
        assert_eq!(batch.column_count(), 3);
        assert_eq!(batch.column_names().unwrap(), &["id", "value", "name"]);

        // Integer column -> Int64 array, no bitmap.
        let id_column = batch.column(0).unwrap();
        match id_column.as_i64() {
            Some((values, nulls)) => {
                assert_eq!(values, &[1, 2, 3]);
                assert!(nulls.is_none());
            }
            None => panic!("Expected i64 column"),
        }

        // Double column -> Float64 array, no bitmap.
        let value_column = batch.column(1).unwrap();
        match value_column.as_f64() {
            Some((values, nulls)) => {
                assert_eq!(values, &[10.5, 20.5, 30.5]);
                assert!(nulls.is_none());
            }
            None => panic!("Expected f64 column"),
        }

        // Varchar column -> String array, no bitmap.
        let name_column = batch.column(2).unwrap();
        match name_column {
            ColumnArray::String(values, nulls) => {
                assert_eq!(values.as_slice(), &["Alice", "Bob", "Charlie"]);
                assert!(nulls.is_none());
            }
            _ => panic!("Expected String column"),
        }
    }

    /// Columns containing NULLs keep a bitmap marking exactly the NULL rows.
    #[test]
    fn test_from_storage_columnar_with_nulls() {
        let batch = convert(
            vec![
                Row::new(vec![SqlValue::Integer(1), SqlValue::Double(10.0)]),
                Row::new(vec![SqlValue::Null, SqlValue::Double(20.0)]),
                Row::new(vec![SqlValue::Integer(3), SqlValue::Null]),
            ],
            &["id", "value"],
        );

        // Int64 column: NULL in the second row.
        let id_column = batch.column(0).unwrap();
        match id_column.as_i64() {
            Some((values, Some(nulls))) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls, &[false, true, false]);
            }
            _ => panic!("Expected i64 column with nulls"),
        }

        // Float64 column: NULL in the third row.
        let value_column = batch.column(1).unwrap();
        match value_column.as_f64() {
            Some((values, Some(nulls))) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls, &[false, false, true]);
            }
            _ => panic!("Expected f64 column with nulls"),
        }
    }
}