vibesql_executor/select/columnar/batch/
storage.rs

1//! Storage layer conversion
2//!
3//! This module contains methods for converting between the storage layer's
4//! ColumnarTable and the executor's ColumnarBatch.
5
6use std::sync::Arc;
7
8use vibesql_types::SqlValue;
9
10use super::types::{ColumnArray, ColumnarBatch};
11use crate::errors::ExecutorError;
12
13impl ColumnarBatch {
14    /// Convert from storage layer ColumnarTable to executor ColumnarBatch
15    ///
16    /// This method provides **true zero-copy** conversion from the storage layer's
17    /// columnar format to the executor's columnar format. This is the key integration
18    /// point for native columnar table scans.
19    ///
20    /// # Performance
21    ///
22    /// - **O(1) for numeric/string columns**: Arc::clone is just a reference count bump
23    /// - **< 1 microsecond** for millions of rows (vs O(n) with data copy)
24    /// - Directly shares storage ColumnData with executor ColumnArray
25    /// - Critical path for TPC-H Q6 and other analytical queries
26    ///
27    /// # Zero-Copy Design
28    ///
29    /// Both `vibesql_storage::ColumnData` and executor `ColumnArray` use `Arc<Vec<T>>`
30    /// for column data. Calling `Arc::clone()` only increments a reference count,
31    /// avoiding any data copying:
32    ///
33    /// ```text
34    /// Storage: Arc<Vec<i64>> ─┬─> [1, 2, 3, 4, ...]  (shared memory)
35    ///                         │
36    /// Executor: Arc<Vec<i64>> ┘
37    /// ```
38    ///
39    /// # Arguments
40    ///
41    /// * `storage_columnar` - ColumnarTable from storage layer (vibesql-storage)
42    ///
43    /// # Returns
44    ///
45    /// * `Ok(ColumnarBatch)` - Executor-ready columnar batch with shared Arc references
46    /// * `Err(ExecutorError)` - If type conversion fails
47    pub fn from_storage_columnar(
48        storage_columnar: &vibesql_storage::ColumnarTable,
49    ) -> Result<Self, ExecutorError> {
50        use vibesql_storage::ColumnData;
51
52        let column_names = storage_columnar.column_names().to_vec();
53        let row_count = storage_columnar.row_count();
54
55        // Handle empty tables: return an empty batch with column names but no data
56        // This happens when ColumnarTable::from_rows is called with empty rows -
57        // the column_names are preserved but the columns HashMap is empty
58        if row_count == 0 {
59            return Ok(Self {
60                row_count: 0,
61                columns: Vec::new(),
62                column_names: Some(column_names),
63            });
64        }
65
66        let mut columns = Vec::with_capacity(column_names.len());
67
68        for col_name in column_names.iter() {
69            let storage_col = storage_columnar.get_column(col_name).ok_or_else(|| {
70                ExecutorError::ColumnarColumnNotFoundByName { column_name: col_name.to_string() }
71            })?;
72
73            let column_array =
74                match storage_col {
75                    ColumnData::Int64 { values, nulls } => {
76                        // Zero-copy: Arc::clone is O(1) - just bumps reference count
77                        let null_bitmap =
78                            if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
79                        ColumnArray::Int64(Arc::clone(values), null_bitmap)
80                    }
81                    ColumnData::Float64 { values, nulls } => {
82                        // Zero-copy: Arc::clone is O(1)
83                        let null_bitmap =
84                            if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
85                        ColumnArray::Float64(Arc::clone(values), null_bitmap)
86                    }
87                    ColumnData::String { values, nulls } => {
88                        // Zero-copy: Arc::clone is O(1)
89                        let null_bitmap =
90                            if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
91                        ColumnArray::String(Arc::clone(values), null_bitmap)
92                    }
93                    ColumnData::Bool { values, nulls } => {
94                        // Convert bool to u8 for SIMD compatibility (requires iteration)
95                        let u8_values: Vec<u8> =
96                            values.iter().map(|&b| if b { 1 } else { 0 }).collect();
97                        let null_bitmap =
98                            if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
99                        ColumnArray::Boolean(Arc::new(u8_values), null_bitmap)
100                    }
101                    ColumnData::Date { values, nulls } => {
102                        // Convert Date to i32 (days since Unix epoch 1970-01-01)
103                        // Must use the same formula as simd_filter.rs:date_to_days_since_epoch
104                        // for predicate evaluation to work correctly
105                        let i32_values: Vec<i32> =
106                            values.iter().map(date_to_days_since_epoch).collect();
107                        let null_bitmap =
108                            if nulls.iter().any(|&n| n) { Some(Arc::clone(nulls)) } else { None };
109                        ColumnArray::Date(Arc::new(i32_values), null_bitmap)
110                    }
111                    ColumnData::Timestamp { values, nulls } => {
112                        // Convert Timestamp to Mixed (fallback - no direct i64 conversion)
113                        let sql_values: Vec<SqlValue> = values
114                            .iter()
115                            .zip(nulls.iter())
116                            .map(
117                                |(t, &is_null)| {
118                                    if is_null {
119                                        SqlValue::Null
120                                    } else {
121                                        SqlValue::Timestamp(*t)
122                                    }
123                                },
124                            )
125                            .collect();
126                        ColumnArray::Mixed(Arc::new(sql_values))
127                    }
128                    ColumnData::Time { values, nulls } => {
129                        // Convert Time to Mixed (fallback - Time doesn't have direct i64 conversion)
130                        let sql_values: Vec<SqlValue> = values
131                            .iter()
132                            .zip(nulls.iter())
133                            .map(
134                                |(t, &is_null)| {
135                                    if is_null {
136                                        SqlValue::Null
137                                    } else {
138                                        SqlValue::Time(*t)
139                                    }
140                                },
141                            )
142                            .collect();
143                        ColumnArray::Mixed(Arc::new(sql_values))
144                    }
145                    ColumnData::Interval { values, nulls } => {
146                        // Convert Interval to Mixed (fallback)
147                        let sql_values: Vec<SqlValue> = values
148                            .iter()
149                            .zip(nulls.iter())
150                            .map(|(i, &is_null)| {
151                                if is_null {
152                                    SqlValue::Null
153                                } else {
154                                    SqlValue::Interval(i.clone())
155                                }
156                            })
157                            .collect();
158                        ColumnArray::Mixed(Arc::new(sql_values))
159                    }
160                    ColumnData::Vector { values, nulls } => {
161                        // Convert Vector to Mixed (fallback)
162                        let sql_values: Vec<SqlValue> =
163                            values
164                                .iter()
165                                .zip(nulls.iter())
166                                .map(|(v, &is_null)| {
167                                    if is_null {
168                                        SqlValue::Null
169                                    } else {
170                                        SqlValue::Vector(v.clone())
171                                    }
172                                })
173                                .collect();
174                        ColumnArray::Mixed(Arc::new(sql_values))
175                    }
176                    ColumnData::Blob { values, nulls } => {
177                        // Convert Blob to Mixed (fallback)
178                        let sql_values: Vec<SqlValue> =
179                            values
180                                .iter()
181                                .zip(nulls.iter())
182                                .map(|(b, &is_null)| {
183                                    if is_null {
184                                        SqlValue::Null
185                                    } else {
186                                        SqlValue::Blob(b.clone())
187                                    }
188                                })
189                                .collect();
190                        ColumnArray::Mixed(Arc::new(sql_values))
191                    }
192                };
193
194            columns.push(column_array);
195        }
196
197        Ok(Self { row_count, columns, column_names: Some(column_names) })
198    }
199}
200
201/// Convert Date to days since Unix epoch (1970-01-01)
202///
203/// This function MUST be kept in sync with simd_filter.rs::date_to_days_since_epoch()
204/// to ensure predicates compare dates correctly.
205fn date_to_days_since_epoch(date: &vibesql_types::Date) -> i32 {
206    // Accurate days since Unix epoch calculation with leap year handling
207    let year_days = (date.year - 1970) * 365;
208    let leap_years =
209        ((date.year - 1969) / 4) - ((date.year - 1901) / 100) + ((date.year - 1601) / 400);
210    let month_days: i32 = match date.month {
211        1 => 0,
212        2 => 31,
213        3 => 59,
214        4 => 90,
215        5 => 120,
216        6 => 151,
217        7 => 181,
218        8 => 212,
219        9 => 243,
220        10 => 273,
221        11 => 304,
222        12 => 334,
223        _ => 0,
224    };
225
226    // Add leap day if after February in a leap year
227    let is_leap = date.year % 4 == 0 && (date.year % 100 != 0 || date.year % 400 == 0);
228    let leap_adjustment = if is_leap && date.month > 2 { 1 } else { 0 };
229
230    year_days + leap_years + month_days + date.day as i32 - 1 + leap_adjustment
231}
232
#[cfg(test)]
mod tests {
    use vibesql_storage::Row;

    use super::*;

    /// Turn string literals into the owned column-name list passed to `from_rows`.
    fn owned_names(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|name| name.to_string()).collect()
    }

    /// Three fully populated rows (integer, double, varchar) must come back as
    /// Int64 / Float64 / String columns with no null bitmap attached.
    #[test]
    fn test_from_storage_columnar() {
        // Storage-layer fixture
        let row = |id, value, name: &str| {
            Row::new(vec![
                SqlValue::Integer(id),
                SqlValue::Double(value),
                SqlValue::Varchar(arcstr::ArcStr::from(name)),
            ])
        };
        let rows = vec![row(1, 10.5, "Alice"), row(2, 20.5, "Bob"), row(3, 30.5, "Charlie")];
        let column_names = owned_names(&["id", "value", "name"]);
        let storage_columnar =
            vibesql_storage::ColumnarTable::from_rows(&rows, &column_names).unwrap();

        // Conversion under test
        let batch = ColumnarBatch::from_storage_columnar(&storage_columnar).unwrap();

        // Shape
        assert_eq!(batch.row_count(), 3);
        assert_eq!(batch.column_count(), 3);

        // Names survive the conversion in order
        let names = batch.column_names().unwrap();
        assert_eq!(names, &["id", "value", "name"]);

        // Column 0: Int64, no nulls
        let col0 = batch.column(0).unwrap();
        match col0.as_i64() {
            Some((values, nulls)) => {
                assert_eq!(values, &[1, 2, 3]);
                assert!(nulls.is_none());
            }
            None => panic!("Expected i64 column"),
        }

        // Column 1: Float64, no nulls
        let col1 = batch.column(1).unwrap();
        match col1.as_f64() {
            Some((values, nulls)) => {
                assert_eq!(values, &[10.5, 20.5, 30.5]);
                assert!(nulls.is_none());
            }
            None => panic!("Expected f64 column"),
        }

        // Column 2: String, no nulls
        let col2 = batch.column(2).unwrap();
        match col2 {
            ColumnArray::String(values, nulls) => {
                let str_refs: Vec<&str> = values.iter().map(|s| s.as_ref()).collect();
                assert_eq!(str_refs, vec!["Alice", "Bob", "Charlie"]);
                assert!(nulls.is_none());
            }
            _ => panic!("Expected String column"),
        }
    }

    /// A NULL in a column must surface as a null bitmap with the matching slot set.
    #[test]
    fn test_from_storage_columnar_with_nulls() {
        // Storage-layer fixture: one NULL per column, in different rows
        let rows = vec![
            Row::new(vec![SqlValue::Integer(1), SqlValue::Double(10.0)]),
            Row::new(vec![SqlValue::Null, SqlValue::Double(20.0)]),
            Row::new(vec![SqlValue::Integer(3), SqlValue::Null]),
        ];
        let column_names = owned_names(&["id", "value"]);
        let storage_columnar =
            vibesql_storage::ColumnarTable::from_rows(&rows, &column_names).unwrap();

        // Conversion under test
        let batch = ColumnarBatch::from_storage_columnar(&storage_columnar).unwrap();

        // Column 0: Int64 with the middle row NULL
        let col0 = batch.column(0).unwrap();
        match col0.as_i64() {
            Some((values, Some(nulls))) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls, &[false, true, false]);
            }
            _ => panic!("Expected i64 column with nulls"),
        }

        // Column 1: Float64 with the last row NULL
        let col1 = batch.column(1).unwrap();
        match col1.as_f64() {
            Some((values, Some(nulls))) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls, &[false, false, true]);
            }
            _ => panic!("Expected f64 column with nulls"),
        }
    }
}