vibesql_executor/select/columnar/batch/
builder.rs

1//! Batch construction and building logic
2//!
3//! This module contains methods for creating `ColumnarBatch` instances
4//! from various sources like rows and column arrays.
5
6use std::sync::Arc;
7
8use vibesql_storage::Row;
9use vibesql_types::SqlValue;
10
11use super::types::{ColumnArray, ColumnType, ColumnarBatch};
12use crate::errors::ExecutorError;
13
14impl ColumnarBatch {
15    /// Create a new empty columnar batch
16    pub fn new(column_count: usize) -> Self {
17        Self { row_count: 0, columns: Vec::with_capacity(column_count), column_names: None }
18    }
19
20    /// Create a columnar batch with specified capacity
21    pub fn with_capacity(_row_count: usize, column_count: usize) -> Self {
22        Self { row_count: 0, columns: Vec::with_capacity(column_count), column_names: None }
23    }
24
25    /// Create an empty batch with the specified number of columns
26    pub fn empty(column_count: usize) -> Result<Self, ExecutorError> {
27        Ok(Self {
28            row_count: 0,
29            columns: vec![ColumnArray::Mixed(Arc::new(vec![])); column_count],
30            column_names: None,
31        })
32    }
33
34    /// Create a batch from a list of columns
35    pub fn from_columns(
36        columns: Vec<ColumnArray>,
37        column_names: Option<Vec<String>>,
38    ) -> Result<Self, ExecutorError> {
39        if columns.is_empty() {
40            return Ok(Self { row_count: 0, columns, column_names });
41        }
42
43        // Verify all columns have the same length
44        let row_count = columns[0].len();
45        for (idx, column) in columns.iter().enumerate() {
46            if column.len() != row_count {
47                return Err(ExecutorError::ColumnarLengthMismatch {
48                    context: format!("from_columns (column {})", idx),
49                    expected: row_count,
50                    actual: column.len(),
51                });
52            }
53        }
54
55        Ok(Self { row_count, columns, column_names })
56    }
57
58    /// Convert from row-oriented storage to columnar batch
59    ///
60    /// This analyzes the first row to infer column types, then materializes
61    /// all values into type-specialized column arrays.
62    pub fn from_rows(rows: &[Row]) -> Result<Self, ExecutorError> {
63        if rows.is_empty() {
64            return Ok(Self::new(0));
65        }
66
67        let row_count = rows.len();
68        let column_count = rows[0].len();
69
70        // Infer column types from first row
71        let column_types = Self::infer_column_types(&rows[0]);
72
73        // Create column arrays
74        let mut columns = Vec::with_capacity(column_count);
75
76        for (col_idx, col_type) in column_types.iter().enumerate() {
77            let column = Self::extract_column(rows, col_idx, col_type)?;
78            columns.push(column);
79        }
80
81        Ok(Self { row_count, columns, column_names: None })
82    }
83
84    /// Convert selected columns from row-oriented storage to columnar batch
85    ///
86    /// This is an optimized version of `from_rows` that only extracts the
87    /// specified columns. This is critical for predicate evaluation on wide
88    /// tables where only a few columns are referenced by the WHERE clause.
89    ///
90    /// # Arguments
91    ///
92    /// * `rows` - The rows to convert
93    /// * `column_indices` - Which column indices to extract (must be sorted)
94    ///
95    /// # Returns
96    ///
97    /// A sparse columnar batch where `column(i)` returns the data for
98    /// `column_indices[i]`. The caller must map original column indices
99    /// to batch positions using the `column_indices` array.
100    ///
101    /// # Performance
102    ///
103    /// For a table with 16 columns where only 1 column is needed:
104    /// - `from_rows`: extracts all 16 columns (100% work)
105    /// - `from_rows_selective`: extracts only 1 column (6% work)
106    pub fn from_rows_selective(
107        rows: &[Row],
108        column_indices: &[usize],
109    ) -> Result<Self, ExecutorError> {
110        if rows.is_empty() || column_indices.is_empty() {
111            return Ok(Self::new(0));
112        }
113
114        let row_count = rows.len();
115
116        // Infer column types from first row for only the selected columns
117        let column_types: Vec<ColumnType> = column_indices
118            .iter()
119            .map(|&col_idx| {
120                rows[0].get(col_idx).map(Self::infer_type_from_value).unwrap_or(ColumnType::Mixed)
121            })
122            .collect();
123
124        // Create column arrays for only the selected columns
125        let mut columns = Vec::with_capacity(column_indices.len());
126
127        for (batch_idx, &col_idx) in column_indices.iter().enumerate() {
128            let column = Self::extract_column(rows, col_idx, &column_types[batch_idx])?;
129            columns.push(column);
130        }
131
132        Ok(Self { row_count, columns, column_names: None })
133    }
134
135    /// Extract a single column from rows into a typed array
136    ///
137    /// SQLite allows mixed types in a single column (manifest typing), so this
138    /// function handles type mismatches gracefully by falling back to Mixed type.
139    pub(crate) fn extract_column(
140        rows: &[Row],
141        col_idx: usize,
142        col_type: &ColumnType,
143    ) -> Result<ColumnArray, ExecutorError> {
144        match col_type {
145            ColumnType::Int64 => {
146                let mut values = Vec::with_capacity(rows.len());
147                let mut nulls = Vec::with_capacity(rows.len());
148                let mut has_nulls = false;
149
150                for row in rows {
151                    match row.get(col_idx) {
152                        Some(SqlValue::Integer(v)) => {
153                            values.push(*v);
154                            nulls.push(false);
155                        }
156                        Some(SqlValue::Null) => {
157                            values.push(0); // placeholder
158                            nulls.push(true);
159                            has_nulls = true;
160                        }
161                        Some(_other) => {
162                            // Type mismatch - fall back to Mixed type for SQLite compatibility
163                            return Self::extract_column(rows, col_idx, &ColumnType::Mixed);
164                        }
165                        None => {
166                            values.push(0);
167                            nulls.push(true);
168                            has_nulls = true;
169                        }
170                    }
171                }
172
173                Ok(ColumnArray::Int64(
174                    Arc::new(values),
175                    if has_nulls { Some(Arc::new(nulls)) } else { None },
176                ))
177            }
178
179            ColumnType::Float64 => {
180                let mut values = Vec::with_capacity(rows.len());
181                let mut nulls = Vec::with_capacity(rows.len());
182                let mut has_nulls = false;
183
184                for row in rows {
185                    match row.get(col_idx) {
186                        Some(SqlValue::Double(v)) => {
187                            values.push(*v);
188                            nulls.push(false);
189                        }
190                        Some(SqlValue::Null) => {
191                            values.push(0.0); // placeholder
192                            nulls.push(true);
193                            has_nulls = true;
194                        }
195                        Some(_other) => {
196                            // Type mismatch - fall back to Mixed type for SQLite compatibility
197                            return Self::extract_column(rows, col_idx, &ColumnType::Mixed);
198                        }
199                        None => {
200                            values.push(0.0);
201                            nulls.push(true);
202                            has_nulls = true;
203                        }
204                    }
205                }
206
207                Ok(ColumnArray::Float64(
208                    Arc::new(values),
209                    if has_nulls { Some(Arc::new(nulls)) } else { None },
210                ))
211            }
212
213            ColumnType::String => {
214                let mut values = Vec::with_capacity(rows.len());
215                let mut nulls = Vec::with_capacity(rows.len());
216                let mut has_nulls = false;
217
218                for row in rows {
219                    match row.get(col_idx) {
220                        Some(SqlValue::Varchar(v)) => {
221                            values.push(Arc::from(v.as_str()));
222                            nulls.push(false);
223                        }
224                        Some(SqlValue::Null) => {
225                            values.push(Arc::from("")); // placeholder
226                            nulls.push(true);
227                            has_nulls = true;
228                        }
229                        Some(_other) => {
230                            // Type mismatch - fall back to Mixed type for SQLite compatibility
231                            return Self::extract_column(rows, col_idx, &ColumnType::Mixed);
232                        }
233                        None => {
234                            values.push(Arc::from(""));
235                            nulls.push(true);
236                            has_nulls = true;
237                        }
238                    }
239                }
240
241                Ok(ColumnArray::String(
242                    Arc::new(values),
243                    if has_nulls { Some(Arc::new(nulls)) } else { None },
244                ))
245            }
246
247            ColumnType::Date | ColumnType::Mixed => {
248                // Store dates and mixed types as Mixed (fallback for non-SIMD types)
249                let mut values = Vec::with_capacity(rows.len());
250
251                for row in rows {
252                    let value = row.get(col_idx).cloned().unwrap_or(SqlValue::Null);
253                    values.push(value);
254                }
255
256                Ok(ColumnArray::Mixed(Arc::new(values)))
257            }
258
259            ColumnType::Boolean => {
260                let mut values = Vec::with_capacity(rows.len());
261                let mut nulls = Vec::with_capacity(rows.len());
262                let mut has_nulls = false;
263
264                for row in rows {
265                    match row.get(col_idx) {
266                        Some(SqlValue::Boolean(b)) => {
267                            values.push(if *b { 1 } else { 0 });
268                            nulls.push(false);
269                        }
270                        Some(SqlValue::Null) => {
271                            values.push(0); // placeholder
272                            nulls.push(true);
273                            has_nulls = true;
274                        }
275                        Some(_other) => {
276                            // Type mismatch - fall back to Mixed type for SQLite compatibility
277                            return Self::extract_column(rows, col_idx, &ColumnType::Mixed);
278                        }
279                        None => {
280                            values.push(0);
281                            nulls.push(true);
282                            has_nulls = true;
283                        }
284                    }
285                }
286
287                Ok(ColumnArray::Boolean(
288                    Arc::new(values),
289                    if has_nulls { Some(Arc::new(nulls)) } else { None },
290                ))
291            }
292        }
293    }
294
295    /// Infer column types from the first row
296    pub(crate) fn infer_column_types(first_row: &Row) -> Vec<ColumnType> {
297        let mut types = Vec::with_capacity(first_row.len());
298
299        for i in 0..first_row.len() {
300            let col_type =
301                first_row.get(i).map(Self::infer_type_from_value).unwrap_or(ColumnType::Mixed);
302            types.push(col_type);
303        }
304
305        types
306    }
307
308    /// Infer column type from a single SqlValue
309    fn infer_type_from_value(value: &SqlValue) -> ColumnType {
310        match value {
311            SqlValue::Integer(_) => ColumnType::Int64,
312            SqlValue::Double(_) => ColumnType::Float64,
313            SqlValue::Varchar(_) => ColumnType::String,
314            SqlValue::Date(_) => ColumnType::Date,
315            SqlValue::Boolean(_) => ColumnType::Boolean,
316            _ => ColumnType::Mixed,
317        }
318    }
319}
320
321#[cfg(test)]
322mod tests {
323    use super::*;
324
325    #[test]
326    fn test_columnar_batch_creation() {
327        let rows = vec![
328            Row::new(vec![
329                SqlValue::Integer(1),
330                SqlValue::Double(10.5),
331                SqlValue::Varchar(arcstr::ArcStr::from("Alice")),
332            ]),
333            Row::new(vec![
334                SqlValue::Integer(2),
335                SqlValue::Double(20.5),
336                SqlValue::Varchar(arcstr::ArcStr::from("Bob")),
337            ]),
338            Row::new(vec![
339                SqlValue::Integer(3),
340                SqlValue::Double(30.5),
341                SqlValue::Varchar(arcstr::ArcStr::from("Charlie")),
342            ]),
343        ];
344
345        let batch = ColumnarBatch::from_rows(&rows).unwrap();
346
347        assert_eq!(batch.row_count(), 3);
348        assert_eq!(batch.column_count(), 3);
349
350        // Check column 0 (integers)
351        let col0 = batch.column(0).unwrap();
352        if let ColumnArray::Int64(values, nulls) = col0 {
353            assert_eq!(values.as_slice(), &[1, 2, 3]);
354            assert!(nulls.is_none());
355        } else {
356            panic!("Expected Int64 column");
357        }
358
359        // Check column 1 (doubles)
360        let col1 = batch.column(1).unwrap();
361        if let ColumnArray::Float64(values, nulls) = col1 {
362            assert_eq!(values.as_slice(), &[10.5, 20.5, 30.5]);
363            assert!(nulls.is_none());
364        } else {
365            panic!("Expected Float64 column");
366        }
367
368        // Check column 2 (strings)
369        let col2 = batch.column(2).unwrap();
370        if let ColumnArray::String(values, nulls) = col2 {
371            let str_refs: Vec<&str> = values.iter().map(|s| s.as_ref()).collect();
372            assert_eq!(str_refs, vec!["Alice", "Bob", "Charlie"]);
373            assert!(nulls.is_none());
374        } else {
375            panic!("Expected String column");
376        }
377    }
378
379    #[test]
380    fn test_columnar_batch_with_nulls() {
381        let rows = vec![
382            Row::new(vec![SqlValue::Integer(1), SqlValue::Double(10.0)]),
383            Row::new(vec![SqlValue::Null, SqlValue::Double(20.0)]),
384            Row::new(vec![SqlValue::Integer(3), SqlValue::Null]),
385        ];
386
387        let batch = ColumnarBatch::from_rows(&rows).unwrap();
388
389        // Check column 0 (with NULL)
390        let col0 = batch.column(0).unwrap();
391        if let ColumnArray::Int64(values, Some(nulls)) = col0 {
392            assert_eq!(values.len(), 3);
393            assert_eq!(nulls.as_slice(), &[false, true, false]);
394        } else {
395            panic!("Expected Int64 column with nulls");
396        }
397
398        // Check column 1 (with NULL)
399        let col1 = batch.column(1).unwrap();
400        if let ColumnArray::Float64(values, Some(nulls)) = col1 {
401            assert_eq!(values.len(), 3);
402            assert_eq!(nulls.as_slice(), &[false, false, true]);
403        } else {
404            panic!("Expected Float64 column with nulls");
405        }
406    }
407
408    #[test]
409    fn test_columnar_batch_mixed_types() {
410        // SQLite allows mixed types in a single column (manifest typing)
411        // First row has VARCHAR, subsequent rows have INTEGER
412        let rows = vec![
413            Row::new(vec![SqlValue::Varchar(arcstr::ArcStr::from("abc")), SqlValue::Null]),
414            Row::new(vec![SqlValue::Null, SqlValue::Varchar(arcstr::ArcStr::from("xyz"))]),
415            Row::new(vec![SqlValue::Integer(11), SqlValue::Integer(22)]),
416            Row::new(vec![SqlValue::Integer(33), SqlValue::Integer(44)]),
417        ];
418
419        let batch = ColumnarBatch::from_rows(&rows).unwrap();
420
421        assert_eq!(batch.row_count(), 4);
422        assert_eq!(batch.column_count(), 2);
423
424        // Column 0 should be Mixed due to type mismatch (VARCHAR then INTEGER)
425        let col0 = batch.column(0).unwrap();
426        if let ColumnArray::Mixed(values) = col0 {
427            assert_eq!(values.len(), 4);
428            assert_eq!(values[0], SqlValue::Varchar(arcstr::ArcStr::from("abc")));
429            assert_eq!(values[1], SqlValue::Null);
430            assert_eq!(values[2], SqlValue::Integer(11));
431            assert_eq!(values[3], SqlValue::Integer(33));
432        } else {
433            panic!("Expected Mixed column, got {:?}", col0);
434        }
435
436        // Column 1 should also be Mixed (NULL first, then VARCHAR, then INTEGERs)
437        let col1 = batch.column(1).unwrap();
438        if let ColumnArray::Mixed(values) = col1 {
439            assert_eq!(values.len(), 4);
440            assert_eq!(values[0], SqlValue::Null);
441            assert_eq!(values[1], SqlValue::Varchar(arcstr::ArcStr::from("xyz")));
442            assert_eq!(values[2], SqlValue::Integer(22));
443            assert_eq!(values[3], SqlValue::Integer(44));
444        } else {
445            panic!("Expected Mixed column, got {:?}", col1);
446        }
447    }
448
449    #[test]
450    fn test_columnar_batch_mixed_types_int_first() {
451        // Test case where INTEGER comes first, then VARCHAR
452        let rows = vec![
453            Row::new(vec![SqlValue::Integer(11), SqlValue::Integer(22)]),
454            Row::new(vec![SqlValue::Varchar(arcstr::ArcStr::from("abc")), SqlValue::Null]),
455        ];
456
457        let batch = ColumnarBatch::from_rows(&rows).unwrap();
458
459        // Column 0: First row is Integer, second is Varchar -> Mixed
460        let col0 = batch.column(0).unwrap();
461        if let ColumnArray::Mixed(values) = col0 {
462            assert_eq!(values.len(), 2);
463            assert_eq!(values[0], SqlValue::Integer(11));
464            assert_eq!(values[1], SqlValue::Varchar(arcstr::ArcStr::from("abc")));
465        } else {
466            panic!("Expected Mixed column for mixed integer/varchar, got {:?}", col0);
467        }
468    }
469}