vibesql_executor/select/columnar/batch/
builder.rs

1//! Batch construction and building logic
2//!
3//! This module contains methods for creating `ColumnarBatch` instances
4//! from various sources like rows and column arrays.
5
6use std::sync::Arc;
7
8use crate::errors::ExecutorError;
9use vibesql_storage::Row;
10use vibesql_types::SqlValue;
11
12use super::types::{ColumnArray, ColumnType, ColumnarBatch};
13
14impl ColumnarBatch {
15    /// Create a new empty columnar batch
16    pub fn new(column_count: usize) -> Self {
17        Self { row_count: 0, columns: Vec::with_capacity(column_count), column_names: None }
18    }
19
20    /// Create a columnar batch with specified capacity
21    pub fn with_capacity(_row_count: usize, column_count: usize) -> Self {
22        Self { row_count: 0, columns: Vec::with_capacity(column_count), column_names: None }
23    }
24
25    /// Create an empty batch with the specified number of columns
26    pub fn empty(column_count: usize) -> Result<Self, ExecutorError> {
27        Ok(Self {
28            row_count: 0,
29            columns: vec![ColumnArray::Mixed(Arc::new(vec![])); column_count],
30            column_names: None,
31        })
32    }
33
34    /// Create a batch from a list of columns
35    pub fn from_columns(
36        columns: Vec<ColumnArray>,
37        column_names: Option<Vec<String>>,
38    ) -> Result<Self, ExecutorError> {
39        if columns.is_empty() {
40            return Ok(Self { row_count: 0, columns, column_names });
41        }
42
43        // Verify all columns have the same length
44        let row_count = columns[0].len();
45        for (idx, column) in columns.iter().enumerate() {
46            if column.len() != row_count {
47                return Err(ExecutorError::ColumnarLengthMismatch {
48                    context: format!("from_columns (column {})", idx),
49                    expected: row_count,
50                    actual: column.len(),
51                });
52            }
53        }
54
55        Ok(Self { row_count, columns, column_names })
56    }
57
58    /// Convert from row-oriented storage to columnar batch
59    ///
60    /// This analyzes the first row to infer column types, then materializes
61    /// all values into type-specialized column arrays.
62    pub fn from_rows(rows: &[Row]) -> Result<Self, ExecutorError> {
63        if rows.is_empty() {
64            return Ok(Self::new(0));
65        }
66
67        let row_count = rows.len();
68        let column_count = rows[0].len();
69
70        // Infer column types from first row
71        let column_types = Self::infer_column_types(&rows[0]);
72
73        // Create column arrays
74        let mut columns = Vec::with_capacity(column_count);
75
76        for (col_idx, col_type) in column_types.iter().enumerate() {
77            let column = Self::extract_column(rows, col_idx, col_type)?;
78            columns.push(column);
79        }
80
81        Ok(Self { row_count, columns, column_names: None })
82    }
83
84    /// Extract a single column from rows into a typed array
85    pub(crate) fn extract_column(
86        rows: &[Row],
87        col_idx: usize,
88        col_type: &ColumnType,
89    ) -> Result<ColumnArray, ExecutorError> {
90        match col_type {
91            ColumnType::Int64 => {
92                let mut values = Vec::with_capacity(rows.len());
93                let mut nulls = Vec::with_capacity(rows.len());
94                let mut has_nulls = false;
95
96                for row in rows {
97                    match row.get(col_idx) {
98                        Some(SqlValue::Integer(v)) => {
99                            values.push(*v);
100                            nulls.push(false);
101                        }
102                        Some(SqlValue::Null) => {
103                            values.push(0); // placeholder
104                            nulls.push(true);
105                            has_nulls = true;
106                        }
107                        Some(other) => {
108                            return Err(ExecutorError::ColumnarTypeMismatch {
109                                operation: "extract_column".to_string(),
110                                left_type: "Integer".to_string(),
111                                right_type: Some(format!("{:?}", other)),
112                            });
113                        }
114                        None => {
115                            values.push(0);
116                            nulls.push(true);
117                            has_nulls = true;
118                        }
119                    }
120                }
121
122                Ok(ColumnArray::Int64(
123                    Arc::new(values),
124                    if has_nulls { Some(Arc::new(nulls)) } else { None },
125                ))
126            }
127
128            ColumnType::Float64 => {
129                let mut values = Vec::with_capacity(rows.len());
130                let mut nulls = Vec::with_capacity(rows.len());
131                let mut has_nulls = false;
132
133                for row in rows {
134                    match row.get(col_idx) {
135                        Some(SqlValue::Double(v)) => {
136                            values.push(*v);
137                            nulls.push(false);
138                        }
139                        Some(SqlValue::Null) => {
140                            values.push(0.0); // placeholder
141                            nulls.push(true);
142                            has_nulls = true;
143                        }
144                        Some(other) => {
145                            return Err(ExecutorError::ColumnarTypeMismatch {
146                                operation: "extract_column".to_string(),
147                                left_type: "Double".to_string(),
148                                right_type: Some(format!("{:?}", other)),
149                            });
150                        }
151                        None => {
152                            values.push(0.0);
153                            nulls.push(true);
154                            has_nulls = true;
155                        }
156                    }
157                }
158
159                Ok(ColumnArray::Float64(
160                    Arc::new(values),
161                    if has_nulls { Some(Arc::new(nulls)) } else { None },
162                ))
163            }
164
165            ColumnType::String => {
166                let mut values = Vec::with_capacity(rows.len());
167                let mut nulls = Vec::with_capacity(rows.len());
168                let mut has_nulls = false;
169
170                for row in rows {
171                    match row.get(col_idx) {
172                        Some(SqlValue::Varchar(v)) => {
173                            values.push(v.clone());
174                            nulls.push(false);
175                        }
176                        Some(SqlValue::Null) => {
177                            values.push(String::new()); // placeholder
178                            nulls.push(true);
179                            has_nulls = true;
180                        }
181                        Some(other) => {
182                            return Err(ExecutorError::ColumnarTypeMismatch {
183                                operation: "extract_column".to_string(),
184                                left_type: "Varchar".to_string(),
185                                right_type: Some(format!("{:?}", other)),
186                            });
187                        }
188                        None => {
189                            values.push(String::new());
190                            nulls.push(true);
191                            has_nulls = true;
192                        }
193                    }
194                }
195
196                Ok(ColumnArray::String(
197                    Arc::new(values),
198                    if has_nulls { Some(Arc::new(nulls)) } else { None },
199                ))
200            }
201
202            ColumnType::Date | ColumnType::Mixed => {
203                // Store dates and mixed types as Mixed (fallback for non-SIMD types)
204                let mut values = Vec::with_capacity(rows.len());
205
206                for row in rows {
207                    let value = row.get(col_idx).cloned().unwrap_or(SqlValue::Null);
208                    values.push(value);
209                }
210
211                Ok(ColumnArray::Mixed(Arc::new(values)))
212            }
213
214            ColumnType::Boolean => {
215                let mut values = Vec::with_capacity(rows.len());
216                let mut nulls = Vec::with_capacity(rows.len());
217                let mut has_nulls = false;
218
219                for row in rows {
220                    match row.get(col_idx) {
221                        Some(SqlValue::Boolean(b)) => {
222                            values.push(if *b { 1 } else { 0 });
223                            nulls.push(false);
224                        }
225                        Some(SqlValue::Null) => {
226                            values.push(0); // placeholder
227                            nulls.push(true);
228                            has_nulls = true;
229                        }
230                        Some(other) => {
231                            return Err(ExecutorError::ColumnarTypeMismatch {
232                                operation: "extract_column".to_string(),
233                                left_type: "Boolean".to_string(),
234                                right_type: Some(format!("{:?}", other)),
235                            });
236                        }
237                        None => {
238                            values.push(0);
239                            nulls.push(true);
240                            has_nulls = true;
241                        }
242                    }
243                }
244
245                Ok(ColumnArray::Boolean(
246                    Arc::new(values),
247                    if has_nulls { Some(Arc::new(nulls)) } else { None },
248                ))
249            }
250        }
251    }
252
253    /// Infer column types from the first row
254    pub(crate) fn infer_column_types(first_row: &Row) -> Vec<ColumnType> {
255        let mut types = Vec::with_capacity(first_row.len());
256
257        for i in 0..first_row.len() {
258            let col_type = match first_row.get(i) {
259                Some(SqlValue::Integer(_)) => ColumnType::Int64,
260                Some(SqlValue::Double(_)) => ColumnType::Float64,
261                Some(SqlValue::Varchar(_)) => ColumnType::String,
262                Some(SqlValue::Date(_)) => ColumnType::Date,
263                Some(SqlValue::Boolean(_)) => ColumnType::Boolean,
264                _ => ColumnType::Mixed,
265            };
266            types.push(col_type);
267        }
268
269        types
270    }
271}
272
273#[cfg(test)]
274mod tests {
275    use super::*;
276
277    #[test]
278    fn test_columnar_batch_creation() {
279        let rows = vec![
280            Row::new(vec![
281                SqlValue::Integer(1),
282                SqlValue::Double(10.5),
283                SqlValue::Varchar("Alice".to_string()),
284            ]),
285            Row::new(vec![
286                SqlValue::Integer(2),
287                SqlValue::Double(20.5),
288                SqlValue::Varchar("Bob".to_string()),
289            ]),
290            Row::new(vec![
291                SqlValue::Integer(3),
292                SqlValue::Double(30.5),
293                SqlValue::Varchar("Charlie".to_string()),
294            ]),
295        ];
296
297        let batch = ColumnarBatch::from_rows(&rows).unwrap();
298
299        assert_eq!(batch.row_count(), 3);
300        assert_eq!(batch.column_count(), 3);
301
302        // Check column 0 (integers)
303        let col0 = batch.column(0).unwrap();
304        if let ColumnArray::Int64(values, nulls) = col0 {
305            assert_eq!(values.as_slice(), &[1, 2, 3]);
306            assert!(nulls.is_none());
307        } else {
308            panic!("Expected Int64 column");
309        }
310
311        // Check column 1 (doubles)
312        let col1 = batch.column(1).unwrap();
313        if let ColumnArray::Float64(values, nulls) = col1 {
314            assert_eq!(values.as_slice(), &[10.5, 20.5, 30.5]);
315            assert!(nulls.is_none());
316        } else {
317            panic!("Expected Float64 column");
318        }
319
320        // Check column 2 (strings)
321        let col2 = batch.column(2).unwrap();
322        if let ColumnArray::String(values, nulls) = col2 {
323            assert_eq!(values.as_slice(), &["Alice", "Bob", "Charlie"]);
324            assert!(nulls.is_none());
325        } else {
326            panic!("Expected String column");
327        }
328    }
329
330    #[test]
331    fn test_columnar_batch_with_nulls() {
332        let rows = vec![
333            Row::new(vec![SqlValue::Integer(1), SqlValue::Double(10.0)]),
334            Row::new(vec![SqlValue::Null, SqlValue::Double(20.0)]),
335            Row::new(vec![SqlValue::Integer(3), SqlValue::Null]),
336        ];
337
338        let batch = ColumnarBatch::from_rows(&rows).unwrap();
339
340        // Check column 0 (with NULL)
341        let col0 = batch.column(0).unwrap();
342        if let ColumnArray::Int64(values, Some(nulls)) = col0 {
343            assert_eq!(values.len(), 3);
344            assert_eq!(nulls.as_slice(), &[false, true, false]);
345        } else {
346            panic!("Expected Int64 column with nulls");
347        }
348
349        // Check column 1 (with NULL)
350        let col1 = batch.column(1).unwrap();
351        if let ColumnArray::Float64(values, Some(nulls)) = col1 {
352            assert_eq!(values.len(), 3);
353            assert_eq!(nulls.as_slice(), &[false, false, true]);
354        } else {
355            panic!("Expected Float64 column with nulls");
356        }
357    }
358}