vibesql_executor/select/columnar/batch/
builder.rs

1//! Batch construction and building logic
2//!
3//! This module contains methods for creating `ColumnarBatch` instances
4//! from various sources like rows and column arrays.
5
6use std::sync::Arc;
7
8use crate::errors::ExecutorError;
9use vibesql_storage::Row;
10use vibesql_types::SqlValue;
11
12use super::types::{ColumnArray, ColumnType, ColumnarBatch};
13
14impl ColumnarBatch {
15    /// Create a new empty columnar batch
16    pub fn new(column_count: usize) -> Self {
17        Self {
18            row_count: 0,
19            columns: Vec::with_capacity(column_count),
20            column_names: None,
21        }
22    }
23
24    /// Create a columnar batch with specified capacity
25    pub fn with_capacity(_row_count: usize, column_count: usize) -> Self {
26        Self {
27            row_count: 0,
28            columns: Vec::with_capacity(column_count),
29            column_names: None,
30        }
31    }
32
33    /// Create an empty batch with the specified number of columns
34    pub fn empty(column_count: usize) -> Result<Self, ExecutorError> {
35        Ok(Self {
36            row_count: 0,
37            columns: vec![ColumnArray::Mixed(Arc::new(vec![])); column_count],
38            column_names: None,
39        })
40    }
41
42    /// Create a batch from a list of columns
43    pub fn from_columns(
44        columns: Vec<ColumnArray>,
45        column_names: Option<Vec<String>>,
46    ) -> Result<Self, ExecutorError> {
47        if columns.is_empty() {
48            return Ok(Self {
49                row_count: 0,
50                columns,
51                column_names,
52            });
53        }
54
55        // Verify all columns have the same length
56        let row_count = columns[0].len();
57        for (idx, column) in columns.iter().enumerate() {
58            if column.len() != row_count {
59                return Err(ExecutorError::ColumnarLengthMismatch {
60                    context: format!("from_columns (column {})", idx),
61                    expected: row_count,
62                    actual: column.len(),
63                });
64            }
65        }
66
67        Ok(Self {
68            row_count,
69            columns,
70            column_names,
71        })
72    }
73
74    /// Convert from row-oriented storage to columnar batch
75    ///
76    /// This analyzes the first row to infer column types, then materializes
77    /// all values into type-specialized column arrays.
78    pub fn from_rows(rows: &[Row]) -> Result<Self, ExecutorError> {
79        if rows.is_empty() {
80            return Ok(Self::new(0));
81        }
82
83        let row_count = rows.len();
84        let column_count = rows[0].len();
85
86        // Infer column types from first row
87        let column_types = Self::infer_column_types(&rows[0]);
88
89        // Create column arrays
90        let mut columns = Vec::with_capacity(column_count);
91
92        for (col_idx, col_type) in column_types.iter().enumerate() {
93            let column = Self::extract_column(rows, col_idx, col_type)?;
94            columns.push(column);
95        }
96
97        Ok(Self {
98            row_count,
99            columns,
100            column_names: None,
101        })
102    }
103
104    /// Extract a single column from rows into a typed array
105    pub(crate) fn extract_column(
106        rows: &[Row],
107        col_idx: usize,
108        col_type: &ColumnType,
109    ) -> Result<ColumnArray, ExecutorError> {
110        match col_type {
111            ColumnType::Int64 => {
112                let mut values = Vec::with_capacity(rows.len());
113                let mut nulls = Vec::with_capacity(rows.len());
114                let mut has_nulls = false;
115
116                for row in rows {
117                    match row.get(col_idx) {
118                        Some(SqlValue::Integer(v)) => {
119                            values.push(*v);
120                            nulls.push(false);
121                        }
122                        Some(SqlValue::Null) => {
123                            values.push(0); // placeholder
124                            nulls.push(true);
125                            has_nulls = true;
126                        }
127                        Some(other) => {
128                            return Err(ExecutorError::ColumnarTypeMismatch {
129                                operation: "extract_column".to_string(),
130                                left_type: "Integer".to_string(),
131                                right_type: Some(format!("{:?}", other)),
132                            });
133                        }
134                        None => {
135                            values.push(0);
136                            nulls.push(true);
137                            has_nulls = true;
138                        }
139                    }
140                }
141
142                Ok(ColumnArray::Int64(
143                    Arc::new(values),
144                    if has_nulls { Some(Arc::new(nulls)) } else { None },
145                ))
146            }
147
148            ColumnType::Float64 => {
149                let mut values = Vec::with_capacity(rows.len());
150                let mut nulls = Vec::with_capacity(rows.len());
151                let mut has_nulls = false;
152
153                for row in rows {
154                    match row.get(col_idx) {
155                        Some(SqlValue::Double(v)) => {
156                            values.push(*v);
157                            nulls.push(false);
158                        }
159                        Some(SqlValue::Null) => {
160                            values.push(0.0); // placeholder
161                            nulls.push(true);
162                            has_nulls = true;
163                        }
164                        Some(other) => {
165                            return Err(ExecutorError::ColumnarTypeMismatch {
166                                operation: "extract_column".to_string(),
167                                left_type: "Double".to_string(),
168                                right_type: Some(format!("{:?}", other)),
169                            });
170                        }
171                        None => {
172                            values.push(0.0);
173                            nulls.push(true);
174                            has_nulls = true;
175                        }
176                    }
177                }
178
179                Ok(ColumnArray::Float64(
180                    Arc::new(values),
181                    if has_nulls { Some(Arc::new(nulls)) } else { None },
182                ))
183            }
184
185            ColumnType::String => {
186                let mut values = Vec::with_capacity(rows.len());
187                let mut nulls = Vec::with_capacity(rows.len());
188                let mut has_nulls = false;
189
190                for row in rows {
191                    match row.get(col_idx) {
192                        Some(SqlValue::Varchar(v)) => {
193                            values.push(v.clone());
194                            nulls.push(false);
195                        }
196                        Some(SqlValue::Null) => {
197                            values.push(String::new()); // placeholder
198                            nulls.push(true);
199                            has_nulls = true;
200                        }
201                        Some(other) => {
202                            return Err(ExecutorError::ColumnarTypeMismatch {
203                                operation: "extract_column".to_string(),
204                                left_type: "Varchar".to_string(),
205                                right_type: Some(format!("{:?}", other)),
206                            });
207                        }
208                        None => {
209                            values.push(String::new());
210                            nulls.push(true);
211                            has_nulls = true;
212                        }
213                    }
214                }
215
216                Ok(ColumnArray::String(
217                    Arc::new(values),
218                    if has_nulls { Some(Arc::new(nulls)) } else { None },
219                ))
220            }
221
222            ColumnType::Date | ColumnType::Mixed => {
223                // Store dates and mixed types as Mixed (fallback for non-SIMD types)
224                let mut values = Vec::with_capacity(rows.len());
225
226                for row in rows {
227                    let value = row.get(col_idx).cloned().unwrap_or(SqlValue::Null);
228                    values.push(value);
229                }
230
231                Ok(ColumnArray::Mixed(Arc::new(values)))
232            }
233
234            ColumnType::Boolean => {
235                let mut values = Vec::with_capacity(rows.len());
236                let mut nulls = Vec::with_capacity(rows.len());
237                let mut has_nulls = false;
238
239                for row in rows {
240                    match row.get(col_idx) {
241                        Some(SqlValue::Boolean(b)) => {
242                            values.push(if *b { 1 } else { 0 });
243                            nulls.push(false);
244                        }
245                        Some(SqlValue::Null) => {
246                            values.push(0); // placeholder
247                            nulls.push(true);
248                            has_nulls = true;
249                        }
250                        Some(other) => {
251                            return Err(ExecutorError::ColumnarTypeMismatch {
252                                operation: "extract_column".to_string(),
253                                left_type: "Boolean".to_string(),
254                                right_type: Some(format!("{:?}", other)),
255                            });
256                        }
257                        None => {
258                            values.push(0);
259                            nulls.push(true);
260                            has_nulls = true;
261                        }
262                    }
263                }
264
265                Ok(ColumnArray::Boolean(
266                    Arc::new(values),
267                    if has_nulls { Some(Arc::new(nulls)) } else { None },
268                ))
269            }
270        }
271    }
272
273    /// Infer column types from the first row
274    pub(crate) fn infer_column_types(first_row: &Row) -> Vec<ColumnType> {
275        let mut types = Vec::with_capacity(first_row.len());
276
277        for i in 0..first_row.len() {
278            let col_type = match first_row.get(i) {
279                Some(SqlValue::Integer(_)) => ColumnType::Int64,
280                Some(SqlValue::Double(_)) => ColumnType::Float64,
281                Some(SqlValue::Varchar(_)) => ColumnType::String,
282                Some(SqlValue::Date(_)) => ColumnType::Date,
283                Some(SqlValue::Boolean(_)) => ColumnType::Boolean,
284                _ => ColumnType::Mixed,
285            };
286            types.push(col_type);
287        }
288
289        types
290    }
291}
292
#[cfg(test)]
mod tests {
    use super::*;

    /// Uniform, non-NULL rows must yield typed columns without null masks.
    #[test]
    fn test_columnar_batch_creation() {
        let source = [(1, 10.5, "Alice"), (2, 20.5, "Bob"), (3, 30.5, "Charlie")];
        let rows: Vec<Row> = source
            .iter()
            .map(|&(id, amount, name)| {
                Row::new(vec![
                    SqlValue::Integer(id),
                    SqlValue::Double(amount),
                    SqlValue::Varchar(name.to_string()),
                ])
            })
            .collect();

        let batch = ColumnarBatch::from_rows(&rows).unwrap();

        assert_eq!(batch.row_count(), 3);
        assert_eq!(batch.column_count(), 3);

        // Column 0: integers
        match batch.column(0).unwrap() {
            ColumnArray::Int64(values, nulls) => {
                assert_eq!(values.as_slice(), &[1, 2, 3]);
                assert!(nulls.is_none());
            }
            _ => panic!("Expected Int64 column"),
        }

        // Column 1: doubles
        match batch.column(1).unwrap() {
            ColumnArray::Float64(values, nulls) => {
                assert_eq!(values.as_slice(), &[10.5, 20.5, 30.5]);
                assert!(nulls.is_none());
            }
            _ => panic!("Expected Float64 column"),
        }

        // Column 2: strings
        match batch.column(2).unwrap() {
            ColumnArray::String(values, nulls) => {
                assert_eq!(values.as_slice(), &["Alice", "Bob", "Charlie"]);
                assert!(nulls.is_none());
            }
            _ => panic!("Expected String column"),
        }
    }

    /// NULLs after the first row must keep the typed column and show up in its null mask.
    #[test]
    fn test_columnar_batch_with_nulls() {
        let rows = vec![
            Row::new(vec![SqlValue::Integer(1), SqlValue::Double(10.0)]),
            Row::new(vec![SqlValue::Null, SqlValue::Double(20.0)]),
            Row::new(vec![SqlValue::Integer(3), SqlValue::Null]),
        ];

        let batch = ColumnarBatch::from_rows(&rows).unwrap();

        // Column 0: NULL in the second row
        match batch.column(0).unwrap() {
            ColumnArray::Int64(values, Some(nulls)) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls.as_slice(), &[false, true, false]);
            }
            _ => panic!("Expected Int64 column with nulls"),
        }

        // Column 1: NULL in the third row
        match batch.column(1).unwrap() {
            ColumnArray::Float64(values, Some(nulls)) => {
                assert_eq!(values.len(), 3);
                assert_eq!(nulls.as_slice(), &[false, false, true]);
            }
            _ => panic!("Expected Float64 column with nulls"),
        }
    }
}
378}