vibesql_executor/select/columnar/batch/
builder.rs1use std::sync::Arc;
7
8use vibesql_storage::Row;
9use vibesql_types::SqlValue;
10
11use super::types::{ColumnArray, ColumnType, ColumnarBatch};
12use crate::errors::ExecutorError;
13
14impl ColumnarBatch {
15 pub fn new(column_count: usize) -> Self {
17 Self { row_count: 0, columns: Vec::with_capacity(column_count), column_names: None }
18 }
19
20 pub fn with_capacity(_row_count: usize, column_count: usize) -> Self {
22 Self { row_count: 0, columns: Vec::with_capacity(column_count), column_names: None }
23 }
24
25 pub fn empty(column_count: usize) -> Result<Self, ExecutorError> {
27 Ok(Self {
28 row_count: 0,
29 columns: vec![ColumnArray::Mixed(Arc::new(vec![])); column_count],
30 column_names: None,
31 })
32 }
33
34 pub fn from_columns(
36 columns: Vec<ColumnArray>,
37 column_names: Option<Vec<String>>,
38 ) -> Result<Self, ExecutorError> {
39 if columns.is_empty() {
40 return Ok(Self { row_count: 0, columns, column_names });
41 }
42
43 let row_count = columns[0].len();
45 for (idx, column) in columns.iter().enumerate() {
46 if column.len() != row_count {
47 return Err(ExecutorError::ColumnarLengthMismatch {
48 context: format!("from_columns (column {})", idx),
49 expected: row_count,
50 actual: column.len(),
51 });
52 }
53 }
54
55 Ok(Self { row_count, columns, column_names })
56 }
57
58 pub fn from_rows(rows: &[Row]) -> Result<Self, ExecutorError> {
63 if rows.is_empty() {
64 return Ok(Self::new(0));
65 }
66
67 let row_count = rows.len();
68 let column_count = rows[0].len();
69
70 let column_types = Self::infer_column_types(&rows[0]);
72
73 let mut columns = Vec::with_capacity(column_count);
75
76 for (col_idx, col_type) in column_types.iter().enumerate() {
77 let column = Self::extract_column(rows, col_idx, col_type)?;
78 columns.push(column);
79 }
80
81 Ok(Self { row_count, columns, column_names: None })
82 }
83
84 pub fn from_rows_selective(
107 rows: &[Row],
108 column_indices: &[usize],
109 ) -> Result<Self, ExecutorError> {
110 if rows.is_empty() || column_indices.is_empty() {
111 return Ok(Self::new(0));
112 }
113
114 let row_count = rows.len();
115
116 let column_types: Vec<ColumnType> = column_indices
118 .iter()
119 .map(|&col_idx| {
120 rows[0].get(col_idx).map(Self::infer_type_from_value).unwrap_or(ColumnType::Mixed)
121 })
122 .collect();
123
124 let mut columns = Vec::with_capacity(column_indices.len());
126
127 for (batch_idx, &col_idx) in column_indices.iter().enumerate() {
128 let column = Self::extract_column(rows, col_idx, &column_types[batch_idx])?;
129 columns.push(column);
130 }
131
132 Ok(Self { row_count, columns, column_names: None })
133 }
134
135 pub(crate) fn extract_column(
140 rows: &[Row],
141 col_idx: usize,
142 col_type: &ColumnType,
143 ) -> Result<ColumnArray, ExecutorError> {
144 match col_type {
145 ColumnType::Int64 => {
146 let mut values = Vec::with_capacity(rows.len());
147 let mut nulls = Vec::with_capacity(rows.len());
148 let mut has_nulls = false;
149
150 for row in rows {
151 match row.get(col_idx) {
152 Some(SqlValue::Integer(v)) => {
153 values.push(*v);
154 nulls.push(false);
155 }
156 Some(SqlValue::Null) => {
157 values.push(0); nulls.push(true);
159 has_nulls = true;
160 }
161 Some(_other) => {
162 return Self::extract_column(rows, col_idx, &ColumnType::Mixed);
164 }
165 None => {
166 values.push(0);
167 nulls.push(true);
168 has_nulls = true;
169 }
170 }
171 }
172
173 Ok(ColumnArray::Int64(
174 Arc::new(values),
175 if has_nulls { Some(Arc::new(nulls)) } else { None },
176 ))
177 }
178
179 ColumnType::Float64 => {
180 let mut values = Vec::with_capacity(rows.len());
181 let mut nulls = Vec::with_capacity(rows.len());
182 let mut has_nulls = false;
183
184 for row in rows {
185 match row.get(col_idx) {
186 Some(SqlValue::Double(v)) => {
187 values.push(*v);
188 nulls.push(false);
189 }
190 Some(SqlValue::Null) => {
191 values.push(0.0); nulls.push(true);
193 has_nulls = true;
194 }
195 Some(_other) => {
196 return Self::extract_column(rows, col_idx, &ColumnType::Mixed);
198 }
199 None => {
200 values.push(0.0);
201 nulls.push(true);
202 has_nulls = true;
203 }
204 }
205 }
206
207 Ok(ColumnArray::Float64(
208 Arc::new(values),
209 if has_nulls { Some(Arc::new(nulls)) } else { None },
210 ))
211 }
212
213 ColumnType::String => {
214 let mut values = Vec::with_capacity(rows.len());
215 let mut nulls = Vec::with_capacity(rows.len());
216 let mut has_nulls = false;
217
218 for row in rows {
219 match row.get(col_idx) {
220 Some(SqlValue::Varchar(v)) => {
221 values.push(Arc::from(v.as_str()));
222 nulls.push(false);
223 }
224 Some(SqlValue::Null) => {
225 values.push(Arc::from("")); nulls.push(true);
227 has_nulls = true;
228 }
229 Some(_other) => {
230 return Self::extract_column(rows, col_idx, &ColumnType::Mixed);
232 }
233 None => {
234 values.push(Arc::from(""));
235 nulls.push(true);
236 has_nulls = true;
237 }
238 }
239 }
240
241 Ok(ColumnArray::String(
242 Arc::new(values),
243 if has_nulls { Some(Arc::new(nulls)) } else { None },
244 ))
245 }
246
247 ColumnType::Date | ColumnType::Mixed => {
248 let mut values = Vec::with_capacity(rows.len());
250
251 for row in rows {
252 let value = row.get(col_idx).cloned().unwrap_or(SqlValue::Null);
253 values.push(value);
254 }
255
256 Ok(ColumnArray::Mixed(Arc::new(values)))
257 }
258
259 ColumnType::Boolean => {
260 let mut values = Vec::with_capacity(rows.len());
261 let mut nulls = Vec::with_capacity(rows.len());
262 let mut has_nulls = false;
263
264 for row in rows {
265 match row.get(col_idx) {
266 Some(SqlValue::Boolean(b)) => {
267 values.push(if *b { 1 } else { 0 });
268 nulls.push(false);
269 }
270 Some(SqlValue::Null) => {
271 values.push(0); nulls.push(true);
273 has_nulls = true;
274 }
275 Some(_other) => {
276 return Self::extract_column(rows, col_idx, &ColumnType::Mixed);
278 }
279 None => {
280 values.push(0);
281 nulls.push(true);
282 has_nulls = true;
283 }
284 }
285 }
286
287 Ok(ColumnArray::Boolean(
288 Arc::new(values),
289 if has_nulls { Some(Arc::new(nulls)) } else { None },
290 ))
291 }
292 }
293 }
294
295 pub(crate) fn infer_column_types(first_row: &Row) -> Vec<ColumnType> {
297 let mut types = Vec::with_capacity(first_row.len());
298
299 for i in 0..first_row.len() {
300 let col_type =
301 first_row.get(i).map(Self::infer_type_from_value).unwrap_or(ColumnType::Mixed);
302 types.push(col_type);
303 }
304
305 types
306 }
307
308 fn infer_type_from_value(value: &SqlValue) -> ColumnType {
310 match value {
311 SqlValue::Integer(_) => ColumnType::Int64,
312 SqlValue::Double(_) => ColumnType::Float64,
313 SqlValue::Varchar(_) => ColumnType::String,
314 SqlValue::Date(_) => ColumnType::Date,
315 SqlValue::Boolean(_) => ColumnType::Boolean,
316 _ => ColumnType::Mixed,
317 }
318 }
319}
320
321#[cfg(test)]
322mod tests {
323 use super::*;
324
325 #[test]
326 fn test_columnar_batch_creation() {
327 let rows = vec![
328 Row::new(vec![
329 SqlValue::Integer(1),
330 SqlValue::Double(10.5),
331 SqlValue::Varchar(arcstr::ArcStr::from("Alice")),
332 ]),
333 Row::new(vec![
334 SqlValue::Integer(2),
335 SqlValue::Double(20.5),
336 SqlValue::Varchar(arcstr::ArcStr::from("Bob")),
337 ]),
338 Row::new(vec![
339 SqlValue::Integer(3),
340 SqlValue::Double(30.5),
341 SqlValue::Varchar(arcstr::ArcStr::from("Charlie")),
342 ]),
343 ];
344
345 let batch = ColumnarBatch::from_rows(&rows).unwrap();
346
347 assert_eq!(batch.row_count(), 3);
348 assert_eq!(batch.column_count(), 3);
349
350 let col0 = batch.column(0).unwrap();
352 if let ColumnArray::Int64(values, nulls) = col0 {
353 assert_eq!(values.as_slice(), &[1, 2, 3]);
354 assert!(nulls.is_none());
355 } else {
356 panic!("Expected Int64 column");
357 }
358
359 let col1 = batch.column(1).unwrap();
361 if let ColumnArray::Float64(values, nulls) = col1 {
362 assert_eq!(values.as_slice(), &[10.5, 20.5, 30.5]);
363 assert!(nulls.is_none());
364 } else {
365 panic!("Expected Float64 column");
366 }
367
368 let col2 = batch.column(2).unwrap();
370 if let ColumnArray::String(values, nulls) = col2 {
371 let str_refs: Vec<&str> = values.iter().map(|s| s.as_ref()).collect();
372 assert_eq!(str_refs, vec!["Alice", "Bob", "Charlie"]);
373 assert!(nulls.is_none());
374 } else {
375 panic!("Expected String column");
376 }
377 }
378
379 #[test]
380 fn test_columnar_batch_with_nulls() {
381 let rows = vec![
382 Row::new(vec![SqlValue::Integer(1), SqlValue::Double(10.0)]),
383 Row::new(vec![SqlValue::Null, SqlValue::Double(20.0)]),
384 Row::new(vec![SqlValue::Integer(3), SqlValue::Null]),
385 ];
386
387 let batch = ColumnarBatch::from_rows(&rows).unwrap();
388
389 let col0 = batch.column(0).unwrap();
391 if let ColumnArray::Int64(values, Some(nulls)) = col0 {
392 assert_eq!(values.len(), 3);
393 assert_eq!(nulls.as_slice(), &[false, true, false]);
394 } else {
395 panic!("Expected Int64 column with nulls");
396 }
397
398 let col1 = batch.column(1).unwrap();
400 if let ColumnArray::Float64(values, Some(nulls)) = col1 {
401 assert_eq!(values.len(), 3);
402 assert_eq!(nulls.as_slice(), &[false, false, true]);
403 } else {
404 panic!("Expected Float64 column with nulls");
405 }
406 }
407
408 #[test]
409 fn test_columnar_batch_mixed_types() {
410 let rows = vec![
413 Row::new(vec![SqlValue::Varchar(arcstr::ArcStr::from("abc")), SqlValue::Null]),
414 Row::new(vec![SqlValue::Null, SqlValue::Varchar(arcstr::ArcStr::from("xyz"))]),
415 Row::new(vec![SqlValue::Integer(11), SqlValue::Integer(22)]),
416 Row::new(vec![SqlValue::Integer(33), SqlValue::Integer(44)]),
417 ];
418
419 let batch = ColumnarBatch::from_rows(&rows).unwrap();
420
421 assert_eq!(batch.row_count(), 4);
422 assert_eq!(batch.column_count(), 2);
423
424 let col0 = batch.column(0).unwrap();
426 if let ColumnArray::Mixed(values) = col0 {
427 assert_eq!(values.len(), 4);
428 assert_eq!(values[0], SqlValue::Varchar(arcstr::ArcStr::from("abc")));
429 assert_eq!(values[1], SqlValue::Null);
430 assert_eq!(values[2], SqlValue::Integer(11));
431 assert_eq!(values[3], SqlValue::Integer(33));
432 } else {
433 panic!("Expected Mixed column, got {:?}", col0);
434 }
435
436 let col1 = batch.column(1).unwrap();
438 if let ColumnArray::Mixed(values) = col1 {
439 assert_eq!(values.len(), 4);
440 assert_eq!(values[0], SqlValue::Null);
441 assert_eq!(values[1], SqlValue::Varchar(arcstr::ArcStr::from("xyz")));
442 assert_eq!(values[2], SqlValue::Integer(22));
443 assert_eq!(values[3], SqlValue::Integer(44));
444 } else {
445 panic!("Expected Mixed column, got {:?}", col1);
446 }
447 }
448
449 #[test]
450 fn test_columnar_batch_mixed_types_int_first() {
451 let rows = vec![
453 Row::new(vec![SqlValue::Integer(11), SqlValue::Integer(22)]),
454 Row::new(vec![SqlValue::Varchar(arcstr::ArcStr::from("abc")), SqlValue::Null]),
455 ];
456
457 let batch = ColumnarBatch::from_rows(&rows).unwrap();
458
459 let col0 = batch.column(0).unwrap();
461 if let ColumnArray::Mixed(values) = col0 {
462 assert_eq!(values.len(), 2);
463 assert_eq!(values[0], SqlValue::Integer(11));
464 assert_eq!(values[1], SqlValue::Varchar(arcstr::ArcStr::from("abc")));
465 } else {
466 panic!("Expected Mixed column for mixed integer/varchar, got {:?}", col0);
467 }
468 }
469}