1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_panic;
12use vortex_mask::Mask;
13
14use crate::ArrayRef;
15use crate::Canonical;
16use crate::IntoArray;
17use crate::LEGACY_SESSION;
18use crate::VortexSessionExecute;
19use crate::arrays::ListArray;
20use crate::arrays::listview::ListViewArrayExt;
21use crate::builders::ArrayBuilder;
22use crate::builders::DEFAULT_BUILDER_CAPACITY;
23use crate::builders::LazyBitBufferBuilder;
24use crate::builders::PrimitiveBuilder;
25use crate::builders::builder_with_capacity;
26use crate::canonical::ToCanonical;
27use crate::dtype::DType;
28use crate::dtype::IntegerPType;
29use crate::dtype::Nullability;
30use crate::dtype::Nullability::NonNullable;
31use crate::match_each_integer_ptype;
32use crate::scalar::ListScalar;
33use crate::scalar::Scalar;
34
35pub struct ListBuilder<O: IntegerPType> {
38 dtype: DType,
40
41 elements_builder: Box<dyn ArrayBuilder>,
43
44 offsets_builder: PrimitiveBuilder<O>,
46
47 nulls: LazyBitBufferBuilder,
49}
50
51impl<O: IntegerPType> ListBuilder<O> {
52 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
54 Self::with_capacity(
55 value_dtype,
56 nullability,
57 DEFAULT_BUILDER_CAPACITY * 2,
60 DEFAULT_BUILDER_CAPACITY,
61 )
62 }
63
64 pub fn with_capacity(
72 value_dtype: Arc<DType>,
73 nullability: Nullability,
74 elements_capacity: usize,
75 capacity: usize,
76 ) -> Self {
77 let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
78 let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
79
80 offsets_builder.append_zero();
82
83 Self {
84 elements_builder,
85 offsets_builder,
86 nulls: LazyBitBufferBuilder::new(capacity),
87 dtype: DType::List(value_dtype, nullability),
88 }
89 }
90
91 pub fn append_array_as_list(&mut self, array: &ArrayRef) -> VortexResult<()> {
98 vortex_ensure!(
99 array.dtype() == self.element_dtype(),
100 "Array dtype {:?} does not match list element dtype {:?}",
101 array.dtype(),
102 self.element_dtype()
103 );
104
105 self.elements_builder.extend_from_array(array);
106 self.nulls.append_non_null();
107 self.offsets_builder.append_value(
108 O::from_usize(self.elements_builder.len())
109 .vortex_expect("Failed to convert from usize to O"),
110 );
111
112 Ok(())
113 }
114
115 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
117 match value.elements() {
118 None => {
119 if self.dtype.nullability() == NonNullable {
120 vortex_bail!("Cannot append null value to non-nullable list");
121 }
122 self.append_null();
123 }
124 Some(elements) => {
125 for scalar in elements {
126 self.elements_builder.append_scalar(&scalar)?;
129 }
130
131 self.nulls.append_non_null();
132 self.offsets_builder.append_value(
133 O::from_usize(self.elements_builder.len())
134 .vortex_expect("Failed to convert from usize to O"),
135 );
136 }
137 }
138
139 Ok(())
140 }
141
142 pub fn finish_into_list(&mut self) -> ListArray {
144 assert_eq!(
145 self.offsets_builder.len(),
146 self.nulls.len() + 1,
147 "offsets length must be one more than nulls length."
148 );
149
150 ListArray::try_new(
151 self.elements_builder.finish(),
152 self.offsets_builder.finish(),
153 self.nulls.finish_with_nullability(self.dtype.nullability()),
154 )
155 .vortex_expect("Buffer, offsets, and validity must have same length.")
156 }
157
158 pub fn element_dtype(&self) -> &DType {
161 let DType::List(element_dtype, _) = &self.dtype else {
162 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
163 };
164
165 element_dtype
166 }
167}
168
169impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
170 fn as_any(&self) -> &dyn Any {
171 self
172 }
173
174 fn as_any_mut(&mut self) -> &mut dyn Any {
175 self
176 }
177
178 fn dtype(&self) -> &DType {
179 &self.dtype
180 }
181
182 fn len(&self) -> usize {
183 self.nulls.len()
184 }
185
186 fn append_zeros(&mut self, n: usize) {
187 let curr_len = self.elements_builder.len();
188 for _ in 0..n {
189 self.offsets_builder.append_value(
190 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
191 )
192 }
193 self.nulls.append_n_non_nulls(n);
194 }
195
196 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
197 let curr_len = self.elements_builder.len();
198 for _ in 0..n {
199 self.offsets_builder.append_value(
202 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
203 )
204 }
205 self.nulls.append_n_nulls(n);
206 }
207
208 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
209 vortex_ensure!(
210 scalar.dtype() == self.dtype(),
211 "ListBuilder expected scalar with dtype {}, got {}",
212 self.dtype(),
213 scalar.dtype()
214 );
215
216 self.append_value(scalar.as_list())
217 }
218
219 unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
220 let list = array.to_listview();
221 if list.is_empty() {
222 return;
223 }
224
225 self.nulls.append_validity_mask(
227 array
228 .validity()
229 .vortex_expect("validity_mask in extend_from_array_unchecked")
230 .to_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
231 .vortex_expect("Failed to compute validity mask"),
232 );
233
234 let elements = list.elements();
236 let offsets = list.offsets().to_primitive();
237 let sizes = list.sizes().to_primitive();
238
239 fn extend_inner<O, OffsetType, SizeType>(
240 builder: &mut ListBuilder<O>,
241 new_elements: &ArrayRef,
242 new_offsets: &[OffsetType],
243 new_sizes: &[SizeType],
244 ) where
245 O: IntegerPType,
246 OffsetType: IntegerPType,
247 SizeType: IntegerPType,
248 {
249 let num_lists = new_offsets.len();
250 debug_assert_eq!(num_lists, new_sizes.len());
251
252 let mut curr_offset = builder.elements_builder.len();
253 let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
254
255 for i in 0..new_offsets.len() {
258 let offset: usize = new_offsets[i].as_();
259 let size: usize = new_sizes[i].as_();
260
261 if size > 0 {
262 let list_elements = new_elements
263 .slice(offset..offset + size)
264 .vortex_expect("list builder slice");
265 builder.elements_builder.extend_from_array(&list_elements);
266 curr_offset += size;
267 }
268
269 let new_offset =
270 O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
271
272 offsets_range.set_value(i, new_offset);
273 }
274
275 unsafe { offsets_range.finish() };
278 }
279
280 match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
281 match_each_integer_ptype!(sizes.ptype(), |SizeType| {
282 extend_inner(
283 self,
284 elements,
285 offsets.as_slice::<OffsetType>(),
286 sizes.as_slice::<SizeType>(),
287 )
288 })
289 })
290 }
291
292 fn reserve_exact(&mut self, additional: usize) {
293 self.elements_builder.reserve_exact(additional);
294 self.offsets_builder.reserve_exact(additional);
295 self.nulls.reserve_exact(additional);
296 }
297
298 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
299 self.nulls = LazyBitBufferBuilder::new(validity.len());
300 self.nulls.append_validity_mask(validity);
301 }
302
303 fn finish(&mut self) -> ArrayRef {
304 self.finish_into_list().into_array()
305 }
306
307 fn finish_into_canonical(&mut self) -> Canonical {
308 Canonical::List(self.finish_into_list().into_array().to_listview())
309 }
310}
311
312#[cfg(test)]
313mod tests {
314 use std::sync::Arc;
315
316 use Nullability::NonNullable;
317 use Nullability::Nullable;
318 use vortex_buffer::buffer;
319 use vortex_error::VortexExpect;
320
321 use crate::IntoArray;
322 use crate::LEGACY_SESSION;
323 use crate::ToCanonical;
324 use crate::arrays::ChunkedArray;
325 use crate::arrays::PrimitiveArray;
326 use crate::arrays::list::ListArrayExt;
327 use crate::arrays::listview::ListViewArrayExt;
328 use crate::assert_arrays_eq;
329 use crate::builders::ArrayBuilder;
330 use crate::builders::list::ListArray;
331 use crate::builders::list::ListBuilder;
332 use crate::dtype::DType;
333 use crate::dtype::IntegerPType;
334 use crate::dtype::Nullability;
335 use crate::dtype::PType::I32;
336 use crate::executor::VortexSessionExecute;
337 use crate::scalar::Scalar;
338 use crate::validity::Validity;
339
340 #[test]
341 fn test_empty() {
342 let mut builder =
343 ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
344
345 let list = builder.finish();
346 assert_eq!(list.len(), 0);
347 }
348
349 #[test]
350 fn test_values() {
351 let dtype: Arc<DType> = Arc::new(I32.into());
352 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
353
354 builder
355 .append_value(
356 Scalar::list(
357 Arc::clone(&dtype),
358 vec![1i32.into(), 2i32.into(), 3i32.into()],
359 NonNullable,
360 )
361 .as_list(),
362 )
363 .unwrap();
364
365 builder
366 .append_value(
367 Scalar::list(
368 dtype,
369 vec![4i32.into(), 5i32.into(), 6i32.into()],
370 NonNullable,
371 )
372 .as_list(),
373 )
374 .unwrap();
375
376 let list = builder.finish();
377 assert_eq!(list.len(), 2);
378
379 let list_array = list.to_listview();
380
381 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
382 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 3);
383 }
384
385 #[test]
386 fn test_append_empty_list() {
387 let dtype: Arc<DType> = Arc::new(I32.into());
388 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
389
390 assert!(
391 builder
392 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
393 .is_ok()
394 )
395 }
396
397 #[test]
398 fn test_nullable_values() {
399 let dtype: Arc<DType> = Arc::new(I32.into());
400 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
401
402 builder
403 .append_value(
404 Scalar::list(
405 Arc::clone(&dtype),
406 vec![1i32.into(), 2i32.into(), 3i32.into()],
407 NonNullable,
408 )
409 .as_list(),
410 )
411 .unwrap();
412
413 builder
414 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
415 .unwrap();
416
417 builder
418 .append_value(
419 Scalar::list(
420 dtype,
421 vec![4i32.into(), 5i32.into(), 6i32.into()],
422 NonNullable,
423 )
424 .as_list(),
425 )
426 .unwrap();
427
428 let list = builder.finish();
429 assert_eq!(list.len(), 3);
430
431 let list_array = list.to_listview();
432
433 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
434 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 0);
435 assert_eq!(list_array.list_elements_at(2).unwrap().len(), 3);
436 }
437
438 fn test_extend_builder_gen<O: IntegerPType>() {
439 let list = ListArray::from_iter_opt_slow::<O, _, _>(
440 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
441 Arc::new(I32.into()),
442 )
443 .unwrap();
444 assert_eq!(list.len(), 3);
445
446 let mut ctx = LEGACY_SESSION.create_execution_ctx();
447
448 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
449 builder.extend_from_array(&list);
450 builder.extend_from_array(&list);
451 builder.extend_from_array(&list.slice(0..0).unwrap());
452 builder.extend_from_array(&list.slice(1..3).unwrap());
453
454 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
455 [
456 Some(vec![0, 1, 2]),
457 None,
458 Some(vec![4, 5]),
459 Some(vec![0, 1, 2]),
460 None,
461 Some(vec![4, 5]),
462 None,
463 Some(vec![4, 5]),
464 ],
465 Arc::new(DType::Primitive(I32, NonNullable)),
466 )
467 .unwrap()
468 .to_listview();
469
470 let actual = builder.finish_into_canonical().into_listview();
471
472 assert_arrays_eq!(actual.elements(), expected.elements());
473
474 assert_arrays_eq!(actual.offsets(), expected.offsets());
475
476 assert!(
477 actual
478 .validity()
479 .vortex_expect("list validity should be derivable")
480 .mask_eq(
481 &expected
482 .validity()
483 .vortex_expect("list validity should be derivable"),
484 &mut ctx,
485 )
486 .unwrap(),
487 );
488 }
489
490 #[test]
491 fn test_extend_builder() {
492 test_extend_builder_gen::<i8>();
493 test_extend_builder_gen::<i16>();
494 test_extend_builder_gen::<i32>();
495 test_extend_builder_gen::<i64>();
496
497 test_extend_builder_gen::<u8>();
498 test_extend_builder_gen::<u16>();
499 test_extend_builder_gen::<u32>();
500 test_extend_builder_gen::<u64>();
501 }
502
503 #[test]
504 pub fn test_array_with_gap() {
505 let one_trailing_unused_element = ListArray::try_new(
506 buffer![1, 2, 3, 4].into_array(),
507 buffer![0, 3].into_array(),
508 Validity::NonNullable,
509 )
510 .unwrap();
511
512 let second_array = ListArray::try_new(
513 buffer![5, 6].into_array(),
514 buffer![0, 2].into_array(),
515 Validity::NonNullable,
516 )
517 .unwrap();
518
519 let chunked_list = ChunkedArray::try_new(
520 vec![
521 one_trailing_unused_element.clone().into_array(),
522 second_array.clone().into_array(),
523 ],
524 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
525 );
526
527 let canon_values = chunked_list.unwrap().as_array().to_listview();
528
529 assert_eq!(
530 one_trailing_unused_element
531 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
532 .unwrap(),
533 canon_values
534 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
535 .unwrap()
536 );
537 assert_eq!(
538 second_array
539 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
540 .unwrap(),
541 canon_values
542 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
543 .unwrap()
544 );
545 }
546
547 #[test]
548 fn test_append_scalar() {
549 let dtype: Arc<DType> = Arc::new(I32.into());
550 let mut builder = ListBuilder::<u64>::with_capacity(Arc::clone(&dtype), Nullable, 20, 10);
551
552 let list_scalar1 =
554 Scalar::list(Arc::clone(&dtype), vec![1i32.into(), 2i32.into()], Nullable);
555 builder.append_scalar(&list_scalar1).unwrap();
556
557 let list_scalar2 = Scalar::list(
559 Arc::clone(&dtype),
560 vec![3i32.into(), 4i32.into(), 5i32.into()],
561 Nullable,
562 );
563 builder.append_scalar(&list_scalar2).unwrap();
564
565 let null_scalar = Scalar::null(DType::List(Arc::clone(&dtype), Nullable));
567 builder.append_scalar(&null_scalar).unwrap();
568
569 let array = builder.finish_into_list();
570 assert_eq!(array.len(), 3);
571
572 let scalar0 = array
575 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
576 .unwrap();
577 let list0 = scalar0.as_list();
578 assert_eq!(list0.len(), 2);
579 if let Some(list0_items) = list0.elements() {
580 assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
581 assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
582 }
583
584 let scalar1 = array
585 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
586 .unwrap();
587 let list1 = scalar1.as_list();
588 assert_eq!(list1.len(), 3);
589 if let Some(list1_items) = list1.elements() {
590 assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
591 assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
592 assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
593 }
594
595 let scalar2 = array
596 .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
597 .unwrap();
598 let list2 = scalar2.as_list();
599 assert!(list2.is_null()); assert!(
603 array
604 .validity()
605 .vortex_expect("list validity should be derivable")
606 .is_valid(0)
607 .unwrap()
608 );
609 assert!(
610 array
611 .validity()
612 .vortex_expect("list validity should be derivable")
613 .is_valid(1)
614 .unwrap()
615 );
616 assert!(
617 !array
618 .validity()
619 .vortex_expect("list validity should be derivable")
620 .is_valid(2)
621 .unwrap()
622 );
623
624 let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
626 let wrong_scalar = Scalar::from(42i32);
627 assert!(builder.append_scalar(&wrong_scalar).is_err());
628 }
629
630 #[test]
631 fn test_append_array_as_list() {
632 let dtype: Arc<DType> = Arc::new(I32.into());
633 let mut builder =
634 ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
635
636 let arr1 = buffer![1i32, 2, 3].into_array();
638 builder.append_array_as_list(&arr1).unwrap();
639
640 builder
642 .append_value(
643 Scalar::list(
644 Arc::clone(&dtype),
645 vec![10i32.into(), 11i32.into()],
646 NonNullable,
647 )
648 .as_list(),
649 )
650 .unwrap();
651
652 let arr2 = buffer![4i32, 5].into_array();
654 builder.append_array_as_list(&arr2).unwrap();
655
656 let arr3 = buffer![0i32; 0].into_array();
658 builder.append_array_as_list(&arr3).unwrap();
659
660 builder
662 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
663 .unwrap();
664
665 let list = builder.finish_into_list();
666 assert_eq!(list.len(), 5);
667
668 assert_arrays_eq!(
670 list.elements(),
671 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
672 );
673
674 assert_arrays_eq!(
676 list.offsets(),
677 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7, 7])
678 );
679
680 let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
682 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
683 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
684 }
685}