Skip to main content

vortex_array/builders/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::Arc;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_panic;
12use vortex_mask::Mask;
13
14use crate::ArrayRef;
15use crate::Canonical;
16use crate::ExecutionCtx;
17use crate::IntoArray;
18use crate::LEGACY_SESSION;
19use crate::VortexSessionExecute;
20use crate::arrays::ListArray;
21use crate::arrays::listview::ListViewArrayExt;
22use crate::builders::ArrayBuilder;
23use crate::builders::DEFAULT_BUILDER_CAPACITY;
24use crate::builders::LazyBitBufferBuilder;
25use crate::builders::PrimitiveBuilder;
26use crate::builders::builder_with_capacity;
27#[expect(deprecated)]
28use crate::canonical::ToCanonical as _;
29use crate::dtype::DType;
30use crate::dtype::IntegerPType;
31use crate::dtype::Nullability;
32use crate::dtype::Nullability::NonNullable;
33use crate::match_each_integer_ptype;
34use crate::scalar::ListScalar;
35use crate::scalar::Scalar;
36
37/// The builder for building a [`ListArray`], parametrized by the [`IntegerPType`] of the `offsets`
38/// builder.
39pub struct ListBuilder<O: IntegerPType> {
40    /// The [`DType`] of the [`ListArray`]. This **must** be a [`DType::List`].
41    dtype: DType,
42
43    /// The builder for the underlying elements of the [`ListArray`].
44    elements_builder: Box<dyn ArrayBuilder>,
45
46    /// The builder for the `offsets` into the `elements` array.
47    offsets_builder: PrimitiveBuilder<O>,
48
49    /// The null map builder of the [`ListArray`].
50    nulls: LazyBitBufferBuilder,
51}
52
53impl<O: IntegerPType> ListBuilder<O> {
54    /// Creates a new `ListBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
55    pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
56        Self::with_capacity(
57            value_dtype,
58            nullability,
59            // We arbitrarily choose 2 times the number of list scalars for the capacity of the
60            // elements builder since we cannot know this ahead of time.
61            DEFAULT_BUILDER_CAPACITY * 2,
62            DEFAULT_BUILDER_CAPACITY,
63        )
64    }
65
66    /// Create a new [`ListArray`] builder with a with the given `capacity`, as well as an initial
67    /// capacity for the `elements` builder (since we cannot know that ahead of time solely based on
68    /// the outer array `capacity`).
69    ///
70    /// # Notes
71    ///
72    /// The number of offsets is one more than the length (# of list scalars) in the array.
73    pub fn with_capacity(
74        value_dtype: Arc<DType>,
75        nullability: Nullability,
76        elements_capacity: usize,
77        capacity: usize,
78    ) -> Self {
79        let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
80        let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
81
82        // The first offset is always 0 and represents an empty list.
83        offsets_builder.append_zero();
84
85        Self {
86            elements_builder,
87            offsets_builder,
88            nulls: LazyBitBufferBuilder::new(capacity),
89            dtype: DType::List(value_dtype, nullability),
90        }
91    }
92
93    /// Appends an array as a single non-null list entry to the builder.
94    ///
95    /// The input `array` must have the same dtype as the element dtype of this list builder.
96    ///
97    /// Note that the list entry will be non-null but the elements themselves are allowed to be null
98    /// (only if the elements [`DType`] in nullable, of course).
99    pub fn append_array_as_list(
100        &mut self,
101        array: &ArrayRef,
102        ctx: &mut ExecutionCtx,
103    ) -> VortexResult<()> {
104        vortex_ensure!(
105            array.dtype() == self.element_dtype(),
106            "Array dtype {:?} does not match list element dtype {:?}",
107            array.dtype(),
108            self.element_dtype()
109        );
110
111        array.append_to_builder(self.elements_builder.as_mut(), ctx)?;
112        self.nulls.append_non_null();
113        self.offsets_builder.append_value(
114            O::from_usize(self.elements_builder.len())
115                .vortex_expect("Failed to convert from usize to O"),
116        );
117
118        Ok(())
119    }
120
121    /// Appends a list `value` to the builder.
122    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
123        match value.elements() {
124            None => {
125                if self.dtype.nullability() == NonNullable {
126                    vortex_bail!("Cannot append null value to non-nullable list");
127                }
128                self.append_null();
129            }
130            Some(elements) => {
131                for scalar in elements {
132                    // TODO(connor): This is slow, we should be able to append multiple values at
133                    // once, or the list scalar should hold an Array
134                    self.elements_builder.append_scalar(&scalar)?;
135                }
136
137                self.nulls.append_non_null();
138                self.offsets_builder.append_value(
139                    O::from_usize(self.elements_builder.len())
140                        .vortex_expect("Failed to convert from usize to O"),
141                );
142            }
143        }
144
145        Ok(())
146    }
147
148    /// Finishes the builder directly into a [`ListArray`].
149    pub fn finish_into_list(&mut self) -> ListArray {
150        assert_eq!(
151            self.offsets_builder.len(),
152            self.nulls.len() + 1,
153            "offsets length must be one more than nulls length."
154        );
155
156        ListArray::try_new(
157            self.elements_builder.finish(),
158            self.offsets_builder.finish(),
159            self.nulls.finish_with_nullability(self.dtype.nullability()),
160        )
161        .vortex_expect("Buffer, offsets, and validity must have same length.")
162    }
163
164    /// The [`DType`] of the inner elements. Note that this is **not** the same as the [`DType`] of
165    /// the outer `List`.
166    pub fn element_dtype(&self) -> &DType {
167        let DType::List(element_dtype, _) = &self.dtype else {
168            vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
169        };
170
171        element_dtype
172    }
173}
174
175impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
176    fn as_any(&self) -> &dyn Any {
177        self
178    }
179
180    fn as_any_mut(&mut self) -> &mut dyn Any {
181        self
182    }
183
184    fn dtype(&self) -> &DType {
185        &self.dtype
186    }
187
188    fn len(&self) -> usize {
189        self.nulls.len()
190    }
191
192    fn append_zeros(&mut self, n: usize) {
193        let curr_len = self.elements_builder.len();
194        for _ in 0..n {
195            self.offsets_builder.append_value(
196                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
197            )
198        }
199        self.nulls.append_n_non_nulls(n);
200    }
201
202    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
203        let curr_len = self.elements_builder.len();
204        for _ in 0..n {
205            // A list with a null element is can be a list with a zero-span offset and a validity
206            // bit set
207            self.offsets_builder.append_value(
208                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
209            )
210        }
211        self.nulls.append_n_nulls(n);
212    }
213
214    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
215        vortex_ensure!(
216            scalar.dtype() == self.dtype(),
217            "ListBuilder expected scalar with dtype {}, got {}",
218            self.dtype(),
219            scalar.dtype()
220        );
221
222        self.append_value(scalar.as_list())
223    }
224
225    unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
226        #[expect(deprecated)]
227        let list = array.to_listview();
228        if list.is_empty() {
229            return;
230        }
231
232        // Append validity information.
233        self.nulls.append_validity_mask(
234            &array
235                .validity()
236                .vortex_expect("validity_mask in extend_from_array_unchecked")
237                .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
238                .vortex_expect("Failed to compute validity mask"),
239        );
240
241        // Note that `ListViewArray` has `n` offsets and sizes, not `n+1` offsets like `ListArray`.
242        let elements = list.elements();
243        #[expect(deprecated)]
244        let offsets = list.offsets().to_primitive();
245        #[expect(deprecated)]
246        let sizes = list.sizes().to_primitive();
247
248        fn extend_inner<O, OffsetType, SizeType>(
249            builder: &mut ListBuilder<O>,
250            new_elements: &ArrayRef,
251            new_offsets: &[OffsetType],
252            new_sizes: &[SizeType],
253        ) where
254            O: IntegerPType,
255            OffsetType: IntegerPType,
256            SizeType: IntegerPType,
257        {
258            let num_lists = new_offsets.len();
259            debug_assert_eq!(num_lists, new_sizes.len());
260
261            let mut curr_offset = builder.elements_builder.len();
262            let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
263
264            // We need to append each list individually, converting from `ListViewArray` format to
265            // the `ListArray` format that `ListBuilder` expects.
266            for i in 0..new_offsets.len() {
267                let offset: usize = new_offsets[i].as_();
268                let size: usize = new_sizes[i].as_();
269
270                if size > 0 {
271                    let list_elements = new_elements
272                        .slice(offset..offset + size)
273                        .vortex_expect("list builder slice");
274                    builder.elements_builder.extend_from_array(&list_elements);
275                    curr_offset += size;
276                }
277
278                let new_offset =
279                    O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
280
281                offsets_range.set_value(i, new_offset);
282            }
283
284            // SAFETY: We have initialized all `num_lists` values, and since the `offsets` array is
285            // non-nullable, we are done.
286            unsafe { offsets_range.finish() };
287        }
288
289        match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
290            match_each_integer_ptype!(sizes.ptype(), |SizeType| {
291                extend_inner(
292                    self,
293                    elements,
294                    offsets.as_slice::<OffsetType>(),
295                    sizes.as_slice::<SizeType>(),
296                )
297            })
298        })
299    }
300
301    fn reserve_exact(&mut self, additional: usize) {
302        self.elements_builder.reserve_exact(additional);
303        self.offsets_builder.reserve_exact(additional);
304        self.nulls.reserve_exact(additional);
305    }
306
307    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
308        self.nulls = LazyBitBufferBuilder::from_validity_mask(validity);
309    }
310
311    fn finish(&mut self) -> ArrayRef {
312        self.finish_into_list().into_array()
313    }
314
315    fn finish_into_canonical(&mut self) -> Canonical {
316        #[expect(deprecated)]
317        let listview = self.finish_into_list().into_array().to_listview();
318        Canonical::List(listview)
319    }
320}
321
322#[cfg(test)]
323mod tests {
324    use std::sync::Arc;
325
326    use Nullability::NonNullable;
327    use Nullability::Nullable;
328    use vortex_buffer::buffer;
329    use vortex_error::VortexExpect;
330
331    use crate::IntoArray;
332    #[expect(deprecated)]
333    use crate::ToCanonical as _;
334    use crate::array_session;
335    use crate::arrays::ChunkedArray;
336    use crate::arrays::PrimitiveArray;
337    use crate::arrays::list::ListArrayExt;
338    use crate::arrays::listview::ListViewArrayExt;
339    use crate::assert_arrays_eq;
340    use crate::builders::ArrayBuilder;
341    use crate::builders::list::ListArray;
342    use crate::builders::list::ListBuilder;
343    use crate::dtype::DType;
344    use crate::dtype::IntegerPType;
345    use crate::dtype::Nullability;
346    use crate::dtype::PType::I32;
347    use crate::executor::VortexSessionExecute;
348    use crate::scalar::Scalar;
349    use crate::validity::Validity;
350
351    #[test]
352    fn test_empty() {
353        let mut builder =
354            ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
355
356        let list = builder.finish();
357        assert_eq!(list.len(), 0);
358    }
359
360    #[test]
361    fn test_values() {
362        let dtype: Arc<DType> = Arc::new(I32.into());
363        let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
364
365        builder
366            .append_value(
367                Scalar::list(
368                    Arc::clone(&dtype),
369                    vec![1i32.into(), 2i32.into(), 3i32.into()],
370                    NonNullable,
371                )
372                .as_list(),
373            )
374            .unwrap();
375
376        builder
377            .append_value(
378                Scalar::list(
379                    dtype,
380                    vec![4i32.into(), 5i32.into(), 6i32.into()],
381                    NonNullable,
382                )
383                .as_list(),
384            )
385            .unwrap();
386
387        let list = builder.finish();
388        assert_eq!(list.len(), 2);
389
390        #[expect(deprecated)]
391        let list_array = list.to_listview();
392
393        assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
394        assert_eq!(list_array.list_elements_at(1).unwrap().len(), 3);
395    }
396
397    #[test]
398    fn test_append_empty_list() {
399        let dtype: Arc<DType> = Arc::new(I32.into());
400        let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
401
402        assert!(
403            builder
404                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
405                .is_ok()
406        )
407    }
408
409    #[test]
410    fn test_nullable_values() {
411        let dtype: Arc<DType> = Arc::new(I32.into());
412        let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
413
414        builder
415            .append_value(
416                Scalar::list(
417                    Arc::clone(&dtype),
418                    vec![1i32.into(), 2i32.into(), 3i32.into()],
419                    NonNullable,
420                )
421                .as_list(),
422            )
423            .unwrap();
424
425        builder
426            .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
427            .unwrap();
428
429        builder
430            .append_value(
431                Scalar::list(
432                    dtype,
433                    vec![4i32.into(), 5i32.into(), 6i32.into()],
434                    NonNullable,
435                )
436                .as_list(),
437            )
438            .unwrap();
439
440        let list = builder.finish();
441        assert_eq!(list.len(), 3);
442
443        #[expect(deprecated)]
444        let list_array = list.to_listview();
445
446        assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
447        assert_eq!(list_array.list_elements_at(1).unwrap().len(), 0);
448        assert_eq!(list_array.list_elements_at(2).unwrap().len(), 3);
449    }
450
451    fn test_extend_builder_gen<O: IntegerPType>() {
452        let list = ListArray::from_iter_opt_slow::<O, _, _>(
453            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
454            Arc::new(I32.into()),
455        )
456        .unwrap();
457        assert_eq!(list.len(), 3);
458
459        let mut ctx = array_session().create_execution_ctx();
460
461        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
462        builder.extend_from_array(&list);
463        builder.extend_from_array(&list);
464        builder.extend_from_array(&list.slice(0..0).unwrap());
465        builder.extend_from_array(&list.slice(1..3).unwrap());
466
467        #[expect(deprecated)]
468        let expected = ListArray::from_iter_opt_slow::<O, _, _>(
469            [
470                Some(vec![0, 1, 2]),
471                None,
472                Some(vec![4, 5]),
473                Some(vec![0, 1, 2]),
474                None,
475                Some(vec![4, 5]),
476                None,
477                Some(vec![4, 5]),
478            ],
479            Arc::new(DType::Primitive(I32, NonNullable)),
480        )
481        .unwrap()
482        .to_listview();
483
484        let actual = builder.finish_into_canonical().into_listview();
485
486        assert_arrays_eq!(actual.elements(), expected.elements(), &mut ctx);
487
488        assert_arrays_eq!(actual.offsets(), expected.offsets(), &mut ctx);
489
490        assert!(
491            actual
492                .validity()
493                .vortex_expect("list validity should be derivable")
494                .mask_eq(
495                    &expected
496                        .validity()
497                        .vortex_expect("list validity should be derivable"),
498                    actual.len(),
499                    &mut ctx,
500                )
501                .unwrap(),
502        );
503    }
504
505    #[test]
506    fn test_extend_builder() {
507        test_extend_builder_gen::<i8>();
508        test_extend_builder_gen::<i16>();
509        test_extend_builder_gen::<i32>();
510        test_extend_builder_gen::<i64>();
511
512        test_extend_builder_gen::<u8>();
513        test_extend_builder_gen::<u16>();
514        test_extend_builder_gen::<u32>();
515        test_extend_builder_gen::<u64>();
516    }
517
518    #[test]
519    pub fn test_array_with_gap() {
520        let one_trailing_unused_element = ListArray::try_new(
521            buffer![1, 2, 3, 4].into_array(),
522            buffer![0, 3].into_array(),
523            Validity::NonNullable,
524        )
525        .unwrap();
526
527        let second_array = ListArray::try_new(
528            buffer![5, 6].into_array(),
529            buffer![0, 2].into_array(),
530            Validity::NonNullable,
531        )
532        .unwrap();
533
534        let chunked_list = ChunkedArray::try_new(
535            vec![
536                one_trailing_unused_element.clone().into_array(),
537                second_array.clone().into_array(),
538            ],
539            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
540        );
541
542        #[expect(deprecated)]
543        let canon_values = chunked_list.unwrap().as_array().to_listview();
544
545        assert_eq!(
546            one_trailing_unused_element
547                .execute_scalar(0, &mut array_session().create_execution_ctx())
548                .unwrap(),
549            canon_values
550                .execute_scalar(0, &mut array_session().create_execution_ctx())
551                .unwrap()
552        );
553        assert_eq!(
554            second_array
555                .execute_scalar(0, &mut array_session().create_execution_ctx())
556                .unwrap(),
557            canon_values
558                .execute_scalar(1, &mut array_session().create_execution_ctx())
559                .unwrap()
560        );
561    }
562
563    #[test]
564    fn test_append_scalar() {
565        let dtype: Arc<DType> = Arc::new(I32.into());
566        let mut builder = ListBuilder::<u64>::with_capacity(Arc::clone(&dtype), Nullable, 20, 10);
567
568        // Test appending a valid list.
569        let list_scalar1 =
570            Scalar::list(Arc::clone(&dtype), vec![1i32.into(), 2i32.into()], Nullable);
571        builder.append_scalar(&list_scalar1).unwrap();
572
573        // Test appending another list.
574        let list_scalar2 = Scalar::list(
575            Arc::clone(&dtype),
576            vec![3i32.into(), 4i32.into(), 5i32.into()],
577            Nullable,
578        );
579        builder.append_scalar(&list_scalar2).unwrap();
580
581        // Test appending null value.
582        let null_scalar = Scalar::null(DType::List(Arc::clone(&dtype), Nullable));
583        builder.append_scalar(&null_scalar).unwrap();
584
585        let array = builder.finish_into_list();
586        assert_eq!(array.len(), 3);
587
588        let mut ctx = array_session().create_execution_ctx();
589
590        // Check actual values using scalar_at.
591
592        let scalar0 = array.execute_scalar(0, &mut ctx).unwrap();
593        let list0 = scalar0.as_list();
594        assert_eq!(list0.len(), 2);
595        if let Some(list0_items) = list0.elements() {
596            assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
597            assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
598        }
599
600        let scalar1 = array.execute_scalar(1, &mut ctx).unwrap();
601        let list1 = scalar1.as_list();
602        assert_eq!(list1.len(), 3);
603        if let Some(list1_items) = list1.elements() {
604            assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
605            assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
606            assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
607        }
608
609        let scalar2 = array.execute_scalar(2, &mut ctx).unwrap();
610        let list2 = scalar2.as_list();
611        assert!(list2.is_null()); // This should be null.
612
613        // Check validity.
614        assert!(
615            array
616                .validity()
617                .vortex_expect("list validity should be derivable")
618                .execute_is_valid(0, &mut ctx)
619                .unwrap()
620        );
621        assert!(
622            array
623                .validity()
624                .vortex_expect("list validity should be derivable")
625                .execute_is_valid(1, &mut ctx)
626                .unwrap()
627        );
628        assert!(
629            !array
630                .validity()
631                .vortex_expect("list validity should be derivable")
632                .execute_is_valid(2, &mut ctx)
633                .unwrap()
634        );
635
636        // Test wrong dtype error.
637        let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
638        let wrong_scalar = Scalar::from(42i32);
639        assert!(builder.append_scalar(&wrong_scalar).is_err());
640    }
641
642    #[test]
643    fn test_append_array_as_list() {
644        let dtype: Arc<DType> = Arc::new(I32.into());
645        let mut ctx = array_session().create_execution_ctx();
646        let mut builder =
647            ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
648
649        // Append a primitive array as a single list entry.
650        let arr1 = buffer![1i32, 2, 3].into_array();
651        builder.append_array_as_list(&arr1, &mut ctx).unwrap();
652
653        // Interleave with a list scalar.
654        builder
655            .append_value(
656                Scalar::list(
657                    Arc::clone(&dtype),
658                    vec![10i32.into(), 11i32.into()],
659                    NonNullable,
660                )
661                .as_list(),
662            )
663            .unwrap();
664
665        // Append another primitive array as a single list entry.
666        let arr2 = buffer![4i32, 5].into_array();
667        builder.append_array_as_list(&arr2, &mut ctx).unwrap();
668
669        // Append an empty array as a single list entry (empty list).
670        let arr3 = buffer![0i32; 0].into_array();
671        builder.append_array_as_list(&arr3, &mut ctx).unwrap();
672
673        // Interleave with another list scalar (empty list).
674        builder
675            .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
676            .unwrap();
677
678        let list = builder.finish_into_list();
679        assert_eq!(list.len(), 5);
680
681        // Verify elements array: [1, 2, 3, 10, 11, 4, 5].
682        assert_arrays_eq!(
683            list.elements(),
684            PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5]),
685            &mut ctx
686        );
687
688        // Verify offsets array.
689        assert_arrays_eq!(
690            list.offsets(),
691            PrimitiveArray::from_iter([0u32, 3, 5, 7, 7, 7]),
692            &mut ctx
693        );
694
695        // Test dtype mismatch error.
696        let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
697        let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
698        assert!(
699            builder
700                .append_array_as_list(&wrong_dtype_arr, &mut ctx)
701                .is_err()
702        );
703    }
704}