Skip to main content

vortex_array/builders/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::Arc;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_panic;
12use vortex_mask::Mask;
13
14use crate::ArrayRef;
15use crate::Canonical;
16use crate::IntoArray;
17use crate::LEGACY_SESSION;
18use crate::VortexSessionExecute;
19use crate::arrays::ListArray;
20use crate::arrays::listview::ListViewArrayExt;
21use crate::builders::ArrayBuilder;
22use crate::builders::DEFAULT_BUILDER_CAPACITY;
23use crate::builders::LazyBitBufferBuilder;
24use crate::builders::PrimitiveBuilder;
25use crate::builders::builder_with_capacity;
26#[expect(deprecated)]
27use crate::canonical::ToCanonical as _;
28use crate::dtype::DType;
29use crate::dtype::IntegerPType;
30use crate::dtype::Nullability;
31use crate::dtype::Nullability::NonNullable;
32use crate::match_each_integer_ptype;
33use crate::scalar::ListScalar;
34use crate::scalar::Scalar;
35
36/// The builder for building a [`ListArray`], parametrized by the [`IntegerPType`] of the `offsets`
37/// builder.
38pub struct ListBuilder<O: IntegerPType> {
39    /// The [`DType`] of the [`ListArray`]. This **must** be a [`DType::List`].
40    dtype: DType,
41
42    /// The builder for the underlying elements of the [`ListArray`].
43    elements_builder: Box<dyn ArrayBuilder>,
44
45    /// The builder for the `offsets` into the `elements` array.
46    offsets_builder: PrimitiveBuilder<O>,
47
48    /// The null map builder of the [`ListArray`].
49    nulls: LazyBitBufferBuilder,
50}
51
52impl<O: IntegerPType> ListBuilder<O> {
53    /// Creates a new `ListBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
54    pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
55        Self::with_capacity(
56            value_dtype,
57            nullability,
58            // We arbitrarily choose 2 times the number of list scalars for the capacity of the
59            // elements builder since we cannot know this ahead of time.
60            DEFAULT_BUILDER_CAPACITY * 2,
61            DEFAULT_BUILDER_CAPACITY,
62        )
63    }
64
65    /// Create a new [`ListArray`] builder with a with the given `capacity`, as well as an initial
66    /// capacity for the `elements` builder (since we cannot know that ahead of time solely based on
67    /// the outer array `capacity`).
68    ///
69    /// # Notes
70    ///
71    /// The number of offsets is one more than the length (# of list scalars) in the array.
72    pub fn with_capacity(
73        value_dtype: Arc<DType>,
74        nullability: Nullability,
75        elements_capacity: usize,
76        capacity: usize,
77    ) -> Self {
78        let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
79        let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
80
81        // The first offset is always 0 and represents an empty list.
82        offsets_builder.append_zero();
83
84        Self {
85            elements_builder,
86            offsets_builder,
87            nulls: LazyBitBufferBuilder::new(capacity),
88            dtype: DType::List(value_dtype, nullability),
89        }
90    }
91
92    /// Appends an array as a single non-null list entry to the builder.
93    ///
94    /// The input `array` must have the same dtype as the element dtype of this list builder.
95    ///
96    /// Note that the list entry will be non-null but the elements themselves are allowed to be null
97    /// (only if the elements [`DType`] in nullable, of course).
98    pub fn append_array_as_list(&mut self, array: &ArrayRef) -> VortexResult<()> {
99        vortex_ensure!(
100            array.dtype() == self.element_dtype(),
101            "Array dtype {:?} does not match list element dtype {:?}",
102            array.dtype(),
103            self.element_dtype()
104        );
105
106        self.elements_builder.extend_from_array(array);
107        self.nulls.append_non_null();
108        self.offsets_builder.append_value(
109            O::from_usize(self.elements_builder.len())
110                .vortex_expect("Failed to convert from usize to O"),
111        );
112
113        Ok(())
114    }
115
116    /// Appends a list `value` to the builder.
117    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
118        match value.elements() {
119            None => {
120                if self.dtype.nullability() == NonNullable {
121                    vortex_bail!("Cannot append null value to non-nullable list");
122                }
123                self.append_null();
124            }
125            Some(elements) => {
126                for scalar in elements {
127                    // TODO(connor): This is slow, we should be able to append multiple values at
128                    // once, or the list scalar should hold an Array
129                    self.elements_builder.append_scalar(&scalar)?;
130                }
131
132                self.nulls.append_non_null();
133                self.offsets_builder.append_value(
134                    O::from_usize(self.elements_builder.len())
135                        .vortex_expect("Failed to convert from usize to O"),
136                );
137            }
138        }
139
140        Ok(())
141    }
142
143    /// Finishes the builder directly into a [`ListArray`].
144    pub fn finish_into_list(&mut self) -> ListArray {
145        assert_eq!(
146            self.offsets_builder.len(),
147            self.nulls.len() + 1,
148            "offsets length must be one more than nulls length."
149        );
150
151        ListArray::try_new(
152            self.elements_builder.finish(),
153            self.offsets_builder.finish(),
154            self.nulls.finish_with_nullability(self.dtype.nullability()),
155        )
156        .vortex_expect("Buffer, offsets, and validity must have same length.")
157    }
158
159    /// The [`DType`] of the inner elements. Note that this is **not** the same as the [`DType`] of
160    /// the outer `List`.
161    pub fn element_dtype(&self) -> &DType {
162        let DType::List(element_dtype, _) = &self.dtype else {
163            vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
164        };
165
166        element_dtype
167    }
168}
169
170impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
171    fn as_any(&self) -> &dyn Any {
172        self
173    }
174
175    fn as_any_mut(&mut self) -> &mut dyn Any {
176        self
177    }
178
179    fn dtype(&self) -> &DType {
180        &self.dtype
181    }
182
183    fn len(&self) -> usize {
184        self.nulls.len()
185    }
186
187    fn append_zeros(&mut self, n: usize) {
188        let curr_len = self.elements_builder.len();
189        for _ in 0..n {
190            self.offsets_builder.append_value(
191                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
192            )
193        }
194        self.nulls.append_n_non_nulls(n);
195    }
196
197    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
198        let curr_len = self.elements_builder.len();
199        for _ in 0..n {
200            // A list with a null element is can be a list with a zero-span offset and a validity
201            // bit set
202            self.offsets_builder.append_value(
203                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
204            )
205        }
206        self.nulls.append_n_nulls(n);
207    }
208
209    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
210        vortex_ensure!(
211            scalar.dtype() == self.dtype(),
212            "ListBuilder expected scalar with dtype {}, got {}",
213            self.dtype(),
214            scalar.dtype()
215        );
216
217        self.append_value(scalar.as_list())
218    }
219
220    unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
221        #[expect(deprecated)]
222        let list = array.to_listview();
223        if list.is_empty() {
224            return;
225        }
226
227        // Append validity information.
228        self.nulls.append_validity_mask(
229            &array
230                .validity()
231                .vortex_expect("validity_mask in extend_from_array_unchecked")
232                .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
233                .vortex_expect("Failed to compute validity mask"),
234        );
235
236        // Note that `ListViewArray` has `n` offsets and sizes, not `n+1` offsets like `ListArray`.
237        let elements = list.elements();
238        #[expect(deprecated)]
239        let offsets = list.offsets().to_primitive();
240        #[expect(deprecated)]
241        let sizes = list.sizes().to_primitive();
242
243        fn extend_inner<O, OffsetType, SizeType>(
244            builder: &mut ListBuilder<O>,
245            new_elements: &ArrayRef,
246            new_offsets: &[OffsetType],
247            new_sizes: &[SizeType],
248        ) where
249            O: IntegerPType,
250            OffsetType: IntegerPType,
251            SizeType: IntegerPType,
252        {
253            let num_lists = new_offsets.len();
254            debug_assert_eq!(num_lists, new_sizes.len());
255
256            let mut curr_offset = builder.elements_builder.len();
257            let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
258
259            // We need to append each list individually, converting from `ListViewArray` format to
260            // the `ListArray` format that `ListBuilder` expects.
261            for i in 0..new_offsets.len() {
262                let offset: usize = new_offsets[i].as_();
263                let size: usize = new_sizes[i].as_();
264
265                if size > 0 {
266                    let list_elements = new_elements
267                        .slice(offset..offset + size)
268                        .vortex_expect("list builder slice");
269                    builder.elements_builder.extend_from_array(&list_elements);
270                    curr_offset += size;
271                }
272
273                let new_offset =
274                    O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
275
276                offsets_range.set_value(i, new_offset);
277            }
278
279            // SAFETY: We have initialized all `num_lists` values, and since the `offsets` array is
280            // non-nullable, we are done.
281            unsafe { offsets_range.finish() };
282        }
283
284        match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
285            match_each_integer_ptype!(sizes.ptype(), |SizeType| {
286                extend_inner(
287                    self,
288                    elements,
289                    offsets.as_slice::<OffsetType>(),
290                    sizes.as_slice::<SizeType>(),
291                )
292            })
293        })
294    }
295
296    fn reserve_exact(&mut self, additional: usize) {
297        self.elements_builder.reserve_exact(additional);
298        self.offsets_builder.reserve_exact(additional);
299        self.nulls.reserve_exact(additional);
300    }
301
302    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
303        self.nulls = LazyBitBufferBuilder::from_validity_mask(validity);
304    }
305
306    fn finish(&mut self) -> ArrayRef {
307        self.finish_into_list().into_array()
308    }
309
310    fn finish_into_canonical(&mut self) -> Canonical {
311        #[expect(deprecated)]
312        let listview = self.finish_into_list().into_array().to_listview();
313        Canonical::List(listview)
314    }
315}
316
317#[cfg(test)]
318mod tests {
319    use std::sync::Arc;
320
321    use Nullability::NonNullable;
322    use Nullability::Nullable;
323    use vortex_buffer::buffer;
324    use vortex_error::VortexExpect;
325
326    use crate::IntoArray;
327    use crate::LEGACY_SESSION;
328    #[expect(deprecated)]
329    use crate::ToCanonical as _;
330    use crate::arrays::ChunkedArray;
331    use crate::arrays::PrimitiveArray;
332    use crate::arrays::list::ListArrayExt;
333    use crate::arrays::listview::ListViewArrayExt;
334    use crate::assert_arrays_eq;
335    use crate::builders::ArrayBuilder;
336    use crate::builders::list::ListArray;
337    use crate::builders::list::ListBuilder;
338    use crate::dtype::DType;
339    use crate::dtype::IntegerPType;
340    use crate::dtype::Nullability;
341    use crate::dtype::PType::I32;
342    use crate::executor::VortexSessionExecute;
343    use crate::scalar::Scalar;
344    use crate::validity::Validity;
345
346    #[test]
347    fn test_empty() {
348        let mut builder =
349            ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
350
351        let list = builder.finish();
352        assert_eq!(list.len(), 0);
353    }
354
355    #[test]
356    fn test_values() {
357        let dtype: Arc<DType> = Arc::new(I32.into());
358        let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
359
360        builder
361            .append_value(
362                Scalar::list(
363                    Arc::clone(&dtype),
364                    vec![1i32.into(), 2i32.into(), 3i32.into()],
365                    NonNullable,
366                )
367                .as_list(),
368            )
369            .unwrap();
370
371        builder
372            .append_value(
373                Scalar::list(
374                    dtype,
375                    vec![4i32.into(), 5i32.into(), 6i32.into()],
376                    NonNullable,
377                )
378                .as_list(),
379            )
380            .unwrap();
381
382        let list = builder.finish();
383        assert_eq!(list.len(), 2);
384
385        #[expect(deprecated)]
386        let list_array = list.to_listview();
387
388        assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
389        assert_eq!(list_array.list_elements_at(1).unwrap().len(), 3);
390    }
391
392    #[test]
393    fn test_append_empty_list() {
394        let dtype: Arc<DType> = Arc::new(I32.into());
395        let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
396
397        assert!(
398            builder
399                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
400                .is_ok()
401        )
402    }
403
404    #[test]
405    fn test_nullable_values() {
406        let dtype: Arc<DType> = Arc::new(I32.into());
407        let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
408
409        builder
410            .append_value(
411                Scalar::list(
412                    Arc::clone(&dtype),
413                    vec![1i32.into(), 2i32.into(), 3i32.into()],
414                    NonNullable,
415                )
416                .as_list(),
417            )
418            .unwrap();
419
420        builder
421            .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
422            .unwrap();
423
424        builder
425            .append_value(
426                Scalar::list(
427                    dtype,
428                    vec![4i32.into(), 5i32.into(), 6i32.into()],
429                    NonNullable,
430                )
431                .as_list(),
432            )
433            .unwrap();
434
435        let list = builder.finish();
436        assert_eq!(list.len(), 3);
437
438        #[expect(deprecated)]
439        let list_array = list.to_listview();
440
441        assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
442        assert_eq!(list_array.list_elements_at(1).unwrap().len(), 0);
443        assert_eq!(list_array.list_elements_at(2).unwrap().len(), 3);
444    }
445
446    fn test_extend_builder_gen<O: IntegerPType>() {
447        let list = ListArray::from_iter_opt_slow::<O, _, _>(
448            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
449            Arc::new(I32.into()),
450        )
451        .unwrap();
452        assert_eq!(list.len(), 3);
453
454        let mut ctx = LEGACY_SESSION.create_execution_ctx();
455
456        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
457        builder.extend_from_array(&list);
458        builder.extend_from_array(&list);
459        builder.extend_from_array(&list.slice(0..0).unwrap());
460        builder.extend_from_array(&list.slice(1..3).unwrap());
461
462        #[expect(deprecated)]
463        let expected = ListArray::from_iter_opt_slow::<O, _, _>(
464            [
465                Some(vec![0, 1, 2]),
466                None,
467                Some(vec![4, 5]),
468                Some(vec![0, 1, 2]),
469                None,
470                Some(vec![4, 5]),
471                None,
472                Some(vec![4, 5]),
473            ],
474            Arc::new(DType::Primitive(I32, NonNullable)),
475        )
476        .unwrap()
477        .to_listview();
478
479        let actual = builder.finish_into_canonical().into_listview();
480
481        assert_arrays_eq!(actual.elements(), expected.elements());
482
483        assert_arrays_eq!(actual.offsets(), expected.offsets());
484
485        assert!(
486            actual
487                .validity()
488                .vortex_expect("list validity should be derivable")
489                .mask_eq(
490                    &expected
491                        .validity()
492                        .vortex_expect("list validity should be derivable"),
493                    actual.len(),
494                    &mut ctx,
495                )
496                .unwrap(),
497        );
498    }
499
500    #[test]
501    fn test_extend_builder() {
502        test_extend_builder_gen::<i8>();
503        test_extend_builder_gen::<i16>();
504        test_extend_builder_gen::<i32>();
505        test_extend_builder_gen::<i64>();
506
507        test_extend_builder_gen::<u8>();
508        test_extend_builder_gen::<u16>();
509        test_extend_builder_gen::<u32>();
510        test_extend_builder_gen::<u64>();
511    }
512
513    #[test]
514    pub fn test_array_with_gap() {
515        let one_trailing_unused_element = ListArray::try_new(
516            buffer![1, 2, 3, 4].into_array(),
517            buffer![0, 3].into_array(),
518            Validity::NonNullable,
519        )
520        .unwrap();
521
522        let second_array = ListArray::try_new(
523            buffer![5, 6].into_array(),
524            buffer![0, 2].into_array(),
525            Validity::NonNullable,
526        )
527        .unwrap();
528
529        let chunked_list = ChunkedArray::try_new(
530            vec![
531                one_trailing_unused_element.clone().into_array(),
532                second_array.clone().into_array(),
533            ],
534            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
535        );
536
537        #[expect(deprecated)]
538        let canon_values = chunked_list.unwrap().as_array().to_listview();
539
540        assert_eq!(
541            one_trailing_unused_element
542                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
543                .unwrap(),
544            canon_values
545                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
546                .unwrap()
547        );
548        assert_eq!(
549            second_array
550                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
551                .unwrap(),
552            canon_values
553                .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
554                .unwrap()
555        );
556    }
557
558    #[test]
559    fn test_append_scalar() {
560        let dtype: Arc<DType> = Arc::new(I32.into());
561        let mut builder = ListBuilder::<u64>::with_capacity(Arc::clone(&dtype), Nullable, 20, 10);
562
563        // Test appending a valid list.
564        let list_scalar1 =
565            Scalar::list(Arc::clone(&dtype), vec![1i32.into(), 2i32.into()], Nullable);
566        builder.append_scalar(&list_scalar1).unwrap();
567
568        // Test appending another list.
569        let list_scalar2 = Scalar::list(
570            Arc::clone(&dtype),
571            vec![3i32.into(), 4i32.into(), 5i32.into()],
572            Nullable,
573        );
574        builder.append_scalar(&list_scalar2).unwrap();
575
576        // Test appending null value.
577        let null_scalar = Scalar::null(DType::List(Arc::clone(&dtype), Nullable));
578        builder.append_scalar(&null_scalar).unwrap();
579
580        let array = builder.finish_into_list();
581        assert_eq!(array.len(), 3);
582
583        let mut ctx = LEGACY_SESSION.create_execution_ctx();
584
585        // Check actual values using scalar_at.
586
587        let scalar0 = array.execute_scalar(0, &mut ctx).unwrap();
588        let list0 = scalar0.as_list();
589        assert_eq!(list0.len(), 2);
590        if let Some(list0_items) = list0.elements() {
591            assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
592            assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
593        }
594
595        let scalar1 = array.execute_scalar(1, &mut ctx).unwrap();
596        let list1 = scalar1.as_list();
597        assert_eq!(list1.len(), 3);
598        if let Some(list1_items) = list1.elements() {
599            assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
600            assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
601            assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
602        }
603
604        let scalar2 = array.execute_scalar(2, &mut ctx).unwrap();
605        let list2 = scalar2.as_list();
606        assert!(list2.is_null()); // This should be null.
607
608        // Check validity.
609        assert!(
610            array
611                .validity()
612                .vortex_expect("list validity should be derivable")
613                .execute_is_valid(0, &mut ctx)
614                .unwrap()
615        );
616        assert!(
617            array
618                .validity()
619                .vortex_expect("list validity should be derivable")
620                .execute_is_valid(1, &mut ctx)
621                .unwrap()
622        );
623        assert!(
624            !array
625                .validity()
626                .vortex_expect("list validity should be derivable")
627                .execute_is_valid(2, &mut ctx)
628                .unwrap()
629        );
630
631        // Test wrong dtype error.
632        let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
633        let wrong_scalar = Scalar::from(42i32);
634        assert!(builder.append_scalar(&wrong_scalar).is_err());
635    }
636
637    #[test]
638    fn test_append_array_as_list() {
639        let dtype: Arc<DType> = Arc::new(I32.into());
640        let mut builder =
641            ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
642
643        // Append a primitive array as a single list entry.
644        let arr1 = buffer![1i32, 2, 3].into_array();
645        builder.append_array_as_list(&arr1).unwrap();
646
647        // Interleave with a list scalar.
648        builder
649            .append_value(
650                Scalar::list(
651                    Arc::clone(&dtype),
652                    vec![10i32.into(), 11i32.into()],
653                    NonNullable,
654                )
655                .as_list(),
656            )
657            .unwrap();
658
659        // Append another primitive array as a single list entry.
660        let arr2 = buffer![4i32, 5].into_array();
661        builder.append_array_as_list(&arr2).unwrap();
662
663        // Append an empty array as a single list entry (empty list).
664        let arr3 = buffer![0i32; 0].into_array();
665        builder.append_array_as_list(&arr3).unwrap();
666
667        // Interleave with another list scalar (empty list).
668        builder
669            .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
670            .unwrap();
671
672        let list = builder.finish_into_list();
673        assert_eq!(list.len(), 5);
674
675        // Verify elements array: [1, 2, 3, 10, 11, 4, 5].
676        assert_arrays_eq!(
677            list.elements(),
678            PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
679        );
680
681        // Verify offsets array.
682        assert_arrays_eq!(
683            list.offsets(),
684            PrimitiveArray::from_iter([0u32, 3, 5, 7, 7, 7])
685        );
686
687        // Test dtype mismatch error.
688        let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
689        let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
690        assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
691    }
692}