Skip to main content

vortex_array/builders/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::Arc;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_panic;
12use vortex_mask::Mask;
13
14use crate::ArrayRef;
15use crate::Canonical;
16use crate::IntoArray;
17use crate::LEGACY_SESSION;
18use crate::VortexSessionExecute;
19use crate::arrays::ListArray;
20use crate::arrays::listview::ListViewArrayExt;
21use crate::builders::ArrayBuilder;
22use crate::builders::DEFAULT_BUILDER_CAPACITY;
23use crate::builders::LazyBitBufferBuilder;
24use crate::builders::PrimitiveBuilder;
25use crate::builders::builder_with_capacity;
26#[expect(deprecated)]
27use crate::canonical::ToCanonical as _;
28use crate::dtype::DType;
29use crate::dtype::IntegerPType;
30use crate::dtype::Nullability;
31use crate::dtype::Nullability::NonNullable;
32use crate::match_each_integer_ptype;
33use crate::scalar::ListScalar;
34use crate::scalar::Scalar;
35
36/// The builder for building a [`ListArray`], parametrized by the [`IntegerPType`] of the `offsets`
37/// builder.
38pub struct ListBuilder<O: IntegerPType> {
39    /// The [`DType`] of the [`ListArray`]. This **must** be a [`DType::List`].
40    dtype: DType,
41
42    /// The builder for the underlying elements of the [`ListArray`].
43    elements_builder: Box<dyn ArrayBuilder>,
44
45    /// The builder for the `offsets` into the `elements` array.
46    offsets_builder: PrimitiveBuilder<O>,
47
48    /// The null map builder of the [`ListArray`].
49    nulls: LazyBitBufferBuilder,
50}
51
52impl<O: IntegerPType> ListBuilder<O> {
53    /// Creates a new `ListBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
54    pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
55        Self::with_capacity(
56            value_dtype,
57            nullability,
58            // We arbitrarily choose 2 times the number of list scalars for the capacity of the
59            // elements builder since we cannot know this ahead of time.
60            DEFAULT_BUILDER_CAPACITY * 2,
61            DEFAULT_BUILDER_CAPACITY,
62        )
63    }
64
65    /// Create a new [`ListArray`] builder with a with the given `capacity`, as well as an initial
66    /// capacity for the `elements` builder (since we cannot know that ahead of time solely based on
67    /// the outer array `capacity`).
68    ///
69    /// # Notes
70    ///
71    /// The number of offsets is one more than the length (# of list scalars) in the array.
72    pub fn with_capacity(
73        value_dtype: Arc<DType>,
74        nullability: Nullability,
75        elements_capacity: usize,
76        capacity: usize,
77    ) -> Self {
78        let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
79        let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
80
81        // The first offset is always 0 and represents an empty list.
82        offsets_builder.append_zero();
83
84        Self {
85            elements_builder,
86            offsets_builder,
87            nulls: LazyBitBufferBuilder::new(capacity),
88            dtype: DType::List(value_dtype, nullability),
89        }
90    }
91
92    /// Appends an array as a single non-null list entry to the builder.
93    ///
94    /// The input `array` must have the same dtype as the element dtype of this list builder.
95    ///
96    /// Note that the list entry will be non-null but the elements themselves are allowed to be null
97    /// (only if the elements [`DType`] in nullable, of course).
98    pub fn append_array_as_list(&mut self, array: &ArrayRef) -> VortexResult<()> {
99        vortex_ensure!(
100            array.dtype() == self.element_dtype(),
101            "Array dtype {:?} does not match list element dtype {:?}",
102            array.dtype(),
103            self.element_dtype()
104        );
105
106        self.elements_builder.extend_from_array(array);
107        self.nulls.append_non_null();
108        self.offsets_builder.append_value(
109            O::from_usize(self.elements_builder.len())
110                .vortex_expect("Failed to convert from usize to O"),
111        );
112
113        Ok(())
114    }
115
116    /// Appends a list `value` to the builder.
117    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
118        match value.elements() {
119            None => {
120                if self.dtype.nullability() == NonNullable {
121                    vortex_bail!("Cannot append null value to non-nullable list");
122                }
123                self.append_null();
124            }
125            Some(elements) => {
126                for scalar in elements {
127                    // TODO(connor): This is slow, we should be able to append multiple values at
128                    // once, or the list scalar should hold an Array
129                    self.elements_builder.append_scalar(&scalar)?;
130                }
131
132                self.nulls.append_non_null();
133                self.offsets_builder.append_value(
134                    O::from_usize(self.elements_builder.len())
135                        .vortex_expect("Failed to convert from usize to O"),
136                );
137            }
138        }
139
140        Ok(())
141    }
142
143    /// Finishes the builder directly into a [`ListArray`].
144    pub fn finish_into_list(&mut self) -> ListArray {
145        assert_eq!(
146            self.offsets_builder.len(),
147            self.nulls.len() + 1,
148            "offsets length must be one more than nulls length."
149        );
150
151        ListArray::try_new(
152            self.elements_builder.finish(),
153            self.offsets_builder.finish(),
154            self.nulls.finish_with_nullability(self.dtype.nullability()),
155        )
156        .vortex_expect("Buffer, offsets, and validity must have same length.")
157    }
158
159    /// The [`DType`] of the inner elements. Note that this is **not** the same as the [`DType`] of
160    /// the outer `List`.
161    pub fn element_dtype(&self) -> &DType {
162        let DType::List(element_dtype, _) = &self.dtype else {
163            vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
164        };
165
166        element_dtype
167    }
168}
169
170impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
171    fn as_any(&self) -> &dyn Any {
172        self
173    }
174
175    fn as_any_mut(&mut self) -> &mut dyn Any {
176        self
177    }
178
179    fn dtype(&self) -> &DType {
180        &self.dtype
181    }
182
183    fn len(&self) -> usize {
184        self.nulls.len()
185    }
186
187    fn append_zeros(&mut self, n: usize) {
188        let curr_len = self.elements_builder.len();
189        for _ in 0..n {
190            self.offsets_builder.append_value(
191                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
192            )
193        }
194        self.nulls.append_n_non_nulls(n);
195    }
196
197    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
198        let curr_len = self.elements_builder.len();
199        for _ in 0..n {
200            // A list with a null element is can be a list with a zero-span offset and a validity
201            // bit set
202            self.offsets_builder.append_value(
203                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
204            )
205        }
206        self.nulls.append_n_nulls(n);
207    }
208
209    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
210        vortex_ensure!(
211            scalar.dtype() == self.dtype(),
212            "ListBuilder expected scalar with dtype {}, got {}",
213            self.dtype(),
214            scalar.dtype()
215        );
216
217        self.append_value(scalar.as_list())
218    }
219
220    unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
221        #[expect(deprecated)]
222        let list = array.to_listview();
223        if list.is_empty() {
224            return;
225        }
226
227        // Append validity information.
228        self.nulls.append_validity_mask(
229            array
230                .validity()
231                .vortex_expect("validity_mask in extend_from_array_unchecked")
232                .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
233                .vortex_expect("Failed to compute validity mask"),
234        );
235
236        // Note that `ListViewArray` has `n` offsets and sizes, not `n+1` offsets like `ListArray`.
237        let elements = list.elements();
238        #[expect(deprecated)]
239        let offsets = list.offsets().to_primitive();
240        #[expect(deprecated)]
241        let sizes = list.sizes().to_primitive();
242
243        fn extend_inner<O, OffsetType, SizeType>(
244            builder: &mut ListBuilder<O>,
245            new_elements: &ArrayRef,
246            new_offsets: &[OffsetType],
247            new_sizes: &[SizeType],
248        ) where
249            O: IntegerPType,
250            OffsetType: IntegerPType,
251            SizeType: IntegerPType,
252        {
253            let num_lists = new_offsets.len();
254            debug_assert_eq!(num_lists, new_sizes.len());
255
256            let mut curr_offset = builder.elements_builder.len();
257            let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
258
259            // We need to append each list individually, converting from `ListViewArray` format to
260            // the `ListArray` format that `ListBuilder` expects.
261            for i in 0..new_offsets.len() {
262                let offset: usize = new_offsets[i].as_();
263                let size: usize = new_sizes[i].as_();
264
265                if size > 0 {
266                    let list_elements = new_elements
267                        .slice(offset..offset + size)
268                        .vortex_expect("list builder slice");
269                    builder.elements_builder.extend_from_array(&list_elements);
270                    curr_offset += size;
271                }
272
273                let new_offset =
274                    O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
275
276                offsets_range.set_value(i, new_offset);
277            }
278
279            // SAFETY: We have initialized all `num_lists` values, and since the `offsets` array is
280            // non-nullable, we are done.
281            unsafe { offsets_range.finish() };
282        }
283
284        match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
285            match_each_integer_ptype!(sizes.ptype(), |SizeType| {
286                extend_inner(
287                    self,
288                    elements,
289                    offsets.as_slice::<OffsetType>(),
290                    sizes.as_slice::<SizeType>(),
291                )
292            })
293        })
294    }
295
296    fn reserve_exact(&mut self, additional: usize) {
297        self.elements_builder.reserve_exact(additional);
298        self.offsets_builder.reserve_exact(additional);
299        self.nulls.reserve_exact(additional);
300    }
301
302    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
303        self.nulls = LazyBitBufferBuilder::new(validity.len());
304        self.nulls.append_validity_mask(validity);
305    }
306
307    fn finish(&mut self) -> ArrayRef {
308        self.finish_into_list().into_array()
309    }
310
311    fn finish_into_canonical(&mut self) -> Canonical {
312        #[expect(deprecated)]
313        let listview = self.finish_into_list().into_array().to_listview();
314        Canonical::List(listview)
315    }
316}
317
318#[cfg(test)]
319mod tests {
320    use std::sync::Arc;
321
322    use Nullability::NonNullable;
323    use Nullability::Nullable;
324    use vortex_buffer::buffer;
325    use vortex_error::VortexExpect;
326
327    use crate::IntoArray;
328    use crate::LEGACY_SESSION;
329    #[expect(deprecated)]
330    use crate::ToCanonical as _;
331    use crate::arrays::ChunkedArray;
332    use crate::arrays::PrimitiveArray;
333    use crate::arrays::list::ListArrayExt;
334    use crate::arrays::listview::ListViewArrayExt;
335    use crate::assert_arrays_eq;
336    use crate::builders::ArrayBuilder;
337    use crate::builders::list::ListArray;
338    use crate::builders::list::ListBuilder;
339    use crate::dtype::DType;
340    use crate::dtype::IntegerPType;
341    use crate::dtype::Nullability;
342    use crate::dtype::PType::I32;
343    use crate::executor::VortexSessionExecute;
344    use crate::scalar::Scalar;
345    use crate::validity::Validity;
346
347    #[test]
348    fn test_empty() {
349        let mut builder =
350            ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
351
352        let list = builder.finish();
353        assert_eq!(list.len(), 0);
354    }
355
356    #[test]
357    fn test_values() {
358        let dtype: Arc<DType> = Arc::new(I32.into());
359        let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
360
361        builder
362            .append_value(
363                Scalar::list(
364                    Arc::clone(&dtype),
365                    vec![1i32.into(), 2i32.into(), 3i32.into()],
366                    NonNullable,
367                )
368                .as_list(),
369            )
370            .unwrap();
371
372        builder
373            .append_value(
374                Scalar::list(
375                    dtype,
376                    vec![4i32.into(), 5i32.into(), 6i32.into()],
377                    NonNullable,
378                )
379                .as_list(),
380            )
381            .unwrap();
382
383        let list = builder.finish();
384        assert_eq!(list.len(), 2);
385
386        #[expect(deprecated)]
387        let list_array = list.to_listview();
388
389        assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
390        assert_eq!(list_array.list_elements_at(1).unwrap().len(), 3);
391    }
392
393    #[test]
394    fn test_append_empty_list() {
395        let dtype: Arc<DType> = Arc::new(I32.into());
396        let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
397
398        assert!(
399            builder
400                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
401                .is_ok()
402        )
403    }
404
405    #[test]
406    fn test_nullable_values() {
407        let dtype: Arc<DType> = Arc::new(I32.into());
408        let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
409
410        builder
411            .append_value(
412                Scalar::list(
413                    Arc::clone(&dtype),
414                    vec![1i32.into(), 2i32.into(), 3i32.into()],
415                    NonNullable,
416                )
417                .as_list(),
418            )
419            .unwrap();
420
421        builder
422            .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
423            .unwrap();
424
425        builder
426            .append_value(
427                Scalar::list(
428                    dtype,
429                    vec![4i32.into(), 5i32.into(), 6i32.into()],
430                    NonNullable,
431                )
432                .as_list(),
433            )
434            .unwrap();
435
436        let list = builder.finish();
437        assert_eq!(list.len(), 3);
438
439        #[expect(deprecated)]
440        let list_array = list.to_listview();
441
442        assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
443        assert_eq!(list_array.list_elements_at(1).unwrap().len(), 0);
444        assert_eq!(list_array.list_elements_at(2).unwrap().len(), 3);
445    }
446
447    fn test_extend_builder_gen<O: IntegerPType>() {
448        let list = ListArray::from_iter_opt_slow::<O, _, _>(
449            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
450            Arc::new(I32.into()),
451        )
452        .unwrap();
453        assert_eq!(list.len(), 3);
454
455        let mut ctx = LEGACY_SESSION.create_execution_ctx();
456
457        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
458        builder.extend_from_array(&list);
459        builder.extend_from_array(&list);
460        builder.extend_from_array(&list.slice(0..0).unwrap());
461        builder.extend_from_array(&list.slice(1..3).unwrap());
462
463        #[expect(deprecated)]
464        let expected = ListArray::from_iter_opt_slow::<O, _, _>(
465            [
466                Some(vec![0, 1, 2]),
467                None,
468                Some(vec![4, 5]),
469                Some(vec![0, 1, 2]),
470                None,
471                Some(vec![4, 5]),
472                None,
473                Some(vec![4, 5]),
474            ],
475            Arc::new(DType::Primitive(I32, NonNullable)),
476        )
477        .unwrap()
478        .to_listview();
479
480        let actual = builder.finish_into_canonical().into_listview();
481
482        assert_arrays_eq!(actual.elements(), expected.elements());
483
484        assert_arrays_eq!(actual.offsets(), expected.offsets());
485
486        assert!(
487            actual
488                .validity()
489                .vortex_expect("list validity should be derivable")
490                .mask_eq(
491                    &expected
492                        .validity()
493                        .vortex_expect("list validity should be derivable"),
494                    &mut ctx,
495                )
496                .unwrap(),
497        );
498    }
499
500    #[test]
501    fn test_extend_builder() {
502        test_extend_builder_gen::<i8>();
503        test_extend_builder_gen::<i16>();
504        test_extend_builder_gen::<i32>();
505        test_extend_builder_gen::<i64>();
506
507        test_extend_builder_gen::<u8>();
508        test_extend_builder_gen::<u16>();
509        test_extend_builder_gen::<u32>();
510        test_extend_builder_gen::<u64>();
511    }
512
513    #[test]
514    pub fn test_array_with_gap() {
515        let one_trailing_unused_element = ListArray::try_new(
516            buffer![1, 2, 3, 4].into_array(),
517            buffer![0, 3].into_array(),
518            Validity::NonNullable,
519        )
520        .unwrap();
521
522        let second_array = ListArray::try_new(
523            buffer![5, 6].into_array(),
524            buffer![0, 2].into_array(),
525            Validity::NonNullable,
526        )
527        .unwrap();
528
529        let chunked_list = ChunkedArray::try_new(
530            vec![
531                one_trailing_unused_element.clone().into_array(),
532                second_array.clone().into_array(),
533            ],
534            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
535        );
536
537        #[expect(deprecated)]
538        let canon_values = chunked_list.unwrap().as_array().to_listview();
539
540        assert_eq!(
541            one_trailing_unused_element
542                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
543                .unwrap(),
544            canon_values
545                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
546                .unwrap()
547        );
548        assert_eq!(
549            second_array
550                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
551                .unwrap(),
552            canon_values
553                .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
554                .unwrap()
555        );
556    }
557
558    #[test]
559    fn test_append_scalar() {
560        let dtype: Arc<DType> = Arc::new(I32.into());
561        let mut builder = ListBuilder::<u64>::with_capacity(Arc::clone(&dtype), Nullable, 20, 10);
562
563        // Test appending a valid list.
564        let list_scalar1 =
565            Scalar::list(Arc::clone(&dtype), vec![1i32.into(), 2i32.into()], Nullable);
566        builder.append_scalar(&list_scalar1).unwrap();
567
568        // Test appending another list.
569        let list_scalar2 = Scalar::list(
570            Arc::clone(&dtype),
571            vec![3i32.into(), 4i32.into(), 5i32.into()],
572            Nullable,
573        );
574        builder.append_scalar(&list_scalar2).unwrap();
575
576        // Test appending null value.
577        let null_scalar = Scalar::null(DType::List(Arc::clone(&dtype), Nullable));
578        builder.append_scalar(&null_scalar).unwrap();
579
580        let array = builder.finish_into_list();
581        assert_eq!(array.len(), 3);
582
583        // Check actual values using scalar_at.
584
585        let scalar0 = array
586            .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
587            .unwrap();
588        let list0 = scalar0.as_list();
589        assert_eq!(list0.len(), 2);
590        if let Some(list0_items) = list0.elements() {
591            assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
592            assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
593        }
594
595        let scalar1 = array
596            .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
597            .unwrap();
598        let list1 = scalar1.as_list();
599        assert_eq!(list1.len(), 3);
600        if let Some(list1_items) = list1.elements() {
601            assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
602            assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
603            assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
604        }
605
606        let scalar2 = array
607            .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
608            .unwrap();
609        let list2 = scalar2.as_list();
610        assert!(list2.is_null()); // This should be null.
611
612        // Check validity.
613        assert!(
614            array
615                .validity()
616                .vortex_expect("list validity should be derivable")
617                .is_valid(0)
618                .unwrap()
619        );
620        assert!(
621            array
622                .validity()
623                .vortex_expect("list validity should be derivable")
624                .is_valid(1)
625                .unwrap()
626        );
627        assert!(
628            !array
629                .validity()
630                .vortex_expect("list validity should be derivable")
631                .is_valid(2)
632                .unwrap()
633        );
634
635        // Test wrong dtype error.
636        let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
637        let wrong_scalar = Scalar::from(42i32);
638        assert!(builder.append_scalar(&wrong_scalar).is_err());
639    }
640
641    #[test]
642    fn test_append_array_as_list() {
643        let dtype: Arc<DType> = Arc::new(I32.into());
644        let mut builder =
645            ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
646
647        // Append a primitive array as a single list entry.
648        let arr1 = buffer![1i32, 2, 3].into_array();
649        builder.append_array_as_list(&arr1).unwrap();
650
651        // Interleave with a list scalar.
652        builder
653            .append_value(
654                Scalar::list(
655                    Arc::clone(&dtype),
656                    vec![10i32.into(), 11i32.into()],
657                    NonNullable,
658                )
659                .as_list(),
660            )
661            .unwrap();
662
663        // Append another primitive array as a single list entry.
664        let arr2 = buffer![4i32, 5].into_array();
665        builder.append_array_as_list(&arr2).unwrap();
666
667        // Append an empty array as a single list entry (empty list).
668        let arr3 = buffer![0i32; 0].into_array();
669        builder.append_array_as_list(&arr3).unwrap();
670
671        // Interleave with another list scalar (empty list).
672        builder
673            .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
674            .unwrap();
675
676        let list = builder.finish_into_list();
677        assert_eq!(list.len(), 5);
678
679        // Verify elements array: [1, 2, 3, 10, 11, 4, 5].
680        assert_arrays_eq!(
681            list.elements(),
682            PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
683        );
684
685        // Verify offsets array.
686        assert_arrays_eq!(
687            list.offsets(),
688            PrimitiveArray::from_iter([0u32, 3, 5, 7, 7, 7])
689        );
690
691        // Test dtype mismatch error.
692        let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
693        let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
694        assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
695    }
696}