Skip to main content

vortex_array/builders/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::DType;
8use vortex_dtype::IntegerPType;
9use vortex_dtype::Nullability;
10use vortex_dtype::Nullability::NonNullable;
11use vortex_dtype::match_each_integer_ptype;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_panic;
17use vortex_mask::Mask;
18
19use crate::Array;
20use crate::ArrayRef;
21use crate::IntoArray;
22use crate::arrays::ListArray;
23use crate::builders::ArrayBuilder;
24use crate::builders::DEFAULT_BUILDER_CAPACITY;
25use crate::builders::LazyBitBufferBuilder;
26use crate::builders::PrimitiveBuilder;
27use crate::builders::builder_with_capacity;
28use crate::canonical::ToCanonical;
29use crate::scalar::ListScalar;
30use crate::scalar::Scalar;
31
32/// The builder for building a [`ListArray`], parametrized by the [`IntegerPType`] of the `offsets`
33/// builder.
34pub struct ListBuilder<O: IntegerPType> {
35    /// The [`DType`] of the [`ListArray`]. This **must** be a [`DType::List`].
36    dtype: DType,
37
38    /// The builder for the underlying elements of the [`ListArray`].
39    elements_builder: Box<dyn ArrayBuilder>,
40
41    /// The builder for the `offsets` into the `elements` array.
42    offsets_builder: PrimitiveBuilder<O>,
43
44    /// The null map builder of the [`ListArray`].
45    nulls: LazyBitBufferBuilder,
46}
47
48impl<O: IntegerPType> ListBuilder<O> {
49    /// Creates a new `ListBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
50    pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
51        Self::with_capacity(
52            value_dtype,
53            nullability,
54            // We arbitrarily choose 2 times the number of list scalars for the capacity of the
55            // elements builder since we cannot know this ahead of time.
56            DEFAULT_BUILDER_CAPACITY * 2,
57            DEFAULT_BUILDER_CAPACITY,
58        )
59    }
60
61    /// Create a new [`ListArray`] builder with a with the given `capacity`, as well as an initial
62    /// capacity for the `elements` builder (since we cannot know that ahead of time solely based on
63    /// the outer array `capacity`).
64    ///
65    /// # Notes
66    ///
67    /// The number of offsets is one more than the length (# of list scalars) in the array.
68    pub fn with_capacity(
69        value_dtype: Arc<DType>,
70        nullability: Nullability,
71        elements_capacity: usize,
72        capacity: usize,
73    ) -> Self {
74        let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
75        let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
76
77        // The first offset is always 0 and represents an empty list.
78        offsets_builder.append_zero();
79
80        Self {
81            elements_builder,
82            offsets_builder,
83            nulls: LazyBitBufferBuilder::new(capacity),
84            dtype: DType::List(value_dtype, nullability),
85        }
86    }
87
88    /// Appends an array as a single non-null list entry to the builder.
89    ///
90    /// The input `array` must have the same dtype as the element dtype of this list builder.
91    ///
92    /// Note that the list entry will be non-null but the elements themselves are allowed to be null
93    /// (only if the elements [`DType`] in nullable, of course).
94    pub fn append_array_as_list(&mut self, array: &dyn Array) -> VortexResult<()> {
95        vortex_ensure!(
96            array.dtype() == self.element_dtype(),
97            "Array dtype {:?} does not match list element dtype {:?}",
98            array.dtype(),
99            self.element_dtype()
100        );
101
102        self.elements_builder.extend_from_array(array);
103        self.nulls.append_non_null();
104        self.offsets_builder.append_value(
105            O::from_usize(self.elements_builder.len())
106                .vortex_expect("Failed to convert from usize to O"),
107        );
108
109        Ok(())
110    }
111
112    /// Appends a list `value` to the builder.
113    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
114        match value.elements() {
115            None => {
116                if self.dtype.nullability() == NonNullable {
117                    vortex_bail!("Cannot append null value to non-nullable list");
118                }
119                self.append_null();
120            }
121            Some(elements) => {
122                for scalar in elements {
123                    // TODO(connor): This is slow, we should be able to append multiple values at
124                    // once, or the list scalar should hold an Array
125                    self.elements_builder.append_scalar(&scalar)?;
126                }
127
128                self.nulls.append_non_null();
129                self.offsets_builder.append_value(
130                    O::from_usize(self.elements_builder.len())
131                        .vortex_expect("Failed to convert from usize to O"),
132                );
133            }
134        }
135
136        Ok(())
137    }
138
139    /// Finishes the builder directly into a [`ListArray`].
140    pub fn finish_into_list(&mut self) -> ListArray {
141        assert_eq!(
142            self.offsets_builder.len(),
143            self.nulls.len() + 1,
144            "offsets length must be one more than nulls length."
145        );
146
147        ListArray::try_new(
148            self.elements_builder.finish(),
149            self.offsets_builder.finish(),
150            self.nulls.finish_with_nullability(self.dtype.nullability()),
151        )
152        .vortex_expect("Buffer, offsets, and validity must have same length.")
153    }
154
155    /// The [`DType`] of the inner elements. Note that this is **not** the same as the [`DType`] of
156    /// the outer `List`.
157    pub fn element_dtype(&self) -> &DType {
158        let DType::List(element_dtype, _) = &self.dtype else {
159            vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
160        };
161
162        element_dtype
163    }
164}
165
166impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
167    fn as_any(&self) -> &dyn Any {
168        self
169    }
170
171    fn as_any_mut(&mut self) -> &mut dyn Any {
172        self
173    }
174
175    fn dtype(&self) -> &DType {
176        &self.dtype
177    }
178
179    fn len(&self) -> usize {
180        self.nulls.len()
181    }
182
183    fn append_zeros(&mut self, n: usize) {
184        let curr_len = self.elements_builder.len();
185        for _ in 0..n {
186            self.offsets_builder.append_value(
187                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
188            )
189        }
190        self.nulls.append_n_non_nulls(n);
191    }
192
193    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
194        let curr_len = self.elements_builder.len();
195        for _ in 0..n {
196            // A list with a null element is can be a list with a zero-span offset and a validity
197            // bit set
198            self.offsets_builder.append_value(
199                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
200            )
201        }
202        self.nulls.append_n_nulls(n);
203    }
204
205    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
206        vortex_ensure!(
207            scalar.dtype() == self.dtype(),
208            "ListBuilder expected scalar with dtype {}, got {}",
209            self.dtype(),
210            scalar.dtype()
211        );
212
213        self.append_value(scalar.as_list())
214    }
215
216    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
217        let list = array.to_listview();
218        if list.is_empty() {
219            return;
220        }
221
222        // Append validity information.
223        self.nulls.append_validity_mask(
224            array
225                .validity_mask()
226                .vortex_expect("validity_mask in extend_from_array_unchecked"),
227        );
228
229        // Note that `ListViewArray` has `n` offsets and sizes, not `n+1` offsets like `ListArray`.
230        let elements = list.elements();
231        let offsets = list.offsets().to_primitive();
232        let sizes = list.sizes().to_primitive();
233
234        fn extend_inner<O, OffsetType, SizeType>(
235            builder: &mut ListBuilder<O>,
236            new_elements: &ArrayRef,
237            new_offsets: &[OffsetType],
238            new_sizes: &[SizeType],
239        ) where
240            O: IntegerPType,
241            OffsetType: IntegerPType,
242            SizeType: IntegerPType,
243        {
244            let num_lists = new_offsets.len();
245            debug_assert_eq!(num_lists, new_sizes.len());
246
247            let mut curr_offset = builder.elements_builder.len();
248            let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
249
250            // We need to append each list individually, converting from `ListViewArray` format to
251            // the `ListArray` format that `ListBuilder` expects.
252            for i in 0..new_offsets.len() {
253                let offset: usize = new_offsets[i].as_();
254                let size: usize = new_sizes[i].as_();
255
256                if size > 0 {
257                    let list_elements = new_elements
258                        .slice(offset..offset + size)
259                        .vortex_expect("list builder slice");
260                    builder.elements_builder.extend_from_array(&list_elements);
261                    curr_offset += size;
262                }
263
264                let new_offset =
265                    O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
266
267                offsets_range.set_value(i, new_offset);
268            }
269
270            // SAFETY: We have initialized all `num_lists` values, and since the `offsets` array is
271            // non-nullable, we are done.
272            unsafe { offsets_range.finish() };
273        }
274
275        match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
276            match_each_integer_ptype!(sizes.ptype(), |SizeType| {
277                extend_inner(
278                    self,
279                    elements,
280                    offsets.as_slice::<OffsetType>(),
281                    sizes.as_slice::<SizeType>(),
282                )
283            })
284        })
285    }
286
287    fn reserve_exact(&mut self, additional: usize) {
288        self.elements_builder.reserve_exact(additional);
289        self.offsets_builder.reserve_exact(additional);
290        self.nulls.reserve_exact(additional);
291    }
292
293    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
294        self.nulls = LazyBitBufferBuilder::new(validity.len());
295        self.nulls.append_validity_mask(validity);
296    }
297
298    fn finish(&mut self) -> ArrayRef {
299        self.finish_into_list().into_array()
300    }
301}
302
303#[cfg(test)]
304mod tests {
305    use std::sync::Arc;
306
307    use Nullability::NonNullable;
308    use Nullability::Nullable;
309    use vortex_buffer::buffer;
310    use vortex_dtype::DType;
311    use vortex_dtype::IntegerPType;
312    use vortex_dtype::Nullability;
313    use vortex_dtype::PType::I32;
314
315    use crate::IntoArray;
316    use crate::ToCanonical;
317    use crate::array::Array;
318    use crate::arrays::ChunkedArray;
319    use crate::arrays::ListArray;
320    use crate::arrays::PrimitiveArray;
321    use crate::assert_arrays_eq;
322    use crate::builders::ArrayBuilder;
323    use crate::builders::list::ListBuilder;
324    use crate::scalar::Scalar;
325    use crate::validity::Validity;
326    use crate::vtable::ValidityHelper;
327
328    #[test]
329    fn test_empty() {
330        let mut builder =
331            ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
332
333        let list = builder.finish();
334        assert_eq!(list.len(), 0);
335    }
336
337    #[test]
338    fn test_values() {
339        let dtype: Arc<DType> = Arc::new(I32.into());
340        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
341
342        builder
343            .append_value(
344                Scalar::list(
345                    dtype.clone(),
346                    vec![1i32.into(), 2i32.into(), 3i32.into()],
347                    NonNullable,
348                )
349                .as_list(),
350            )
351            .unwrap();
352
353        builder
354            .append_value(
355                Scalar::list(
356                    dtype,
357                    vec![4i32.into(), 5i32.into(), 6i32.into()],
358                    NonNullable,
359                )
360                .as_list(),
361            )
362            .unwrap();
363
364        let list = builder.finish();
365        assert_eq!(list.len(), 2);
366
367        let list_array = list.to_listview();
368
369        assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
370        assert_eq!(list_array.list_elements_at(1).unwrap().len(), 3);
371    }
372
373    #[test]
374    fn test_append_empty_list() {
375        let dtype: Arc<DType> = Arc::new(I32.into());
376        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
377
378        assert!(
379            builder
380                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
381                .is_ok()
382        )
383    }
384
385    #[test]
386    fn test_nullable_values() {
387        let dtype: Arc<DType> = Arc::new(I32.into());
388        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
389
390        builder
391            .append_value(
392                Scalar::list(
393                    dtype.clone(),
394                    vec![1i32.into(), 2i32.into(), 3i32.into()],
395                    NonNullable,
396                )
397                .as_list(),
398            )
399            .unwrap();
400
401        builder
402            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
403            .unwrap();
404
405        builder
406            .append_value(
407                Scalar::list(
408                    dtype,
409                    vec![4i32.into(), 5i32.into(), 6i32.into()],
410                    NonNullable,
411                )
412                .as_list(),
413            )
414            .unwrap();
415
416        let list = builder.finish();
417        assert_eq!(list.len(), 3);
418
419        let list_array = list.to_listview();
420
421        assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
422        assert_eq!(list_array.list_elements_at(1).unwrap().len(), 0);
423        assert_eq!(list_array.list_elements_at(2).unwrap().len(), 3);
424    }
425
426    fn test_extend_builder_gen<O: IntegerPType>() {
427        let list = ListArray::from_iter_opt_slow::<O, _, _>(
428            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
429            Arc::new(I32.into()),
430        )
431        .unwrap();
432        assert_eq!(list.len(), 3);
433
434        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
435
436        builder.extend_from_array(&list);
437        builder.extend_from_array(&list);
438        builder.extend_from_array(&list.slice(0..0).unwrap());
439        builder.extend_from_array(&list.slice(1..3).unwrap());
440
441        let expected = ListArray::from_iter_opt_slow::<O, _, _>(
442            [
443                Some(vec![0, 1, 2]),
444                None,
445                Some(vec![4, 5]),
446                Some(vec![0, 1, 2]),
447                None,
448                Some(vec![4, 5]),
449                None,
450                Some(vec![4, 5]),
451            ],
452            Arc::new(DType::Primitive(I32, NonNullable)),
453        )
454        .unwrap()
455        .to_listview();
456
457        let actual = builder.finish_into_canonical().into_listview();
458
459        assert_arrays_eq!(actual.elements(), expected.elements());
460
461        assert_arrays_eq!(actual.offsets(), expected.offsets());
462
463        assert_eq!(actual.validity(), expected.validity())
464    }
465
466    #[test]
467    fn test_extend_builder() {
468        test_extend_builder_gen::<i8>();
469        test_extend_builder_gen::<i16>();
470        test_extend_builder_gen::<i32>();
471        test_extend_builder_gen::<i64>();
472
473        test_extend_builder_gen::<u8>();
474        test_extend_builder_gen::<u16>();
475        test_extend_builder_gen::<u32>();
476        test_extend_builder_gen::<u64>();
477    }
478
479    #[test]
480    pub fn test_array_with_gap() {
481        let one_trailing_unused_element = ListArray::try_new(
482            buffer![1, 2, 3, 4].into_array(),
483            buffer![0, 3].into_array(),
484            Validity::NonNullable,
485        )
486        .unwrap();
487
488        let second_array = ListArray::try_new(
489            buffer![5, 6].into_array(),
490            buffer![0, 2].into_array(),
491            Validity::NonNullable,
492        )
493        .unwrap();
494
495        let chunked_list = ChunkedArray::try_new(
496            vec![
497                one_trailing_unused_element.clone().into_array(),
498                second_array.clone().into_array(),
499            ],
500            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
501        );
502
503        let canon_values = chunked_list.unwrap().to_listview();
504
505        assert_eq!(
506            one_trailing_unused_element.scalar_at(0).unwrap(),
507            canon_values.scalar_at(0).unwrap()
508        );
509        assert_eq!(
510            second_array.scalar_at(0).unwrap(),
511            canon_values.scalar_at(1).unwrap()
512        );
513    }
514
515    #[test]
516    fn test_append_scalar() {
517        let dtype: Arc<DType> = Arc::new(I32.into());
518        let mut builder = ListBuilder::<u64>::with_capacity(dtype.clone(), Nullable, 20, 10);
519
520        // Test appending a valid list.
521        let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable);
522        builder.append_scalar(&list_scalar1).unwrap();
523
524        // Test appending another list.
525        let list_scalar2 = Scalar::list(
526            dtype.clone(),
527            vec![3i32.into(), 4i32.into(), 5i32.into()],
528            Nullable,
529        );
530        builder.append_scalar(&list_scalar2).unwrap();
531
532        // Test appending null value.
533        let null_scalar = Scalar::null(DType::List(dtype.clone(), Nullable));
534        builder.append_scalar(&null_scalar).unwrap();
535
536        let array = builder.finish_into_list();
537        assert_eq!(array.len(), 3);
538
539        // Check actual values using scalar_at.
540
541        let scalar0 = array.scalar_at(0).unwrap();
542        let list0 = scalar0.as_list();
543        assert_eq!(list0.len(), 2);
544        if let Some(list0_items) = list0.elements() {
545            assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
546            assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
547        }
548
549        let scalar1 = array.scalar_at(1).unwrap();
550        let list1 = scalar1.as_list();
551        assert_eq!(list1.len(), 3);
552        if let Some(list1_items) = list1.elements() {
553            assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
554            assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
555            assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
556        }
557
558        let scalar2 = array.scalar_at(2).unwrap();
559        let list2 = scalar2.as_list();
560        assert!(list2.is_null()); // This should be null.
561
562        // Check validity.
563        assert!(array.validity().is_valid(0).unwrap());
564        assert!(array.validity().is_valid(1).unwrap());
565        assert!(!array.validity().is_valid(2).unwrap());
566
567        // Test wrong dtype error.
568        let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
569        let wrong_scalar = Scalar::from(42i32);
570        assert!(builder.append_scalar(&wrong_scalar).is_err());
571    }
572
573    #[test]
574    fn test_append_array_as_list() {
575        let dtype: Arc<DType> = Arc::new(I32.into());
576        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 20, 10);
577
578        // Append a primitive array as a single list entry.
579        let arr1 = buffer![1i32, 2, 3].into_array();
580        builder.append_array_as_list(&arr1).unwrap();
581
582        // Interleave with a list scalar.
583        builder
584            .append_value(
585                Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable)
586                    .as_list(),
587            )
588            .unwrap();
589
590        // Append another primitive array as a single list entry.
591        let arr2 = buffer![4i32, 5].into_array();
592        builder.append_array_as_list(&arr2).unwrap();
593
594        // Append an empty array as a single list entry (empty list).
595        let arr3 = buffer![0i32; 0].into_array();
596        builder.append_array_as_list(&arr3).unwrap();
597
598        // Interleave with another list scalar (empty list).
599        builder
600            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
601            .unwrap();
602
603        let list = builder.finish_into_list();
604        assert_eq!(list.len(), 5);
605
606        // Verify elements array: [1, 2, 3, 10, 11, 4, 5].
607        assert_arrays_eq!(
608            list.elements(),
609            PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
610        );
611
612        // Verify offsets array.
613        assert_arrays_eq!(
614            list.offsets(),
615            PrimitiveArray::from_iter([0u32, 3, 5, 7, 7, 7])
616        );
617
618        // Test dtype mismatch error.
619        let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
620        let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
621        assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
622    }
623}