vortex_array/builders/
listview.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! ListView Builder Implementation.
5//!
6//! A builder for [`ListViewArray`] that tracks both offsets and sizes.
7//!
8//! Unlike [`ListArray`] which only tracks offsets, [`ListViewArray`] stores both offsets and sizes
9//! in separate arrays for better compression.
10//!
11//! [`ListArray`]: crate::arrays::ListArray
12
13use std::sync::Arc;
14
15use vortex_dtype::DType;
16use vortex_dtype::IntegerPType;
17use vortex_dtype::Nullability;
18use vortex_dtype::match_each_integer_ptype;
19use vortex_error::VortexExpect;
20use vortex_error::VortexResult;
21use vortex_error::vortex_ensure;
22use vortex_error::vortex_panic;
23use vortex_mask::Mask;
24use vortex_scalar::ListScalar;
25use vortex_scalar::Scalar;
26
27use crate::Canonical;
28use crate::ToCanonical;
29use crate::array::Array;
30use crate::array::ArrayRef;
31use crate::array::IntoArray;
32use crate::arrays::ListViewArray;
33use crate::arrays::ListViewRebuildMode;
34use crate::arrays::PrimitiveArray;
35use crate::builders::ArrayBuilder;
36use crate::builders::DEFAULT_BUILDER_CAPACITY;
37use crate::builders::PrimitiveBuilder;
38use crate::builders::UninitRange;
39use crate::builders::builder_with_capacity;
40use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
41use crate::compute;
42
43/// A builder for creating [`ListViewArray`] instances, parameterized by the [`IntegerPType`] of
44/// the `offsets` and the `sizes` builders.
45///
46/// This builder tracks both offsets and sizes using potentially different integer types for memory
47/// efficiency. For example, you might use `u64` for offsets but only `u8` for sizes if your lists
48/// are small.
49///
50/// Any combination of [`IntegerPType`] types are valid, as long as the type of `sizes` can fit into
51/// the type of `offsets`.
52pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
53    /// The [`DType`] of the [`ListViewArray`]. This **must** be a [`DType::List`].
54    dtype: DType,
55
56    /// The builder for the underlying elements of the [`ListArray`](crate::arrays::ListArray).
57    elements_builder: Box<dyn ArrayBuilder>,
58
59    /// The builder for the `offsets` into the `elements` array.
60    offsets_builder: PrimitiveBuilder<O>,
61
62    /// The builder for the `sizes` of each list view.
63    sizes_builder: PrimitiveBuilder<S>,
64
65    /// The null map builder of the [`ListViewArray`].
66    nulls: LazyBitBufferBuilder,
67}
68
69impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
70    /// Creates a new `ListViewBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
71    pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
72        Self::with_capacity(
73            element_dtype,
74            nullability,
75            // We arbitrarily choose 2 times the number of list scalars for the capacity of the
76            // elements builder since we cannot know this ahead of time.
77            DEFAULT_BUILDER_CAPACITY * 2,
78            DEFAULT_BUILDER_CAPACITY,
79        )
80    }
81
82    /// Create a new [`ListViewArray`] builder with a with the given `capacity`, as well as an
83    /// initial capacity for the `elements` builder (since we cannot know that ahead of time solely
84    /// based on the outer array `capacity`).
85    ///
86    /// # Panics
87    ///
88    /// Panics if the size type `S` cannot fit within the offset type `O`.
89    pub fn with_capacity(
90        element_dtype: Arc<DType>,
91        nullability: Nullability,
92        elements_capacity: usize,
93        capacity: usize,
94    ) -> Self {
95        let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
96
97        let offsets_builder =
98            PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
99        let sizes_builder =
100            PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
101
102        let nulls = LazyBitBufferBuilder::new(capacity);
103
104        Self {
105            dtype: DType::List(element_dtype, nullability),
106            elements_builder,
107            offsets_builder,
108            sizes_builder,
109            nulls,
110        }
111    }
112
113    /// Append a list of values to the builder.
114    ///
115    /// This method extends the value builder with the provided values and records
116    /// the offset and size of the new list.
117    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
118        let Some(elements) = value.elements() else {
119            // If `elements` is `None`, then the `value` is a null value.
120            vortex_ensure!(
121                self.dtype.is_nullable(),
122                "Cannot append null value to non-nullable list builder"
123            );
124            self.append_null();
125            return Ok(());
126        };
127
128        let curr_offset = self.elements_builder.len();
129        let num_elements = elements.len();
130
131        // We must assert this even in release mode to ensure that the safety comment in
132        // `finish_into_listview` is correct.
133        assert!(
134            ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
135            "appending this list would cause an offset overflow"
136        );
137
138        for scalar in elements {
139            self.elements_builder.append_scalar(&scalar)?;
140        }
141        self.nulls.append_non_null();
142
143        self.offsets_builder.append_value(
144            O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
145        );
146        self.sizes_builder.append_value(
147            S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
148        );
149
150        Ok(())
151    }
152
153    /// Finishes the builder directly into a [`ListViewArray`].
154    pub fn finish_into_listview(&mut self) -> ListViewArray {
155        debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
156        debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
157
158        let elements = self.elements_builder.finish();
159        let offsets = self.offsets_builder.finish();
160        let sizes = self.sizes_builder.finish();
161        let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
162
163        // SAFETY:
164        // - Both the offsets and the sizes are non-nullable.
165        // - The offsets, sizes, and validity have the same length since we always appended the same
166        //   amount.
167        // - We checked on construction that the sizes type fits into the offsets.
168        // - In every method that adds values to this builder (`append_value`, `append_scalar`, and
169        //   `extend_from_array_unchecked`), we checked that `offset + size` does not overflow.
170        // - We constructed everything in a way that builds the `ListViewArray` similar to the shape
171        //   of a `ListArray`, so we know the resulting array is zero-copyable to a `ListArray`.
172        unsafe {
173            ListViewArray::new_unchecked(elements, offsets, sizes, validity)
174                .with_zero_copy_to_list(true)
175        }
176    }
177
178    /// The [`DType`] of the inner elements. Note that this is **not** the same as the [`DType`] of
179    /// the outer `FixedSizeList`.
180    pub fn element_dtype(&self) -> &DType {
181        let DType::List(element_dtype, ..) = &self.dtype else {
182            vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
183        };
184
185        element_dtype
186    }
187}
188
189impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
190    fn as_any(&self) -> &dyn std::any::Any {
191        self
192    }
193
194    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
195        self
196    }
197
198    fn dtype(&self) -> &DType {
199        &self.dtype
200    }
201
202    fn len(&self) -> usize {
203        self.offsets_builder.len()
204    }
205
206    fn append_zeros(&mut self, n: usize) {
207        debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
208        debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
209
210        // Get the current position in the elements array.
211        let curr_offset = self.elements_builder.len();
212
213        // Since we consider the "zero" element of a list an empty list, we simply update the
214        // `offsets` and `sizes` metadata to add an empty list.
215        for _ in 0..n {
216            self.offsets_builder.append_value(
217                O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
218            );
219            self.sizes_builder.append_value(S::zero());
220        }
221
222        self.nulls.append_n_non_nulls(n);
223    }
224
225    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
226        debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
227        debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
228
229        // Get the current position in the elements array.
230        let curr_offset = self.elements_builder.len();
231
232        // A null list can have any representation, but we choose to use the zero representation.
233        for _ in 0..n {
234            self.offsets_builder.append_value(
235                O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
236            );
237            self.sizes_builder.append_value(S::zero());
238        }
239
240        // This is the only difference from `append_zeros`.
241        self.nulls.append_n_nulls(n);
242    }
243
244    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
245        vortex_ensure!(
246            scalar.dtype() == self.dtype(),
247            "ListViewBuilder expected scalar with dtype {:?}, got {:?}",
248            self.dtype(),
249            scalar.dtype()
250        );
251
252        let list_scalar = scalar.as_list();
253        self.append_value(list_scalar)
254    }
255
256    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
257        let listview = array.to_listview();
258        if listview.is_empty() {
259            return;
260        }
261
262        // If we do not have the guarantee that the array is zero-copyable to a list, then we have
263        // to manually append each scalar.
264        if !listview.is_zero_copy_to_list() {
265            for i in 0..listview.len() {
266                let list = listview.scalar_at(i);
267
268                self.append_scalar(&list)
269                    .vortex_expect("was unable to extend the `ListViewBuilder`")
270            }
271
272            return;
273        }
274
275        // Otherwise, after removing any leading and trailing elements, we can simply bulk append
276        // the entire array.
277        let listview = listview.rebuild(ListViewRebuildMode::MakeExact);
278        debug_assert!(listview.is_zero_copy_to_list());
279
280        self.nulls.append_validity_mask(array.validity_mask());
281
282        // Bulk append the new elements (which should have no gaps or overlaps).
283        let old_elements_len = self.elements_builder.len();
284        self.elements_builder
285            .reserve_exact(listview.elements().len());
286        self.elements_builder.extend_from_array(listview.elements());
287        let new_elements_len = self.elements_builder.len();
288
289        // Reserve enough space for the new views.
290        let extend_length = listview.len();
291        self.sizes_builder.reserve_exact(extend_length);
292        self.offsets_builder.reserve_exact(extend_length);
293
294        // The incoming sizes might have a different type than the builder, so we need to cast.
295        let cast_sizes = compute::cast(listview.sizes(), self.sizes_builder.dtype()).vortex_expect(
296            "was somehow unable to cast the new sizes to the type of the builder sizes",
297        );
298        self.sizes_builder.extend_from_array(cast_sizes.as_ref());
299
300        // Now we need to adjust all of the offsets by adding the current number of elements in the
301        // builder.
302
303        let uninit_range = self.offsets_builder.uninit_range(extend_length);
304
305        // This should be cheap because we didn't compress after rebuilding.
306        let new_offsets = listview.offsets().to_primitive();
307
308        match_each_integer_ptype!(new_offsets.ptype(), |A| {
309            adjust_and_extend_offsets::<O, A>(
310                uninit_range,
311                new_offsets,
312                old_elements_len,
313                new_elements_len,
314            );
315        })
316    }
317
318    fn reserve_exact(&mut self, capacity: usize) {
319        self.elements_builder.reserve_exact(capacity * 2);
320        self.offsets_builder.reserve_exact(capacity);
321        self.sizes_builder.reserve_exact(capacity);
322        self.nulls.reserve_exact(capacity);
323    }
324
325    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
326        self.nulls = LazyBitBufferBuilder::new(validity.len());
327        self.nulls.append_validity_mask(validity);
328    }
329
330    fn finish(&mut self) -> ArrayRef {
331        self.finish_into_listview().into_array()
332    }
333
334    fn finish_into_canonical(&mut self) -> Canonical {
335        Canonical::List(self.finish_into_listview())
336    }
337}
338
339/// Given new offsets, adds them to the `UninitRange` after adding the `old_elements_len` to each
340/// offset.
341fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
342    mut uninit_range: UninitRange<'a, O>,
343    new_offsets: PrimitiveArray,
344    old_elements_len: usize,
345    new_elements_len: usize,
346) {
347    let new_offsets_slice = new_offsets.as_slice::<A>();
348    let old_elements_len = O::from_usize(old_elements_len)
349        .vortex_expect("the old elements length did not fit into the offset type (impossible)");
350    let new_elements_len = O::from_usize(new_elements_len)
351        .vortex_expect("the current elements length did not fit into the offset type (impossible)");
352
353    for i in 0..uninit_range.len() {
354        let new_offset = O::from_usize(
355            new_offsets_slice[i]
356                .to_usize()
357                .vortex_expect("Offsets must always fit in usize"),
358        )
359        .vortex_expect("New offset somehow did not fit into the builder's offset type");
360
361        // We have to check this even in release mode to ensure the final `new_unchecked`
362        // construction in `finish_into_listview` is valid.
363        let adjusted_new_offset = new_offset + old_elements_len;
364        assert!(
365            adjusted_new_offset <= new_elements_len,
366            "[{i}/{}]: {new_offset} + {old_elements_len} \
367                = {adjusted_new_offset} <= {new_elements_len} failed",
368            uninit_range.len()
369        );
370
371        uninit_range.set_value(i, adjusted_new_offset);
372    }
373
374    // SAFETY: We have set all the values in the range, and since `offsets` are non-nullable, we are
375    // done.
376    unsafe { uninit_range.finish() };
377}
378
379#[cfg(test)]
380mod tests {
381    use std::sync::Arc;
382
383    use vortex_dtype::DType;
384    use vortex_dtype::Nullability::NonNullable;
385    use vortex_dtype::Nullability::Nullable;
386    use vortex_dtype::PType::I32;
387    use vortex_scalar::Scalar;
388
389    use super::ListViewBuilder;
390    use crate::IntoArray;
391    use crate::array::Array;
392    use crate::arrays::ListArray;
393    use crate::builders::ArrayBuilder;
394    use crate::vtable::ValidityHelper;
395
396    #[test]
397    fn test_empty() {
398        let mut builder =
399            ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
400
401        let listview = builder.finish();
402        assert_eq!(listview.len(), 0);
403    }
404
405    #[test]
406    fn test_basic_append_and_nulls() {
407        let dtype: Arc<DType> = Arc::new(I32.into());
408        let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
409
410        // Append a regular list.
411        builder
412            .append_value(
413                Scalar::list(
414                    dtype.clone(),
415                    vec![1i32.into(), 2i32.into(), 3i32.into()],
416                    NonNullable,
417                )
418                .as_list(),
419            )
420            .unwrap();
421
422        // Append an empty list.
423        builder
424            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
425            .unwrap();
426
427        // Append a null list.
428        builder.append_null();
429
430        // Append another regular list.
431        builder
432            .append_value(
433                Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
434            )
435            .unwrap();
436
437        let listview = builder.finish_into_listview();
438        assert_eq!(listview.len(), 4);
439
440        // Check first list: [1, 2, 3].
441        let first_list = listview.list_elements_at(0);
442        assert_eq!(first_list.len(), 3);
443        assert_eq!(first_list.scalar_at(0), 1i32.into());
444        assert_eq!(first_list.scalar_at(1), 2i32.into());
445        assert_eq!(first_list.scalar_at(2), 3i32.into());
446
447        // Check empty list.
448        let empty_list = listview.list_elements_at(1);
449        assert_eq!(empty_list.len(), 0);
450
451        // Check null list.
452        assert!(!listview.validity().is_valid(2));
453
454        // Check last list: [4, 5].
455        let last_list = listview.list_elements_at(3);
456        assert_eq!(last_list.len(), 2);
457        assert_eq!(last_list.scalar_at(0), 4i32.into());
458        assert_eq!(last_list.scalar_at(1), 5i32.into());
459    }
460
461    #[test]
462    fn test_different_offset_size_types() {
463        // Test u32 offsets with u8 sizes.
464        let dtype: Arc<DType> = Arc::new(I32.into());
465        let mut builder =
466            ListViewBuilder::<u32, u8>::with_capacity(dtype.clone(), NonNullable, 0, 0);
467
468        builder
469            .append_value(
470                Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], NonNullable).as_list(),
471            )
472            .unwrap();
473
474        builder
475            .append_value(
476                Scalar::list(
477                    dtype,
478                    vec![3i32.into(), 4i32.into(), 5i32.into()],
479                    NonNullable,
480                )
481                .as_list(),
482            )
483            .unwrap();
484
485        let listview = builder.finish_into_listview();
486        assert_eq!(listview.len(), 2);
487
488        // Verify first list: [1, 2].
489        let first = listview.list_elements_at(0);
490        assert_eq!(first.scalar_at(0), 1i32.into());
491        assert_eq!(first.scalar_at(1), 2i32.into());
492
493        // Verify second list: [3, 4, 5].
494        let second = listview.list_elements_at(1);
495        assert_eq!(second.scalar_at(0), 3i32.into());
496        assert_eq!(second.scalar_at(1), 4i32.into());
497        assert_eq!(second.scalar_at(2), 5i32.into());
498
499        // Test u64 offsets with u16 sizes.
500        let dtype2: Arc<DType> = Arc::new(I32.into());
501        let mut builder2 =
502            ListViewBuilder::<u64, u16>::with_capacity(dtype2.clone(), NonNullable, 0, 0);
503
504        for i in 0..5 {
505            builder2
506                .append_value(
507                    Scalar::list(dtype2.clone(), vec![(i * 10).into()], NonNullable).as_list(),
508                )
509                .unwrap();
510        }
511
512        let listview2 = builder2.finish_into_listview();
513        assert_eq!(listview2.len(), 5);
514
515        // Verify the values: [0], [10], [20], [30], [40].
516        for i in 0..5i32 {
517            let list = listview2.list_elements_at(i as usize);
518            assert_eq!(list.len(), 1);
519            assert_eq!(list.scalar_at(0), (i * 10).into());
520        }
521    }
522
523    #[test]
524    fn test_builder_trait_methods() {
525        let dtype: Arc<DType> = Arc::new(I32.into());
526        let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
527
528        // Test append_zeros (creates empty lists).
529        builder.append_zeros(2);
530        assert_eq!(builder.len(), 2);
531
532        // Test append_nulls.
533        unsafe {
534            builder.append_nulls_unchecked(2);
535        }
536        assert_eq!(builder.len(), 4);
537
538        // Test append_scalar.
539        let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
540        builder.append_scalar(&list_scalar).unwrap();
541        assert_eq!(builder.len(), 5);
542
543        let listview = builder.finish_into_listview();
544        assert_eq!(listview.len(), 5);
545
546        // First two are empty lists (from append_zeros).
547        assert_eq!(listview.list_elements_at(0).len(), 0);
548        assert_eq!(listview.list_elements_at(1).len(), 0);
549
550        // Next two are nulls.
551        assert!(!listview.validity().is_valid(2));
552        assert!(!listview.validity().is_valid(3));
553
554        // Last is the regular list: [10, 20].
555        let last_list = listview.list_elements_at(4);
556        assert_eq!(last_list.len(), 2);
557        assert_eq!(last_list.scalar_at(0), 10i32.into());
558        assert_eq!(last_list.scalar_at(1), 20i32.into());
559    }
560
561    #[test]
562    fn test_extend_from_array() {
563        let dtype: Arc<DType> = Arc::new(I32.into());
564
565        // Create a source ListArray.
566        let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
567            [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
568            Arc::new(I32.into()),
569        )
570        .unwrap();
571
572        let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
573
574        // Add initial data.
575        builder
576            .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
577            .unwrap();
578
579        // Extend from the ListArray.
580        builder.extend_from_array(&source.into_array());
581
582        // Extend from empty array (should be no-op).
583        let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
584            std::iter::empty::<Option<Vec<i32>>>(),
585            Arc::new(I32.into()),
586        )
587        .unwrap();
588        builder.extend_from_array(&empty_source.into_array());
589
590        let listview = builder.finish_into_listview();
591        assert_eq!(listview.len(), 4);
592
593        // Check the extended data.
594        // First list: [0] (initial data).
595        let first = listview.list_elements_at(0);
596        assert_eq!(first.len(), 1);
597        assert_eq!(first.scalar_at(0), 0i32.into());
598
599        // Second list: [1, 2, 3] (from source).
600        let second = listview.list_elements_at(1);
601        assert_eq!(second.len(), 3);
602        assert_eq!(second.scalar_at(0), 1i32.into());
603        assert_eq!(second.scalar_at(1), 2i32.into());
604        assert_eq!(second.scalar_at(2), 3i32.into());
605
606        // Third list: null (from source).
607        assert!(!listview.validity().is_valid(2));
608
609        // Fourth list: [4, 5] (from source).
610        let fourth = listview.list_elements_at(3);
611        assert_eq!(fourth.len(), 2);
612        assert_eq!(fourth.scalar_at(0), 4i32.into());
613        assert_eq!(fourth.scalar_at(1), 5i32.into());
614    }
615
616    #[test]
617    fn test_error_append_null_to_non_nullable() {
618        let dtype: Arc<DType> = Arc::new(I32.into());
619        let mut builder =
620            ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
621
622        // Create a null list with nullable type (since Scalar::null requires nullable type).
623        let null_scalar = Scalar::null(DType::List(dtype, Nullable));
624        let null_list = null_scalar.as_list();
625
626        // This should fail because we're trying to append a null to a non-nullable builder.
627        let result = builder.append_value(null_list);
628        assert!(result.is_err());
629        assert!(
630            result
631                .unwrap_err()
632                .to_string()
633                .contains("null value to non-nullable")
634        );
635    }
636}