vortex_array/builders/
list.rs

1use std::any::Any;
2use std::sync::Arc;
3
4use vortex_dtype::Nullability::NonNullable;
5use vortex_dtype::{DType, NativePType, Nullability};
6use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
7use vortex_mask::Mask;
8use vortex_scalar::{ListScalar, NumericOperator};
9
10use crate::arrays::{ConstantArray, ListArray, OffsetPType};
11use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
12use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
13use crate::compute::{cast, numeric};
14use crate::{Array, ArrayRef, IntoArray, ToCanonical};
15
16pub struct ListBuilder<O: NativePType> {
17    value_builder: Box<dyn ArrayBuilder>,
18    index_builder: PrimitiveBuilder<O>,
19    nulls: LazyNullBufferBuilder,
20    nullability: Nullability,
21    dtype: DType,
22}
23
24impl<O: OffsetPType> ListBuilder<O> {
25    // TODO(joe): add value + index capacity ctor.
26    pub fn with_capacity(
27        value_dtype: Arc<DType>,
28        nullability: Nullability,
29        capacity: usize,
30    ) -> Self {
31        // I would expect the list to have more than one value per index
32        let value_builder = builder_with_capacity(value_dtype.as_ref(), 2 * capacity);
33        let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, capacity);
34
35        // The first index of the list, which is always 0 and represents an empty list.
36        index_builder.append_zero();
37
38        Self {
39            value_builder,
40            index_builder,
41            nulls: LazyNullBufferBuilder::new(capacity),
42            nullability,
43            dtype: DType::List(value_dtype, nullability),
44        }
45    }
46
47    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
48        match value.elements() {
49            None => {
50                if self.nullability == NonNullable {
51                    vortex_bail!("Cannot append null value to non-nullable list");
52                }
53                self.append_null();
54                Ok(())
55            }
56            Some(elements) => {
57                for scalar in elements {
58                    // TODO(joe): This is slow, we should be able to append multiple values at once,
59                    // or the list scalar should hold an Array
60                    self.value_builder.append_scalar(&scalar)?;
61                }
62                self.nulls.append_non_null();
63                self.append_index(
64                    O::from_usize(self.value_builder.len())
65                        .vortex_expect("Failed to convert from usize to O"),
66                )
67            }
68        }
69    }
70
71    fn append_index(&mut self, index: O) -> VortexResult<()> {
72        self.index_builder.append_scalar(&index.into())
73    }
74}
75
76impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
77    fn as_any(&self) -> &dyn Any {
78        self
79    }
80
81    fn as_any_mut(&mut self) -> &mut dyn Any {
82        self
83    }
84
85    fn dtype(&self) -> &DType {
86        &self.dtype
87    }
88
89    fn len(&self) -> usize {
90        self.nulls.len()
91    }
92
93    fn append_zeros(&mut self, n: usize) {
94        let count = self.value_builder.len();
95        self.value_builder.append_zeros(n);
96        for i in 0..n {
97            self.append_index(
98                O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
99            )
100            .vortex_expect("Failed to append index");
101        }
102        self.nulls.append_n_non_nulls(n);
103    }
104
105    fn append_nulls(&mut self, n: usize) {
106        let count = self.value_builder.len();
107        for _ in 0..n {
108            // A list with a null element is can be a list with a zero-span offset and a validity
109            // bit set
110            self.append_index(
111                O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
112            )
113            .vortex_expect("Failed to append index");
114        }
115        self.nulls.append_n_nulls(n);
116    }
117
118    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
119        self.nulls.append_validity_mask(array.validity_mask()?);
120
121        let list = array.to_list()?;
122
123        let cursor_usize = self.value_builder.len();
124        let cursor = O::from_usize(cursor_usize).ok_or_else(|| {
125            vortex_err!(
126                "cannot convert length {} to type {:?}",
127                cursor_usize,
128                O::PTYPE
129            )
130        })?;
131
132        let offsets = numeric(
133            &cast(
134                &list.offsets().slice(1, list.offsets().len())?,
135                &DType::Primitive(O::PTYPE, NonNullable),
136            )?,
137            ConstantArray::new(cursor, list.len()).as_ref(),
138            NumericOperator::Add,
139        )?;
140        self.index_builder.extend_from_array(&offsets)?;
141
142        if !list.is_empty() {
143            let last_used_index = self.index_builder.values().last().vortex_expect("there must be at least one index because we just extended a non-zero list of offsets");
144            let sliced_values = list
145                .elements()
146                .slice(0, last_used_index.as_() - cursor_usize)?;
147            self.value_builder.ensure_capacity(sliced_values.len());
148            self.value_builder.extend_from_array(&sliced_values)?;
149        }
150
151        Ok(())
152    }
153
154    fn ensure_capacity(&mut self, capacity: usize) {
155        self.index_builder.ensure_capacity(capacity);
156        self.value_builder.ensure_capacity(capacity);
157        self.nulls.ensure_capacity(capacity);
158    }
159
160    fn set_validity(&mut self, validity: Mask) {
161        self.nulls = LazyNullBufferBuilder::new(validity.len());
162        self.nulls.append_validity_mask(validity);
163    }
164
165    fn finish(&mut self) -> ArrayRef {
166        assert_eq!(
167            self.index_builder.len(),
168            self.nulls.len() + 1,
169            "Indices length must be one more than nulls length."
170        );
171
172        ListArray::try_new(
173            self.value_builder.finish(),
174            self.index_builder.finish(),
175            self.nulls.finish_with_nullability(self.nullability),
176        )
177        .vortex_expect("Buffer, offsets, and validity must have same length.")
178        .into_array()
179    }
180}
181
182#[cfg(test)]
183mod tests {
184    use std::sync::Arc;
185
186    use Nullability::{NonNullable, Nullable};
187    use vortex_buffer::buffer;
188    use vortex_dtype::PType::I32;
189    use vortex_dtype::{DType, Nullability};
190    use vortex_scalar::Scalar;
191
192    use crate::array::Array;
193    use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
194    use crate::builders::ArrayBuilder;
195    use crate::builders::list::ListBuilder;
196    use crate::validity::Validity;
197    use crate::vtable::ValidityHelper;
198    use crate::{IntoArray as _, ToCanonical};
199
200    #[test]
201    fn test_empty() {
202        let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
203
204        let list = builder.finish();
205        assert_eq!(list.len(), 0);
206    }
207
208    #[test]
209    fn test_values() {
210        let dtype: Arc<DType> = Arc::new(I32.into());
211        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
212
213        builder
214            .append_value(
215                Scalar::list(
216                    dtype.clone(),
217                    vec![1i32.into(), 2i32.into(), 3i32.into()],
218                    NonNullable,
219                )
220                .as_list(),
221            )
222            .unwrap();
223
224        builder
225            .append_value(
226                Scalar::list(
227                    dtype,
228                    vec![4i32.into(), 5i32.into(), 6i32.into()],
229                    NonNullable,
230                )
231                .as_list(),
232            )
233            .unwrap();
234
235        let list = builder.finish();
236        assert_eq!(list.len(), 2);
237
238        let list_array = list.to_list().unwrap();
239
240        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
241        assert_eq!(list_array.elements_at(1).unwrap().len(), 3);
242    }
243
244    #[test]
245    fn test_non_null_fails() {
246        let dtype: Arc<DType> = Arc::new(I32.into());
247        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
248
249        assert!(
250            builder
251                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
252                .is_err()
253        )
254    }
255
256    #[test]
257    fn test_nullable_values() {
258        let dtype: Arc<DType> = Arc::new(I32.into());
259        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
260
261        builder
262            .append_value(
263                Scalar::list(
264                    dtype.clone(),
265                    vec![1i32.into(), 2i32.into(), 3i32.into()],
266                    NonNullable,
267                )
268                .as_list(),
269            )
270            .unwrap();
271
272        builder
273            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
274            .unwrap();
275
276        builder
277            .append_value(
278                Scalar::list(
279                    dtype,
280                    vec![4i32.into(), 5i32.into(), 6i32.into()],
281                    NonNullable,
282                )
283                .as_list(),
284            )
285            .unwrap();
286
287        let list = builder.finish();
288        assert_eq!(list.len(), 3);
289
290        let list_array = list.to_list().unwrap();
291
292        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
293        assert_eq!(list_array.elements_at(1).unwrap().len(), 0);
294        assert_eq!(list_array.elements_at(2).unwrap().len(), 3);
295    }
296
297    fn test_extend_builder_gen<O: OffsetPType>() {
298        let list = ListArray::from_iter_opt_slow::<O, _, _>(
299            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
300            Arc::new(I32.into()),
301        )
302        .unwrap();
303
304        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
305
306        builder.extend_from_array(&list).unwrap();
307        builder.extend_from_array(&list).unwrap();
308
309        let expect = ListArray::from_iter_opt_slow::<O, _, _>(
310            [
311                Some(vec![0, 1, 2]),
312                None,
313                Some(vec![4, 5]),
314                Some(vec![0, 1, 2]),
315                None,
316                Some(vec![4, 5]),
317            ],
318            Arc::new(DType::Primitive(I32, NonNullable)),
319        )
320        .unwrap()
321        .to_list()
322        .unwrap();
323
324        let res = builder
325            .finish()
326            .to_canonical()
327            .unwrap()
328            .into_list()
329            .unwrap();
330
331        assert_eq!(
332            res.elements().to_primitive().unwrap().as_slice::<i32>(),
333            expect.elements().to_primitive().unwrap().as_slice::<i32>()
334        );
335
336        assert_eq!(
337            res.offsets().to_primitive().unwrap().as_slice::<O>(),
338            expect.offsets().to_primitive().unwrap().as_slice::<O>()
339        );
340
341        assert_eq!(res.validity(), expect.validity())
342    }
343
344    #[test]
345    fn test_extend_builder() {
346        test_extend_builder_gen::<i8>();
347        test_extend_builder_gen::<i16>();
348        test_extend_builder_gen::<i32>();
349        test_extend_builder_gen::<i64>();
350
351        test_extend_builder_gen::<u8>();
352        test_extend_builder_gen::<u16>();
353        test_extend_builder_gen::<u32>();
354        test_extend_builder_gen::<u64>();
355    }
356
357    #[test]
358    pub fn test_array_with_gap() {
359        let one_trailing_unused_element = ListArray::try_new(
360            buffer![1, 2, 3, 4].into_array(),
361            buffer![0, 3].into_array(),
362            Validity::NonNullable,
363        )
364        .unwrap();
365
366        let second_array = ListArray::try_new(
367            buffer![5, 6].into_array(),
368            buffer![0, 2].into_array(),
369            Validity::NonNullable,
370        )
371        .unwrap();
372
373        let chunked_list = ChunkedArray::try_new(
374            vec![
375                one_trailing_unused_element.clone().into_array(),
376                second_array.clone().into_array(),
377            ],
378            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
379        );
380
381        let canon_values = chunked_list.unwrap().to_list().unwrap();
382
383        assert_eq!(
384            one_trailing_unused_element.scalar_at(0).unwrap(),
385            canon_values.scalar_at(0).unwrap()
386        );
387        assert_eq!(
388            second_array.scalar_at(0).unwrap(),
389            canon_values.scalar_at(1).unwrap()
390        );
391    }
392}