vortex_array/builders/
list.rs

1use std::any::Any;
2use std::sync::Arc;
3
4use vortex_dtype::Nullability::NonNullable;
5use vortex_dtype::{DType, NativePType, Nullability};
6use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
7use vortex_scalar::{BinaryNumericOperator, ListScalar};
8
9use crate::arrays::{ConstantArray, ListArray, OffsetPType};
10use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
11use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
12use crate::compute::{binary_numeric, slice, try_cast};
13use crate::{Array, ArrayRef};
14
15pub struct ListBuilder<O: NativePType> {
16    value_builder: Box<dyn ArrayBuilder>,
17    index_builder: PrimitiveBuilder<O>,
18    nulls: LazyNullBufferBuilder,
19    nullability: Nullability,
20    dtype: DType,
21}
22
23impl<O: OffsetPType> ListBuilder<O> {
24    // TODO(joe): add value + index capacity ctor.
25    pub fn with_capacity(
26        value_dtype: Arc<DType>,
27        nullability: Nullability,
28        capacity: usize,
29    ) -> Self {
30        // I would expect the list to have more than one value per index
31        let value_builder = builder_with_capacity(value_dtype.as_ref(), 2 * capacity);
32        let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, capacity);
33
34        // The first index of the list, which is always 0 and represents an empty list.
35        index_builder.append_zero();
36
37        Self {
38            value_builder,
39            index_builder,
40            nulls: LazyNullBufferBuilder::new(capacity),
41            nullability,
42            dtype: DType::List(value_dtype, nullability),
43        }
44    }
45
46    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
47        match value.elements() {
48            None => {
49                if self.nullability == NonNullable {
50                    vortex_bail!("Cannot append null value to non-nullable list");
51                }
52                self.append_null();
53                Ok(())
54            }
55            Some(elements) => {
56                for scalar in elements {
57                    // TODO(joe): This is slow, we should be able to append multiple values at once,
58                    // or the list scalar should hold an Array
59                    self.value_builder.append_scalar(&scalar)?;
60                }
61                self.nulls.append_non_null();
62                self.append_index(
63                    O::from_usize(self.value_builder.len())
64                        .vortex_expect("Failed to convert from usize to O"),
65                )
66            }
67        }
68    }
69
70    fn append_index(&mut self, index: O) -> VortexResult<()> {
71        self.index_builder.append_scalar(&index.into())
72    }
73}
74
75impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
76    fn as_any(&self) -> &dyn Any {
77        self
78    }
79
80    fn as_any_mut(&mut self) -> &mut dyn Any {
81        self
82    }
83
84    fn dtype(&self) -> &DType {
85        &self.dtype
86    }
87
88    fn len(&self) -> usize {
89        self.nulls.len()
90    }
91
92    fn append_zeros(&mut self, n: usize) {
93        let count = self.value_builder.len();
94        self.value_builder.append_zeros(n);
95        for i in 0..n {
96            self.append_index(
97                O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
98            )
99            .vortex_expect("Failed to append index");
100        }
101        self.nulls.append_n_non_nulls(n);
102    }
103
104    fn append_nulls(&mut self, n: usize) {
105        let count = self.value_builder.len();
106        for _ in 0..n {
107            // A list with a null element is can be a list with a zero-span offset and a validity
108            // bit set
109            self.append_index(
110                O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
111            )
112            .vortex_expect("Failed to append index");
113        }
114        self.nulls.append_n_nulls(n);
115    }
116
117    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
118        self.nulls.append_validity_mask(array.validity_mask()?);
119
120        let list = array.to_canonical()?.into_list()?;
121
122        let cursor_usize = self.value_builder.len();
123        let cursor = O::from_usize(cursor_usize).ok_or_else(|| {
124            vortex_err!(
125                "cannot convert length {} to type {:?}",
126                cursor_usize,
127                O::PTYPE
128            )
129        })?;
130
131        let offsets = binary_numeric(
132            &try_cast(
133                &slice(list.offsets(), 1, list.offsets().len())?,
134                &DType::Primitive(O::PTYPE, NonNullable),
135            )?,
136            &ConstantArray::new(cursor, list.len()),
137            BinaryNumericOperator::Add,
138        )?;
139        self.index_builder.extend_from_array(&offsets)?;
140
141        if !list.is_empty() {
142            let last_used_index = self.index_builder.values().last().vortex_expect("there must be at least one index because we just extended a non-zero list of offsets");
143            self.value_builder.extend_from_array(&slice(
144                list.elements(),
145                0,
146                last_used_index.as_() - cursor_usize,
147            )?)?;
148        }
149
150        Ok(())
151    }
152
153    fn finish(&mut self) -> ArrayRef {
154        assert_eq!(
155            self.index_builder.len(),
156            self.nulls.len() + 1,
157            "Indices length must be one more than nulls length."
158        );
159
160        ListArray::try_new(
161            self.value_builder.finish(),
162            self.index_builder.finish(),
163            self.nulls.finish_with_nullability(self.nullability),
164        )
165        .vortex_expect("Buffer, offsets, and validity must have same length.")
166        .into_array()
167    }
168}
169
170#[cfg(test)]
171mod tests {
172    use std::sync::Arc;
173
174    use Nullability::{NonNullable, Nullable};
175    use vortex_buffer::buffer;
176    use vortex_dtype::PType::I32;
177    use vortex_dtype::{DType, Nullability};
178    use vortex_scalar::Scalar;
179
180    use crate::array::Array;
181    use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
182    use crate::builders::ArrayBuilder;
183    use crate::builders::list::ListBuilder;
184    use crate::compute::scalar_at;
185    use crate::validity::Validity;
186    use crate::{IntoArray as _, ToCanonical};
187
188    #[test]
189    fn test_empty() {
190        let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
191
192        let list = builder.finish();
193        assert_eq!(list.len(), 0);
194    }
195
196    #[test]
197    fn test_values() {
198        let dtype: Arc<DType> = Arc::new(I32.into());
199        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
200
201        builder
202            .append_value(
203                Scalar::list(
204                    dtype.clone(),
205                    vec![1i32.into(), 2i32.into(), 3i32.into()],
206                    NonNullable,
207                )
208                .as_list(),
209            )
210            .unwrap();
211
212        builder
213            .append_value(
214                Scalar::list(
215                    dtype,
216                    vec![4i32.into(), 5i32.into(), 6i32.into()],
217                    NonNullable,
218                )
219                .as_list(),
220            )
221            .unwrap();
222
223        let list = builder.finish();
224        assert_eq!(list.len(), 2);
225
226        let list_array = list.to_list().unwrap();
227
228        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
229        assert_eq!(list_array.elements_at(1).unwrap().len(), 3);
230    }
231
232    #[test]
233    fn test_non_null_fails() {
234        let dtype: Arc<DType> = Arc::new(I32.into());
235        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
236
237        assert!(
238            builder
239                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
240                .is_err()
241        )
242    }
243
244    #[test]
245    fn test_nullable_values() {
246        let dtype: Arc<DType> = Arc::new(I32.into());
247        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
248
249        builder
250            .append_value(
251                Scalar::list(
252                    dtype.clone(),
253                    vec![1i32.into(), 2i32.into(), 3i32.into()],
254                    NonNullable,
255                )
256                .as_list(),
257            )
258            .unwrap();
259
260        builder
261            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
262            .unwrap();
263
264        builder
265            .append_value(
266                Scalar::list(
267                    dtype,
268                    vec![4i32.into(), 5i32.into(), 6i32.into()],
269                    NonNullable,
270                )
271                .as_list(),
272            )
273            .unwrap();
274
275        let list = builder.finish();
276        assert_eq!(list.len(), 3);
277
278        let list_array = list.to_list().unwrap();
279
280        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
281        assert_eq!(list_array.elements_at(1).unwrap().len(), 0);
282        assert_eq!(list_array.elements_at(2).unwrap().len(), 3);
283    }
284
285    fn test_extend_builder_gen<O: OffsetPType>() {
286        let list = ListArray::from_iter_opt_slow::<O, _, _>(
287            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
288            Arc::new(I32.into()),
289        )
290        .unwrap();
291
292        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
293
294        builder.extend_from_array(&list).unwrap();
295        builder.extend_from_array(&list).unwrap();
296
297        let expect = ListArray::from_iter_opt_slow::<O, _, _>(
298            [
299                Some(vec![0, 1, 2]),
300                None,
301                Some(vec![4, 5]),
302                Some(vec![0, 1, 2]),
303                None,
304                Some(vec![4, 5]),
305            ],
306            Arc::new(DType::Primitive(I32, NonNullable)),
307        )
308        .unwrap()
309        .to_list()
310        .unwrap();
311
312        let res = builder
313            .finish()
314            .to_canonical()
315            .unwrap()
316            .into_list()
317            .unwrap();
318
319        assert_eq!(
320            res.elements().to_primitive().unwrap().as_slice::<i32>(),
321            expect.elements().to_primitive().unwrap().as_slice::<i32>()
322        );
323
324        assert_eq!(
325            res.offsets().to_primitive().unwrap().as_slice::<O>(),
326            expect.offsets().to_primitive().unwrap().as_slice::<O>()
327        );
328
329        assert_eq!(res.validity(), expect.validity())
330    }
331
332    #[test]
333    fn test_extend_builder() {
334        test_extend_builder_gen::<i8>();
335        test_extend_builder_gen::<i16>();
336        test_extend_builder_gen::<i32>();
337        test_extend_builder_gen::<i64>();
338
339        test_extend_builder_gen::<u8>();
340        test_extend_builder_gen::<u16>();
341        test_extend_builder_gen::<u32>();
342        test_extend_builder_gen::<u64>();
343    }
344
345    #[test]
346    pub fn test_array_with_gap() {
347        let one_trailing_unused_element = ListArray::try_new(
348            buffer![1, 2, 3, 4].into_array(),
349            buffer![0, 3].into_array(),
350            Validity::NonNullable,
351        )
352        .unwrap();
353
354        let second_array = ListArray::try_new(
355            buffer![5, 6].into_array(),
356            buffer![0, 2].into_array(),
357            Validity::NonNullable,
358        )
359        .unwrap();
360
361        let chunked_list = ChunkedArray::try_new(
362            vec![
363                one_trailing_unused_element.clone().into_array(),
364                second_array.clone().into_array(),
365            ],
366            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
367        );
368
369        let canon_values = chunked_list.unwrap().to_list().unwrap();
370
371        assert_eq!(
372            scalar_at(&one_trailing_unused_element, 0).unwrap(),
373            scalar_at(&canon_values, 0).unwrap()
374        );
375        assert_eq!(
376            scalar_at(&second_array, 0).unwrap(),
377            scalar_at(&canon_values, 1).unwrap()
378        );
379    }
380}