1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::Nullability::NonNullable;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_error::{VortexExpect, VortexResult, vortex_bail};
10use vortex_mask::Mask;
11use vortex_scalar::ListScalar;
12
13use crate::arrays::{ListArray, OffsetPType};
14use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
15use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
16use crate::compute::{add_scalar, cast, sub_scalar};
17use crate::{Array, ArrayRef, IntoArray, ToCanonical};
18
19pub struct ListBuilder<O: NativePType> {
20 value_builder: Box<dyn ArrayBuilder>,
22 index_builder: PrimitiveBuilder<O>,
24 nulls: LazyNullBufferBuilder,
25 nullability: Nullability,
26 dtype: DType,
27}
28
29impl<O: OffsetPType> ListBuilder<O> {
30 pub fn with_capacity(
38 value_dtype: Arc<DType>,
39 nullability: Nullability,
40 index_capacity: usize,
41 ) -> Self {
42 Self::with_values_and_index_capacity(
44 value_dtype,
45 nullability,
46 2 * index_capacity,
47 index_capacity,
48 )
49 }
50
51 pub fn with_values_and_index_capacity(
57 value_dtype: Arc<DType>,
58 nullability: Nullability,
59 values_capacity: usize,
60 index_capacity: usize,
61 ) -> Self {
62 let value_builder = builder_with_capacity(value_dtype.as_ref(), values_capacity);
63 let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, index_capacity);
64
65 index_builder.append_zero();
67
68 Self {
69 value_builder,
70 index_builder,
71 nulls: LazyNullBufferBuilder::new(index_capacity),
72 nullability,
73 dtype: DType::List(value_dtype, nullability),
74 }
75 }
76
77 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
78 match value.elements() {
79 None => {
80 if self.nullability == NonNullable {
81 vortex_bail!("Cannot append null value to non-nullable list");
82 }
83 self.append_null();
84 Ok(())
85 }
86 Some(elements) => {
87 for scalar in elements {
88 self.value_builder.append_scalar(&scalar)?;
91 }
92 self.nulls.append_non_null();
93 self.append_index(
94 O::from_usize(self.value_builder.len())
95 .vortex_expect("Failed to convert from usize to O"),
96 )
97 }
98 }
99 }
100
101 fn append_index(&mut self, index: O) -> VortexResult<()> {
102 self.index_builder.append_scalar(&index.into())
103 }
104}
105
106impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
107 fn as_any(&self) -> &dyn Any {
108 self
109 }
110
111 fn as_any_mut(&mut self) -> &mut dyn Any {
112 self
113 }
114
115 fn dtype(&self) -> &DType {
116 &self.dtype
117 }
118
119 fn len(&self) -> usize {
120 self.nulls.len()
121 }
122
123 fn append_zeros(&mut self, n: usize) {
124 let count = self.value_builder.len();
125 self.value_builder.append_zeros(n);
126 for i in 0..n {
127 self.append_index(
128 O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
129 )
130 .vortex_expect("Failed to append index");
131 }
132 self.nulls.append_n_non_nulls(n);
133 }
134
135 fn append_nulls(&mut self, n: usize) {
136 let count = self.value_builder.len();
137 for _ in 0..n {
138 self.append_index(
141 O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
142 )
143 .vortex_expect("Failed to append index");
144 }
145 self.nulls.append_n_nulls(n);
146 }
147
148 fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
149 let list = array.to_list()?;
150 if list.is_empty() {
151 return Ok(());
152 }
153
154 let n_already_added_values = self.value_builder.len();
155 let Some(n_already_added_values) = O::from_usize(n_already_added_values) else {
156 vortex_bail!(
157 "cannot convert length {} to type {:?}",
158 n_already_added_values,
159 O::PTYPE
160 )
161 };
162
163 let offsets = list.offsets();
164 let elements = list.elements();
165
166 let index_dtype = self.index_builder.dtype();
167
168 let n_leading_junk_values_scalar = offsets.scalar_at(0).cast(index_dtype)?;
169 let n_leading_junk_values = usize::try_from(&n_leading_junk_values_scalar)?;
170
171 let casted_offsets = cast(&offsets.slice(1, offsets.len()), index_dtype)?;
172 let offsets_without_leading_junk =
173 sub_scalar(&casted_offsets, n_leading_junk_values_scalar)?;
174 let offsets_into_builder =
175 add_scalar(&offsets_without_leading_junk, n_already_added_values.into())?;
176
177 let last_offset = offsets.scalar_at(offsets.len() - 1);
178 let last_offset = usize::try_from(&last_offset)?;
179 let non_junk_values = elements.slice(n_leading_junk_values, last_offset);
180
181 self.nulls.append_validity_mask(array.validity_mask()?);
182 self.index_builder
183 .extend_from_array(&offsets_into_builder)?;
184 self.value_builder.ensure_capacity(non_junk_values.len());
185 self.value_builder.extend_from_array(&non_junk_values)?;
186
187 Ok(())
188 }
189
190 fn ensure_capacity(&mut self, capacity: usize) {
191 self.index_builder.ensure_capacity(capacity);
192 self.value_builder.ensure_capacity(capacity);
193 self.nulls.ensure_capacity(capacity);
194 }
195
196 fn set_validity(&mut self, validity: Mask) {
197 self.nulls = LazyNullBufferBuilder::new(validity.len());
198 self.nulls.append_validity_mask(validity);
199 }
200
201 fn finish(&mut self) -> ArrayRef {
202 assert_eq!(
203 self.index_builder.len(),
204 self.nulls.len() + 1,
205 "Indices length must be one more than nulls length."
206 );
207
208 ListArray::try_new(
209 self.value_builder.finish(),
210 self.index_builder.finish(),
211 self.nulls.finish_with_nullability(self.nullability),
212 )
213 .vortex_expect("Buffer, offsets, and validity must have same length.")
214 .into_array()
215 }
216}
217
218#[cfg(test)]
219mod tests {
220 use std::sync::Arc;
221
222 use Nullability::{NonNullable, Nullable};
223 use vortex_buffer::buffer;
224 use vortex_dtype::PType::I32;
225 use vortex_dtype::{DType, Nullability};
226 use vortex_scalar::Scalar;
227
228 use crate::array::Array;
229 use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
230 use crate::builders::ArrayBuilder;
231 use crate::builders::list::ListBuilder;
232 use crate::validity::Validity;
233 use crate::vtable::ValidityHelper;
234 use crate::{IntoArray as _, ToCanonical};
235
236 #[test]
237 fn test_empty() {
238 let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
239
240 let list = builder.finish();
241 assert_eq!(list.len(), 0);
242 }
243
244 #[test]
245 fn test_values() {
246 let dtype: Arc<DType> = Arc::new(I32.into());
247 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
248
249 builder
250 .append_value(
251 Scalar::list(
252 dtype.clone(),
253 vec![1i32.into(), 2i32.into(), 3i32.into()],
254 NonNullable,
255 )
256 .as_list(),
257 )
258 .unwrap();
259
260 builder
261 .append_value(
262 Scalar::list(
263 dtype,
264 vec![4i32.into(), 5i32.into(), 6i32.into()],
265 NonNullable,
266 )
267 .as_list(),
268 )
269 .unwrap();
270
271 let list = builder.finish();
272 assert_eq!(list.len(), 2);
273
274 let list_array = list.to_list().unwrap();
275
276 assert_eq!(list_array.elements_at(0).len(), 3);
277 assert_eq!(list_array.elements_at(1).len(), 3);
278 }
279
280 #[test]
281 fn test_append_empty_list() {
282 let dtype: Arc<DType> = Arc::new(I32.into());
283 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
284
285 assert!(
286 builder
287 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
288 .is_ok()
289 )
290 }
291
292 #[test]
293 fn test_nullable_values() {
294 let dtype: Arc<DType> = Arc::new(I32.into());
295 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
296
297 builder
298 .append_value(
299 Scalar::list(
300 dtype.clone(),
301 vec![1i32.into(), 2i32.into(), 3i32.into()],
302 NonNullable,
303 )
304 .as_list(),
305 )
306 .unwrap();
307
308 builder
309 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
310 .unwrap();
311
312 builder
313 .append_value(
314 Scalar::list(
315 dtype,
316 vec![4i32.into(), 5i32.into(), 6i32.into()],
317 NonNullable,
318 )
319 .as_list(),
320 )
321 .unwrap();
322
323 let list = builder.finish();
324 assert_eq!(list.len(), 3);
325
326 let list_array = list.to_list().unwrap();
327
328 assert_eq!(list_array.elements_at(0).len(), 3);
329 assert_eq!(list_array.elements_at(1).len(), 0);
330 assert_eq!(list_array.elements_at(2).len(), 3);
331 }
332
333 fn test_extend_builder_gen<O: OffsetPType>() {
334 let list = ListArray::from_iter_opt_slow::<O, _, _>(
335 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
336 Arc::new(I32.into()),
337 )
338 .unwrap();
339
340 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
341
342 builder.extend_from_array(&list).unwrap();
343 builder.extend_from_array(&list).unwrap();
344 builder.extend_from_array(&list.slice(0, 0)).unwrap();
345 builder.extend_from_array(&list.slice(1, 3)).unwrap();
346
347 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
348 [
349 Some(vec![0, 1, 2]),
350 None,
351 Some(vec![4, 5]),
352 Some(vec![0, 1, 2]),
353 None,
354 Some(vec![4, 5]),
355 None,
356 Some(vec![4, 5]),
357 ],
358 Arc::new(DType::Primitive(I32, NonNullable)),
359 )
360 .unwrap()
361 .to_list()
362 .unwrap();
363
364 let actual = builder
365 .finish()
366 .to_canonical()
367 .unwrap()
368 .into_list()
369 .unwrap();
370
371 assert_eq!(
372 actual.elements().to_primitive().unwrap().as_slice::<i32>(),
373 expected
374 .elements()
375 .to_primitive()
376 .unwrap()
377 .as_slice::<i32>()
378 );
379
380 assert_eq!(
381 actual.offsets().to_primitive().unwrap().as_slice::<O>(),
382 expected.offsets().to_primitive().unwrap().as_slice::<O>()
383 );
384
385 assert_eq!(actual.validity(), expected.validity())
386 }
387
388 #[test]
389 fn test_extend_builder() {
390 test_extend_builder_gen::<i8>();
391 test_extend_builder_gen::<i16>();
392 test_extend_builder_gen::<i32>();
393 test_extend_builder_gen::<i64>();
394
395 test_extend_builder_gen::<u8>();
396 test_extend_builder_gen::<u16>();
397 test_extend_builder_gen::<u32>();
398 test_extend_builder_gen::<u64>();
399 }
400
401 #[test]
402 pub fn test_array_with_gap() {
403 let one_trailing_unused_element = ListArray::try_new(
404 buffer![1, 2, 3, 4].into_array(),
405 buffer![0, 3].into_array(),
406 Validity::NonNullable,
407 )
408 .unwrap();
409
410 let second_array = ListArray::try_new(
411 buffer![5, 6].into_array(),
412 buffer![0, 2].into_array(),
413 Validity::NonNullable,
414 )
415 .unwrap();
416
417 let chunked_list = ChunkedArray::try_new(
418 vec![
419 one_trailing_unused_element.clone().into_array(),
420 second_array.clone().into_array(),
421 ],
422 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
423 );
424
425 let canon_values = chunked_list.unwrap().to_list().unwrap();
426
427 assert_eq!(
428 one_trailing_unused_element.scalar_at(0),
429 canon_values.scalar_at(0)
430 );
431 assert_eq!(second_array.scalar_at(0), canon_values.scalar_at(1));
432 }
433}