1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::DType;
8use vortex_dtype::IntegerPType;
9use vortex_dtype::Nullability;
10use vortex_dtype::Nullability::NonNullable;
11use vortex_dtype::match_each_integer_ptype;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_panic;
17use vortex_mask::Mask;
18use vortex_scalar::ListScalar;
19use vortex_scalar::Scalar;
20
21use crate::Array;
22use crate::ArrayRef;
23use crate::IntoArray;
24use crate::arrays::ListArray;
25use crate::arrays::list_view_from_list;
26use crate::builders::ArrayBuilder;
27use crate::builders::DEFAULT_BUILDER_CAPACITY;
28use crate::builders::LazyBitBufferBuilder;
29use crate::builders::PrimitiveBuilder;
30use crate::builders::builder_with_capacity;
31use crate::canonical::Canonical;
32use crate::canonical::ToCanonical;
33
34pub struct ListBuilder<O: IntegerPType> {
37 dtype: DType,
39
40 elements_builder: Box<dyn ArrayBuilder>,
42
43 offsets_builder: PrimitiveBuilder<O>,
45
46 nulls: LazyBitBufferBuilder,
48}
49
50impl<O: IntegerPType> ListBuilder<O> {
51 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
53 Self::with_capacity(
54 value_dtype,
55 nullability,
56 DEFAULT_BUILDER_CAPACITY * 2,
59 DEFAULT_BUILDER_CAPACITY,
60 )
61 }
62
63 pub fn with_capacity(
71 value_dtype: Arc<DType>,
72 nullability: Nullability,
73 elements_capacity: usize,
74 capacity: usize,
75 ) -> Self {
76 let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
77 let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
78
79 offsets_builder.append_zero();
81
82 Self {
83 elements_builder,
84 offsets_builder,
85 nulls: LazyBitBufferBuilder::new(capacity),
86 dtype: DType::List(value_dtype, nullability),
87 }
88 }
89
90 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
92 match value.elements() {
93 None => {
94 if self.dtype.nullability() == NonNullable {
95 vortex_bail!("Cannot append null value to non-nullable list");
96 }
97 self.append_null();
98 }
99 Some(elements) => {
100 for scalar in elements {
101 self.elements_builder.append_scalar(&scalar)?;
104 }
105
106 self.nulls.append_non_null();
107 self.offsets_builder.append_value(
108 O::from_usize(self.elements_builder.len())
109 .vortex_expect("Failed to convert from usize to O"),
110 );
111 }
112 }
113
114 Ok(())
115 }
116
117 pub fn finish_into_list(&mut self) -> ListArray {
119 assert_eq!(
120 self.offsets_builder.len(),
121 self.nulls.len() + 1,
122 "offsets length must be one more than nulls length."
123 );
124
125 ListArray::try_new(
126 self.elements_builder.finish(),
127 self.offsets_builder.finish(),
128 self.nulls.finish_with_nullability(self.dtype.nullability()),
129 )
130 .vortex_expect("Buffer, offsets, and validity must have same length.")
131 }
132
133 pub fn element_dtype(&self) -> &DType {
136 let DType::List(element_dtype, _) = &self.dtype else {
137 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
138 };
139
140 element_dtype
141 }
142}
143
144impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
145 fn as_any(&self) -> &dyn Any {
146 self
147 }
148
149 fn as_any_mut(&mut self) -> &mut dyn Any {
150 self
151 }
152
153 fn dtype(&self) -> &DType {
154 &self.dtype
155 }
156
157 fn len(&self) -> usize {
158 self.nulls.len()
159 }
160
161 fn append_zeros(&mut self, n: usize) {
162 let curr_len = self.elements_builder.len();
163 for _ in 0..n {
164 self.offsets_builder.append_value(
165 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
166 )
167 }
168 self.nulls.append_n_non_nulls(n);
169 }
170
171 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
172 let curr_len = self.elements_builder.len();
173 for _ in 0..n {
174 self.offsets_builder.append_value(
177 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
178 )
179 }
180 self.nulls.append_n_nulls(n);
181 }
182
183 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
184 vortex_ensure!(
185 scalar.dtype() == self.dtype(),
186 "ListBuilder expected scalar with dtype {:?}, got {:?}",
187 self.dtype(),
188 scalar.dtype()
189 );
190
191 let list_scalar = ListScalar::try_from(scalar)?;
192 self.append_value(list_scalar)
193 }
194
195 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
196 let list = array.to_listview();
197 if list.is_empty() {
198 return;
199 }
200
201 self.nulls.append_validity_mask(array.validity_mask());
203
204 let elements = list.elements();
206 let offsets = list.offsets().to_primitive();
207 let sizes = list.sizes().to_primitive();
208
209 fn extend_inner<O, OffsetType, SizeType>(
210 builder: &mut ListBuilder<O>,
211 new_elements: &ArrayRef,
212 new_offsets: &[OffsetType],
213 new_sizes: &[SizeType],
214 ) where
215 O: IntegerPType,
216 OffsetType: IntegerPType,
217 SizeType: IntegerPType,
218 {
219 let num_lists = new_offsets.len();
220 debug_assert_eq!(num_lists, new_sizes.len());
221
222 let mut curr_offset = builder.elements_builder.len();
223 let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
224
225 for i in 0..new_offsets.len() {
228 let offset: usize = new_offsets[i].as_();
229 let size: usize = new_sizes[i].as_();
230
231 if size > 0 {
232 let list_elements = new_elements.slice(offset..offset + size);
233 builder.elements_builder.extend_from_array(&list_elements);
234 curr_offset += size;
235 }
236
237 let new_offset =
238 O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
239
240 offsets_range.set_value(i, new_offset);
241 }
242
243 unsafe { offsets_range.finish() };
246 }
247
248 match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
249 match_each_integer_ptype!(sizes.ptype(), |SizeType| {
250 extend_inner(
251 self,
252 elements,
253 offsets.as_slice::<OffsetType>(),
254 sizes.as_slice::<SizeType>(),
255 )
256 })
257 })
258 }
259
260 fn reserve_exact(&mut self, additional: usize) {
261 self.elements_builder.reserve_exact(additional);
262 self.offsets_builder.reserve_exact(additional);
263 self.nulls.reserve_exact(additional);
264 }
265
266 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
267 self.nulls = LazyBitBufferBuilder::new(validity.len());
268 self.nulls.append_validity_mask(validity);
269 }
270
271 fn finish(&mut self) -> ArrayRef {
272 self.finish_into_list().into_array()
273 }
274
275 fn finish_into_canonical(&mut self) -> Canonical {
276 Canonical::List(list_view_from_list(self.finish_into_list()))
277 }
278}
279
280#[cfg(test)]
281mod tests {
282 use std::sync::Arc;
283
284 use Nullability::NonNullable;
285 use Nullability::Nullable;
286 use vortex_buffer::buffer;
287 use vortex_dtype::DType;
288 use vortex_dtype::IntegerPType;
289 use vortex_dtype::Nullability;
290 use vortex_dtype::PType::I32;
291 use vortex_scalar::Scalar;
292
293 use crate::IntoArray;
294 use crate::ToCanonical;
295 use crate::array::Array;
296 use crate::arrays::ChunkedArray;
297 use crate::arrays::ListArray;
298 use crate::builders::ArrayBuilder;
299 use crate::builders::list::ListBuilder;
300 use crate::validity::Validity;
301 use crate::vtable::ValidityHelper;
302
303 #[test]
304 fn test_empty() {
305 let mut builder =
306 ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
307
308 let list = builder.finish();
309 assert_eq!(list.len(), 0);
310 }
311
312 #[test]
313 fn test_values() {
314 let dtype: Arc<DType> = Arc::new(I32.into());
315 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
316
317 builder
318 .append_value(
319 Scalar::list(
320 dtype.clone(),
321 vec![1i32.into(), 2i32.into(), 3i32.into()],
322 NonNullable,
323 )
324 .as_list(),
325 )
326 .unwrap();
327
328 builder
329 .append_value(
330 Scalar::list(
331 dtype,
332 vec![4i32.into(), 5i32.into(), 6i32.into()],
333 NonNullable,
334 )
335 .as_list(),
336 )
337 .unwrap();
338
339 let list = builder.finish();
340 assert_eq!(list.len(), 2);
341
342 let list_array = list.to_listview();
343
344 assert_eq!(list_array.list_elements_at(0).len(), 3);
345 assert_eq!(list_array.list_elements_at(1).len(), 3);
346 }
347
348 #[test]
349 fn test_append_empty_list() {
350 let dtype: Arc<DType> = Arc::new(I32.into());
351 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
352
353 assert!(
354 builder
355 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
356 .is_ok()
357 )
358 }
359
360 #[test]
361 fn test_nullable_values() {
362 let dtype: Arc<DType> = Arc::new(I32.into());
363 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
364
365 builder
366 .append_value(
367 Scalar::list(
368 dtype.clone(),
369 vec![1i32.into(), 2i32.into(), 3i32.into()],
370 NonNullable,
371 )
372 .as_list(),
373 )
374 .unwrap();
375
376 builder
377 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
378 .unwrap();
379
380 builder
381 .append_value(
382 Scalar::list(
383 dtype,
384 vec![4i32.into(), 5i32.into(), 6i32.into()],
385 NonNullable,
386 )
387 .as_list(),
388 )
389 .unwrap();
390
391 let list = builder.finish();
392 assert_eq!(list.len(), 3);
393
394 let list_array = list.to_listview();
395
396 assert_eq!(list_array.list_elements_at(0).len(), 3);
397 assert_eq!(list_array.list_elements_at(1).len(), 0);
398 assert_eq!(list_array.list_elements_at(2).len(), 3);
399 }
400
401 fn test_extend_builder_gen<O: IntegerPType>() {
402 let list = ListArray::from_iter_opt_slow::<O, _, _>(
403 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
404 Arc::new(I32.into()),
405 )
406 .unwrap();
407 assert_eq!(list.len(), 3);
408
409 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
410
411 builder.extend_from_array(&list);
412 builder.extend_from_array(&list);
413 builder.extend_from_array(&list.slice(0..0));
414 builder.extend_from_array(&list.slice(1..3));
415
416 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
417 [
418 Some(vec![0, 1, 2]),
419 None,
420 Some(vec![4, 5]),
421 Some(vec![0, 1, 2]),
422 None,
423 Some(vec![4, 5]),
424 None,
425 Some(vec![4, 5]),
426 ],
427 Arc::new(DType::Primitive(I32, NonNullable)),
428 )
429 .unwrap()
430 .to_listview();
431
432 let actual = builder.finish_into_canonical().into_listview();
433
434 assert_eq!(
435 actual.elements().to_primitive().as_slice::<i32>(),
436 expected.elements().to_primitive().as_slice::<i32>()
437 );
438
439 assert_eq!(
440 actual.offsets().to_primitive().as_slice::<O>(),
441 expected.offsets().to_primitive().as_slice::<O>()
442 );
443
444 assert_eq!(actual.validity(), expected.validity())
445 }
446
447 #[test]
448 fn test_extend_builder() {
449 test_extend_builder_gen::<i8>();
450 test_extend_builder_gen::<i16>();
451 test_extend_builder_gen::<i32>();
452 test_extend_builder_gen::<i64>();
453
454 test_extend_builder_gen::<u8>();
455 test_extend_builder_gen::<u16>();
456 test_extend_builder_gen::<u32>();
457 test_extend_builder_gen::<u64>();
458 }
459
460 #[test]
461 pub fn test_array_with_gap() {
462 let one_trailing_unused_element = ListArray::try_new(
463 buffer![1, 2, 3, 4].into_array(),
464 buffer![0, 3].into_array(),
465 Validity::NonNullable,
466 )
467 .unwrap();
468
469 let second_array = ListArray::try_new(
470 buffer![5, 6].into_array(),
471 buffer![0, 2].into_array(),
472 Validity::NonNullable,
473 )
474 .unwrap();
475
476 let chunked_list = ChunkedArray::try_new(
477 vec![
478 one_trailing_unused_element.clone().into_array(),
479 second_array.clone().into_array(),
480 ],
481 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
482 );
483
484 let canon_values = chunked_list.unwrap().to_listview();
485
486 assert_eq!(
487 one_trailing_unused_element.scalar_at(0),
488 canon_values.scalar_at(0)
489 );
490 assert_eq!(second_array.scalar_at(0), canon_values.scalar_at(1));
491 }
492
493 #[test]
494 fn test_append_scalar() {
495 let dtype: Arc<DType> = Arc::new(I32.into());
496 let mut builder = ListBuilder::<u64>::with_capacity(dtype.clone(), Nullable, 20, 10);
497
498 let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable);
500 builder.append_scalar(&list_scalar1).unwrap();
501
502 let list_scalar2 = Scalar::list(
504 dtype.clone(),
505 vec![3i32.into(), 4i32.into(), 5i32.into()],
506 Nullable,
507 );
508 builder.append_scalar(&list_scalar2).unwrap();
509
510 let null_scalar = Scalar::null(DType::List(dtype.clone(), Nullable));
512 builder.append_scalar(&null_scalar).unwrap();
513
514 let array = builder.finish_into_list();
515 assert_eq!(array.len(), 3);
516
517 let scalar0 = array.scalar_at(0);
520 let list0 = scalar0.as_list();
521 assert_eq!(list0.len(), 2);
522 if let Some(list0_items) = list0.elements() {
523 assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
524 assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
525 }
526
527 let scalar1 = array.scalar_at(1);
528 let list1 = scalar1.as_list();
529 assert_eq!(list1.len(), 3);
530 if let Some(list1_items) = list1.elements() {
531 assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
532 assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
533 assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
534 }
535
536 let scalar2 = array.scalar_at(2);
537 let list2 = scalar2.as_list();
538 assert!(list2.is_null()); assert!(array.validity().is_valid(0));
542 assert!(array.validity().is_valid(1));
543 assert!(!array.validity().is_valid(2));
544
545 let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
547 let wrong_scalar = Scalar::from(42i32);
548 assert!(builder.append_scalar(&wrong_scalar).is_err());
549 }
550}