1use std::sync::Arc;
14
15use vortex_dtype::DType;
16use vortex_dtype::IntegerPType;
17use vortex_dtype::Nullability;
18use vortex_dtype::match_each_integer_ptype;
19use vortex_error::VortexExpect;
20use vortex_error::VortexResult;
21use vortex_error::vortex_ensure;
22use vortex_error::vortex_panic;
23use vortex_mask::Mask;
24use vortex_scalar::ListScalar;
25use vortex_scalar::Scalar;
26
27use crate::Canonical;
28use crate::ToCanonical;
29use crate::array::Array;
30use crate::array::ArrayRef;
31use crate::array::IntoArray;
32use crate::arrays::ListViewArray;
33use crate::arrays::ListViewRebuildMode;
34use crate::arrays::PrimitiveArray;
35use crate::builders::ArrayBuilder;
36use crate::builders::DEFAULT_BUILDER_CAPACITY;
37use crate::builders::PrimitiveBuilder;
38use crate::builders::UninitRange;
39use crate::builders::builder_with_capacity;
40use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
41use crate::compute;
42
43pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
53 dtype: DType,
55
56 elements_builder: Box<dyn ArrayBuilder>,
58
59 offsets_builder: PrimitiveBuilder<O>,
61
62 sizes_builder: PrimitiveBuilder<S>,
64
65 nulls: LazyBitBufferBuilder,
67}
68
69impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
70 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
72 Self::with_capacity(
73 element_dtype,
74 nullability,
75 DEFAULT_BUILDER_CAPACITY * 2,
78 DEFAULT_BUILDER_CAPACITY,
79 )
80 }
81
82 pub fn with_capacity(
90 element_dtype: Arc<DType>,
91 nullability: Nullability,
92 elements_capacity: usize,
93 capacity: usize,
94 ) -> Self {
95 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
96
97 let offsets_builder =
98 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
99 let sizes_builder =
100 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
101
102 let nulls = LazyBitBufferBuilder::new(capacity);
103
104 Self {
105 dtype: DType::List(element_dtype, nullability),
106 elements_builder,
107 offsets_builder,
108 sizes_builder,
109 nulls,
110 }
111 }
112
113 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
118 let Some(elements) = value.elements() else {
119 vortex_ensure!(
121 self.dtype.is_nullable(),
122 "Cannot append null value to non-nullable list builder"
123 );
124 self.append_null();
125 return Ok(());
126 };
127
128 let curr_offset = self.elements_builder.len();
129 let num_elements = elements.len();
130
131 assert!(
134 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
135 "appending this list would cause an offset overflow"
136 );
137
138 for scalar in elements {
139 self.elements_builder.append_scalar(&scalar)?;
140 }
141 self.nulls.append_non_null();
142
143 self.offsets_builder.append_value(
144 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
145 );
146 self.sizes_builder.append_value(
147 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
148 );
149
150 Ok(())
151 }
152
153 pub fn finish_into_listview(&mut self) -> ListViewArray {
155 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
156 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
157
158 let elements = self.elements_builder.finish();
159 let offsets = self.offsets_builder.finish();
160 let sizes = self.sizes_builder.finish();
161 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
162
163 unsafe {
173 ListViewArray::new_unchecked(elements, offsets, sizes, validity)
174 .with_zero_copy_to_list(true)
175 }
176 }
177
178 pub fn element_dtype(&self) -> &DType {
181 let DType::List(element_dtype, ..) = &self.dtype else {
182 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
183 };
184
185 element_dtype
186 }
187}
188
189impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
190 fn as_any(&self) -> &dyn std::any::Any {
191 self
192 }
193
194 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
195 self
196 }
197
198 fn dtype(&self) -> &DType {
199 &self.dtype
200 }
201
202 fn len(&self) -> usize {
203 self.offsets_builder.len()
204 }
205
206 fn append_zeros(&mut self, n: usize) {
207 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
208 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
209
210 let curr_offset = self.elements_builder.len();
212
213 for _ in 0..n {
216 self.offsets_builder.append_value(
217 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
218 );
219 self.sizes_builder.append_value(S::zero());
220 }
221
222 self.nulls.append_n_non_nulls(n);
223 }
224
225 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
226 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
227 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
228
229 let curr_offset = self.elements_builder.len();
231
232 for _ in 0..n {
234 self.offsets_builder.append_value(
235 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
236 );
237 self.sizes_builder.append_value(S::zero());
238 }
239
240 self.nulls.append_n_nulls(n);
242 }
243
244 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
245 vortex_ensure!(
246 scalar.dtype() == self.dtype(),
247 "ListViewBuilder expected scalar with dtype {:?}, got {:?}",
248 self.dtype(),
249 scalar.dtype()
250 );
251
252 let list_scalar = scalar.as_list();
253 self.append_value(list_scalar)
254 }
255
256 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
257 let listview = array.to_listview();
258 if listview.is_empty() {
259 return;
260 }
261
262 if !listview.is_zero_copy_to_list() {
265 for i in 0..listview.len() {
266 let list = listview.scalar_at(i);
267
268 self.append_scalar(&list)
269 .vortex_expect("was unable to extend the `ListViewBuilder`")
270 }
271
272 return;
273 }
274
275 let listview = listview.rebuild(ListViewRebuildMode::MakeExact);
278 debug_assert!(listview.is_zero_copy_to_list());
279
280 self.nulls.append_validity_mask(array.validity_mask());
281
282 let old_elements_len = self.elements_builder.len();
284 self.elements_builder
285 .reserve_exact(listview.elements().len());
286 self.elements_builder.extend_from_array(listview.elements());
287 let new_elements_len = self.elements_builder.len();
288
289 let extend_length = listview.len();
291 self.sizes_builder.reserve_exact(extend_length);
292 self.offsets_builder.reserve_exact(extend_length);
293
294 let cast_sizes = compute::cast(listview.sizes(), self.sizes_builder.dtype()).vortex_expect(
296 "was somehow unable to cast the new sizes to the type of the builder sizes",
297 );
298 self.sizes_builder.extend_from_array(cast_sizes.as_ref());
299
300 let uninit_range = self.offsets_builder.uninit_range(extend_length);
304
305 let new_offsets = listview.offsets().to_primitive();
307
308 match_each_integer_ptype!(new_offsets.ptype(), |A| {
309 adjust_and_extend_offsets::<O, A>(
310 uninit_range,
311 new_offsets,
312 old_elements_len,
313 new_elements_len,
314 );
315 })
316 }
317
318 fn reserve_exact(&mut self, capacity: usize) {
319 self.elements_builder.reserve_exact(capacity * 2);
320 self.offsets_builder.reserve_exact(capacity);
321 self.sizes_builder.reserve_exact(capacity);
322 self.nulls.reserve_exact(capacity);
323 }
324
325 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
326 self.nulls = LazyBitBufferBuilder::new(validity.len());
327 self.nulls.append_validity_mask(validity);
328 }
329
330 fn finish(&mut self) -> ArrayRef {
331 self.finish_into_listview().into_array()
332 }
333
334 fn finish_into_canonical(&mut self) -> Canonical {
335 Canonical::List(self.finish_into_listview())
336 }
337}
338
339fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
342 mut uninit_range: UninitRange<'a, O>,
343 new_offsets: PrimitiveArray,
344 old_elements_len: usize,
345 new_elements_len: usize,
346) {
347 let new_offsets_slice = new_offsets.as_slice::<A>();
348 let old_elements_len = O::from_usize(old_elements_len)
349 .vortex_expect("the old elements length did not fit into the offset type (impossible)");
350 let new_elements_len = O::from_usize(new_elements_len)
351 .vortex_expect("the current elements length did not fit into the offset type (impossible)");
352
353 for i in 0..uninit_range.len() {
354 let new_offset = O::from_usize(
355 new_offsets_slice[i]
356 .to_usize()
357 .vortex_expect("Offsets must always fit in usize"),
358 )
359 .vortex_expect("New offset somehow did not fit into the builder's offset type");
360
361 let adjusted_new_offset = new_offset + old_elements_len;
364 assert!(
365 adjusted_new_offset <= new_elements_len,
366 "[{i}/{}]: {new_offset} + {old_elements_len} \
367 = {adjusted_new_offset} <= {new_elements_len} failed",
368 uninit_range.len()
369 );
370
371 uninit_range.set_value(i, adjusted_new_offset);
372 }
373
374 unsafe { uninit_range.finish() };
377}
378
379#[cfg(test)]
380mod tests {
381 use std::sync::Arc;
382
383 use vortex_dtype::DType;
384 use vortex_dtype::Nullability::NonNullable;
385 use vortex_dtype::Nullability::Nullable;
386 use vortex_dtype::PType::I32;
387 use vortex_scalar::Scalar;
388
389 use super::ListViewBuilder;
390 use crate::IntoArray;
391 use crate::array::Array;
392 use crate::arrays::ListArray;
393 use crate::builders::ArrayBuilder;
394 use crate::vtable::ValidityHelper;
395
396 #[test]
397 fn test_empty() {
398 let mut builder =
399 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
400
401 let listview = builder.finish();
402 assert_eq!(listview.len(), 0);
403 }
404
405 #[test]
406 fn test_basic_append_and_nulls() {
407 let dtype: Arc<DType> = Arc::new(I32.into());
408 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
409
410 builder
412 .append_value(
413 Scalar::list(
414 dtype.clone(),
415 vec![1i32.into(), 2i32.into(), 3i32.into()],
416 NonNullable,
417 )
418 .as_list(),
419 )
420 .unwrap();
421
422 builder
424 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
425 .unwrap();
426
427 builder.append_null();
429
430 builder
432 .append_value(
433 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
434 )
435 .unwrap();
436
437 let listview = builder.finish_into_listview();
438 assert_eq!(listview.len(), 4);
439
440 let first_list = listview.list_elements_at(0);
442 assert_eq!(first_list.len(), 3);
443 assert_eq!(first_list.scalar_at(0), 1i32.into());
444 assert_eq!(first_list.scalar_at(1), 2i32.into());
445 assert_eq!(first_list.scalar_at(2), 3i32.into());
446
447 let empty_list = listview.list_elements_at(1);
449 assert_eq!(empty_list.len(), 0);
450
451 assert!(!listview.validity().is_valid(2));
453
454 let last_list = listview.list_elements_at(3);
456 assert_eq!(last_list.len(), 2);
457 assert_eq!(last_list.scalar_at(0), 4i32.into());
458 assert_eq!(last_list.scalar_at(1), 5i32.into());
459 }
460
461 #[test]
462 fn test_different_offset_size_types() {
463 let dtype: Arc<DType> = Arc::new(I32.into());
465 let mut builder =
466 ListViewBuilder::<u32, u8>::with_capacity(dtype.clone(), NonNullable, 0, 0);
467
468 builder
469 .append_value(
470 Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], NonNullable).as_list(),
471 )
472 .unwrap();
473
474 builder
475 .append_value(
476 Scalar::list(
477 dtype,
478 vec![3i32.into(), 4i32.into(), 5i32.into()],
479 NonNullable,
480 )
481 .as_list(),
482 )
483 .unwrap();
484
485 let listview = builder.finish_into_listview();
486 assert_eq!(listview.len(), 2);
487
488 let first = listview.list_elements_at(0);
490 assert_eq!(first.scalar_at(0), 1i32.into());
491 assert_eq!(first.scalar_at(1), 2i32.into());
492
493 let second = listview.list_elements_at(1);
495 assert_eq!(second.scalar_at(0), 3i32.into());
496 assert_eq!(second.scalar_at(1), 4i32.into());
497 assert_eq!(second.scalar_at(2), 5i32.into());
498
499 let dtype2: Arc<DType> = Arc::new(I32.into());
501 let mut builder2 =
502 ListViewBuilder::<u64, u16>::with_capacity(dtype2.clone(), NonNullable, 0, 0);
503
504 for i in 0..5 {
505 builder2
506 .append_value(
507 Scalar::list(dtype2.clone(), vec![(i * 10).into()], NonNullable).as_list(),
508 )
509 .unwrap();
510 }
511
512 let listview2 = builder2.finish_into_listview();
513 assert_eq!(listview2.len(), 5);
514
515 for i in 0..5i32 {
517 let list = listview2.list_elements_at(i as usize);
518 assert_eq!(list.len(), 1);
519 assert_eq!(list.scalar_at(0), (i * 10).into());
520 }
521 }
522
523 #[test]
524 fn test_builder_trait_methods() {
525 let dtype: Arc<DType> = Arc::new(I32.into());
526 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
527
528 builder.append_zeros(2);
530 assert_eq!(builder.len(), 2);
531
532 unsafe {
534 builder.append_nulls_unchecked(2);
535 }
536 assert_eq!(builder.len(), 4);
537
538 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
540 builder.append_scalar(&list_scalar).unwrap();
541 assert_eq!(builder.len(), 5);
542
543 let listview = builder.finish_into_listview();
544 assert_eq!(listview.len(), 5);
545
546 assert_eq!(listview.list_elements_at(0).len(), 0);
548 assert_eq!(listview.list_elements_at(1).len(), 0);
549
550 assert!(!listview.validity().is_valid(2));
552 assert!(!listview.validity().is_valid(3));
553
554 let last_list = listview.list_elements_at(4);
556 assert_eq!(last_list.len(), 2);
557 assert_eq!(last_list.scalar_at(0), 10i32.into());
558 assert_eq!(last_list.scalar_at(1), 20i32.into());
559 }
560
561 #[test]
562 fn test_extend_from_array() {
563 let dtype: Arc<DType> = Arc::new(I32.into());
564
565 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
567 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
568 Arc::new(I32.into()),
569 )
570 .unwrap();
571
572 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
573
574 builder
576 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
577 .unwrap();
578
579 builder.extend_from_array(&source.into_array());
581
582 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
584 std::iter::empty::<Option<Vec<i32>>>(),
585 Arc::new(I32.into()),
586 )
587 .unwrap();
588 builder.extend_from_array(&empty_source.into_array());
589
590 let listview = builder.finish_into_listview();
591 assert_eq!(listview.len(), 4);
592
593 let first = listview.list_elements_at(0);
596 assert_eq!(first.len(), 1);
597 assert_eq!(first.scalar_at(0), 0i32.into());
598
599 let second = listview.list_elements_at(1);
601 assert_eq!(second.len(), 3);
602 assert_eq!(second.scalar_at(0), 1i32.into());
603 assert_eq!(second.scalar_at(1), 2i32.into());
604 assert_eq!(second.scalar_at(2), 3i32.into());
605
606 assert!(!listview.validity().is_valid(2));
608
609 let fourth = listview.list_elements_at(3);
611 assert_eq!(fourth.len(), 2);
612 assert_eq!(fourth.scalar_at(0), 4i32.into());
613 assert_eq!(fourth.scalar_at(1), 5i32.into());
614 }
615
616 #[test]
617 fn test_error_append_null_to_non_nullable() {
618 let dtype: Arc<DType> = Arc::new(I32.into());
619 let mut builder =
620 ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
621
622 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
624 let null_list = null_scalar.as_list();
625
626 let result = builder.append_value(null_list);
628 assert!(result.is_err());
629 assert!(
630 result
631 .unwrap_err()
632 .to_string()
633 .contains("null value to non-nullable")
634 );
635 }
636}