1use std::sync::Arc;
14
15use vortex_dtype::{DType, IntegerPType, Nullability, match_each_integer_ptype};
16use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
17use vortex_mask::Mask;
18use vortex_scalar::{ListScalar, Scalar};
19
20use crate::array::{Array, ArrayRef, IntoArray};
21use crate::arrays::{ListViewArray, ListViewRebuildMode, PrimitiveArray};
22use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
23use crate::builders::{
24 ArrayBuilder, DEFAULT_BUILDER_CAPACITY, PrimitiveBuilder, UninitRange, builder_with_capacity,
25};
26use crate::{Canonical, ToCanonical, compute};
27
28pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
38 dtype: DType,
40
41 elements_builder: Box<dyn ArrayBuilder>,
43
44 offsets_builder: PrimitiveBuilder<O>,
46
47 sizes_builder: PrimitiveBuilder<S>,
49
50 nulls: LazyBitBufferBuilder,
52}
53
54impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
55 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
57 Self::with_capacity(
58 element_dtype,
59 nullability,
60 DEFAULT_BUILDER_CAPACITY * 2,
63 DEFAULT_BUILDER_CAPACITY,
64 )
65 }
66
67 pub fn with_capacity(
75 element_dtype: Arc<DType>,
76 nullability: Nullability,
77 elements_capacity: usize,
78 capacity: usize,
79 ) -> Self {
80 assert!(
83 S::max_value_as_u64() <= O::max_value_as_u64(),
84 "Size type {:?} (max offset {}) must fit within offset type {:?} (max offset {})",
85 S::PTYPE,
86 S::max_value_as_u64(),
87 O::PTYPE,
88 O::max_value_as_u64()
89 );
90
91 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
92
93 let offsets_builder =
94 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
95 let sizes_builder =
96 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
97
98 let nulls = LazyBitBufferBuilder::new(capacity);
99
100 Self {
101 dtype: DType::List(element_dtype, nullability),
102 elements_builder,
103 offsets_builder,
104 sizes_builder,
105 nulls,
106 }
107 }
108
109 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
114 let Some(elements) = value.elements() else {
115 vortex_ensure!(
117 self.dtype.is_nullable(),
118 "Cannot append null value to non-nullable list builder"
119 );
120 self.append_null();
121 return Ok(());
122 };
123
124 let curr_offset = self.elements_builder.len();
125 let num_elements = elements.len();
126
127 assert!(
130 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
131 "appending this list would cause an offset overflow"
132 );
133
134 for scalar in elements {
135 self.elements_builder.append_scalar(&scalar)?;
136 }
137 self.nulls.append_non_null();
138
139 self.offsets_builder.append_value(
140 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
141 );
142 self.sizes_builder.append_value(
143 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
144 );
145
146 Ok(())
147 }
148
149 pub fn finish_into_listview(&mut self) -> ListViewArray {
151 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
152 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
153
154 let elements = self.elements_builder.finish();
155 let offsets = self.offsets_builder.finish();
156 let sizes = self.sizes_builder.finish();
157 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
158
159 unsafe {
169 ListViewArray::new_unchecked(elements, offsets, sizes, validity)
170 .with_zero_copy_to_list(true)
171 }
172 }
173
174 pub fn element_dtype(&self) -> &DType {
177 let DType::List(element_dtype, ..) = &self.dtype else {
178 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
179 };
180
181 element_dtype
182 }
183}
184
185impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
186 fn as_any(&self) -> &dyn std::any::Any {
187 self
188 }
189
190 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
191 self
192 }
193
194 fn dtype(&self) -> &DType {
195 &self.dtype
196 }
197
198 fn len(&self) -> usize {
199 self.offsets_builder.len()
200 }
201
202 fn append_zeros(&mut self, n: usize) {
203 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
204 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
205
206 let curr_offset = self.elements_builder.len();
208
209 for _ in 0..n {
212 self.offsets_builder.append_value(
213 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
214 );
215 self.sizes_builder.append_value(S::zero());
216 }
217
218 self.nulls.append_n_non_nulls(n);
219 }
220
221 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
222 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
223 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
224
225 let curr_offset = self.elements_builder.len();
227
228 for _ in 0..n {
230 self.offsets_builder.append_value(
231 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
232 );
233 self.sizes_builder.append_value(S::zero());
234 }
235
236 self.nulls.append_n_nulls(n);
238 }
239
240 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
241 vortex_ensure!(
242 scalar.dtype() == self.dtype(),
243 "ListViewBuilder expected scalar with dtype {:?}, got {:?}",
244 self.dtype(),
245 scalar.dtype()
246 );
247
248 let list_scalar = scalar.as_list();
249 self.append_value(list_scalar)
250 }
251
252 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
253 let listview = array.to_listview();
254 if listview.is_empty() {
255 return;
256 }
257
258 if !listview.is_zero_copy_to_list() {
261 for i in 0..listview.len() {
262 let list = listview.scalar_at(i);
263
264 self.append_scalar(&list)
265 .vortex_expect("was unable to extend the `ListViewBuilder`")
266 }
267
268 return;
269 }
270
271 let listview = listview.rebuild(ListViewRebuildMode::MakeExact);
274 debug_assert!(listview.is_zero_copy_to_list());
275
276 self.nulls.append_validity_mask(array.validity_mask());
277
278 let old_elements_len = self.elements_builder.len();
280 self.elements_builder
281 .reserve_exact(listview.elements().len());
282 self.elements_builder.extend_from_array(listview.elements());
283 let new_elements_len = self.elements_builder.len();
284
285 let extend_length = listview.len();
287 self.sizes_builder.reserve_exact(extend_length);
288 self.offsets_builder.reserve_exact(extend_length);
289
290 let cast_sizes = compute::cast(listview.sizes(), self.sizes_builder.dtype()).vortex_expect(
292 "was somehow unable to cast the new sizes to the type of the builder sizes",
293 );
294 self.sizes_builder.extend_from_array(cast_sizes.as_ref());
295
296 let uninit_range = self.offsets_builder.uninit_range(extend_length);
300
301 let new_offsets = listview.offsets().to_primitive();
303
304 match_each_integer_ptype!(new_offsets.ptype(), |A| {
305 adjust_and_extend_offsets::<O, A>(
306 uninit_range,
307 new_offsets,
308 old_elements_len,
309 new_elements_len,
310 );
311 })
312 }
313
314 fn reserve_exact(&mut self, capacity: usize) {
315 self.elements_builder.reserve_exact(capacity * 2);
316 self.offsets_builder.reserve_exact(capacity);
317 self.sizes_builder.reserve_exact(capacity);
318 self.nulls.reserve_exact(capacity);
319 }
320
321 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
322 self.nulls = LazyBitBufferBuilder::new(validity.len());
323 self.nulls.append_validity_mask(validity);
324 }
325
326 fn finish(&mut self) -> ArrayRef {
327 self.finish_into_listview().into_array()
328 }
329
330 fn finish_into_canonical(&mut self) -> Canonical {
331 Canonical::List(self.finish_into_listview())
332 }
333}
334
335fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
338 mut uninit_range: UninitRange<'a, O>,
339 new_offsets: PrimitiveArray,
340 old_elements_len: usize,
341 new_elements_len: usize,
342) {
343 let new_offsets_slice = new_offsets.as_slice::<A>();
344 let old_elements_len = O::from_usize(old_elements_len)
345 .vortex_expect("the old elements length did not fit into the offset type (impossible)");
346 let new_elements_len = O::from_usize(new_elements_len)
347 .vortex_expect("the current elements length did not fit into the offset type (impossible)");
348
349 for i in 0..uninit_range.len() {
350 let new_offset = O::from_usize(
351 new_offsets_slice[i]
352 .to_usize()
353 .vortex_expect("Offsets must always fit in usize"),
354 )
355 .vortex_expect("New offset somehow did not fit into the builder's offset type");
356
357 let adjusted_new_offset = new_offset + old_elements_len;
360 assert!(
361 adjusted_new_offset <= new_elements_len,
362 "[{i}/{}]: {new_offset} + {old_elements_len} \
363 = {adjusted_new_offset} <= {new_elements_len} failed",
364 uninit_range.len()
365 );
366
367 uninit_range.set_value(i, adjusted_new_offset);
368 }
369
370 unsafe { uninit_range.finish() };
373}
374
375#[cfg(test)]
376mod tests {
377 use std::sync::Arc;
378
379 use vortex_dtype::DType;
380 use vortex_dtype::Nullability::{NonNullable, Nullable};
381 use vortex_dtype::PType::I32;
382 use vortex_scalar::Scalar;
383
384 use super::ListViewBuilder;
385 use crate::IntoArray;
386 use crate::array::Array;
387 use crate::arrays::ListArray;
388 use crate::builders::ArrayBuilder;
389 use crate::vtable::ValidityHelper;
390
391 #[test]
392 fn test_empty() {
393 let mut builder =
394 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
395
396 let listview = builder.finish();
397 assert_eq!(listview.len(), 0);
398 }
399
400 #[test]
401 fn test_basic_append_and_nulls() {
402 let dtype: Arc<DType> = Arc::new(I32.into());
403 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
404
405 builder
407 .append_value(
408 Scalar::list(
409 dtype.clone(),
410 vec![1i32.into(), 2i32.into(), 3i32.into()],
411 NonNullable,
412 )
413 .as_list(),
414 )
415 .unwrap();
416
417 builder
419 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
420 .unwrap();
421
422 builder.append_null();
424
425 builder
427 .append_value(
428 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
429 )
430 .unwrap();
431
432 let listview = builder.finish_into_listview();
433 assert_eq!(listview.len(), 4);
434
435 let first_list = listview.list_elements_at(0);
437 assert_eq!(first_list.len(), 3);
438 assert_eq!(first_list.scalar_at(0), 1i32.into());
439 assert_eq!(first_list.scalar_at(1), 2i32.into());
440 assert_eq!(first_list.scalar_at(2), 3i32.into());
441
442 let empty_list = listview.list_elements_at(1);
444 assert_eq!(empty_list.len(), 0);
445
446 assert!(!listview.validity().is_valid(2));
448
449 let last_list = listview.list_elements_at(3);
451 assert_eq!(last_list.len(), 2);
452 assert_eq!(last_list.scalar_at(0), 4i32.into());
453 assert_eq!(last_list.scalar_at(1), 5i32.into());
454 }
455
456 #[test]
457 fn test_different_offset_size_types() {
458 let dtype: Arc<DType> = Arc::new(I32.into());
460 let mut builder =
461 ListViewBuilder::<u32, u8>::with_capacity(dtype.clone(), NonNullable, 0, 0);
462
463 builder
464 .append_value(
465 Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], NonNullable).as_list(),
466 )
467 .unwrap();
468
469 builder
470 .append_value(
471 Scalar::list(
472 dtype,
473 vec![3i32.into(), 4i32.into(), 5i32.into()],
474 NonNullable,
475 )
476 .as_list(),
477 )
478 .unwrap();
479
480 let listview = builder.finish_into_listview();
481 assert_eq!(listview.len(), 2);
482
483 let first = listview.list_elements_at(0);
485 assert_eq!(first.scalar_at(0), 1i32.into());
486 assert_eq!(first.scalar_at(1), 2i32.into());
487
488 let second = listview.list_elements_at(1);
490 assert_eq!(second.scalar_at(0), 3i32.into());
491 assert_eq!(second.scalar_at(1), 4i32.into());
492 assert_eq!(second.scalar_at(2), 5i32.into());
493
494 let dtype2: Arc<DType> = Arc::new(I32.into());
496 let mut builder2 =
497 ListViewBuilder::<u64, u16>::with_capacity(dtype2.clone(), NonNullable, 0, 0);
498
499 for i in 0..5 {
500 builder2
501 .append_value(
502 Scalar::list(dtype2.clone(), vec![(i * 10).into()], NonNullable).as_list(),
503 )
504 .unwrap();
505 }
506
507 let listview2 = builder2.finish_into_listview();
508 assert_eq!(listview2.len(), 5);
509
510 for i in 0..5i32 {
512 let list = listview2.list_elements_at(i as usize);
513 assert_eq!(list.len(), 1);
514 assert_eq!(list.scalar_at(0), (i * 10).into());
515 }
516 }
517
518 #[test]
519 fn test_builder_trait_methods() {
520 let dtype: Arc<DType> = Arc::new(I32.into());
521 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
522
523 builder.append_zeros(2);
525 assert_eq!(builder.len(), 2);
526
527 unsafe {
529 builder.append_nulls_unchecked(2);
530 }
531 assert_eq!(builder.len(), 4);
532
533 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
535 builder.append_scalar(&list_scalar).unwrap();
536 assert_eq!(builder.len(), 5);
537
538 let listview = builder.finish_into_listview();
539 assert_eq!(listview.len(), 5);
540
541 assert_eq!(listview.list_elements_at(0).len(), 0);
543 assert_eq!(listview.list_elements_at(1).len(), 0);
544
545 assert!(!listview.validity().is_valid(2));
547 assert!(!listview.validity().is_valid(3));
548
549 let last_list = listview.list_elements_at(4);
551 assert_eq!(last_list.len(), 2);
552 assert_eq!(last_list.scalar_at(0), 10i32.into());
553 assert_eq!(last_list.scalar_at(1), 20i32.into());
554 }
555
556 #[test]
557 fn test_extend_from_array() {
558 let dtype: Arc<DType> = Arc::new(I32.into());
559
560 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
562 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
563 Arc::new(I32.into()),
564 )
565 .unwrap();
566
567 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
568
569 builder
571 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
572 .unwrap();
573
574 builder.extend_from_array(&source.into_array());
576
577 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
579 std::iter::empty::<Option<Vec<i32>>>(),
580 Arc::new(I32.into()),
581 )
582 .unwrap();
583 builder.extend_from_array(&empty_source.into_array());
584
585 let listview = builder.finish_into_listview();
586 assert_eq!(listview.len(), 4);
587
588 let first = listview.list_elements_at(0);
591 assert_eq!(first.len(), 1);
592 assert_eq!(first.scalar_at(0), 0i32.into());
593
594 let second = listview.list_elements_at(1);
596 assert_eq!(second.len(), 3);
597 assert_eq!(second.scalar_at(0), 1i32.into());
598 assert_eq!(second.scalar_at(1), 2i32.into());
599 assert_eq!(second.scalar_at(2), 3i32.into());
600
601 assert!(!listview.validity().is_valid(2));
603
604 let fourth = listview.list_elements_at(3);
606 assert_eq!(fourth.len(), 2);
607 assert_eq!(fourth.scalar_at(0), 4i32.into());
608 assert_eq!(fourth.scalar_at(1), 5i32.into());
609 }
610
611 #[test]
612 fn test_error_append_null_to_non_nullable() {
613 let dtype: Arc<DType> = Arc::new(I32.into());
614 let mut builder =
615 ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
616
617 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
619 let null_list = null_scalar.as_list();
620
621 let result = builder.append_value(null_list);
623 assert!(result.is_err());
624 assert!(
625 result
626 .unwrap_err()
627 .to_string()
628 .contains("null value to non-nullable")
629 );
630 }
631
632 #[test]
633 #[should_panic(
634 expected = "Size type I32 (max offset 2147483647) must fit within offset type I16 (max offset 32767)"
635 )]
636 fn test_error_invalid_type_combination() {
637 let dtype: Arc<DType> = Arc::new(I32.into());
638 let _builder = ListViewBuilder::<i16, i32>::with_capacity(dtype, NonNullable, 0, 0);
640 }
641}