1use std::sync::Arc;
12
13use vortex_dtype::{DType, IntegerPType, Nullability};
14use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
15use vortex_mask::Mask;
16use vortex_scalar::{ListScalar, Scalar};
17
18use super::lazy_null_builder::LazyNullBufferBuilder;
19use crate::array::{Array, ArrayRef, IntoArray};
20use crate::arrays::ListViewArray;
21use crate::builders::{
22 ArrayBuilder, DEFAULT_BUILDER_CAPACITY, PrimitiveBuilder, builder_with_capacity,
23};
24use crate::{Canonical, ToCanonical};
25
26pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
36 dtype: DType,
38
39 elements_builder: Box<dyn ArrayBuilder>,
41
42 offsets_builder: PrimitiveBuilder<O>,
44
45 sizes_builder: PrimitiveBuilder<S>,
47
48 nulls: LazyNullBufferBuilder,
50}
51
52impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
53 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
55 Self::with_capacity(
56 element_dtype,
57 nullability,
58 DEFAULT_BUILDER_CAPACITY * 2,
61 DEFAULT_BUILDER_CAPACITY,
62 )
63 }
64
65 pub fn with_capacity(
73 element_dtype: Arc<DType>,
74 nullability: Nullability,
75 elements_capacity: usize,
76 capacity: usize,
77 ) -> Self {
78 assert!(
81 S::max_value_as_u64() <= O::max_value_as_u64(),
82 "Size type {:?} (max offset {}) must fit within offset type {:?} (max offset {})",
83 S::PTYPE,
84 S::max_value_as_u64(),
85 O::PTYPE,
86 O::max_value_as_u64()
87 );
88
89 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
90
91 let offsets_builder =
92 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
93 let sizes_builder =
94 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
95
96 let nulls = LazyNullBufferBuilder::new(capacity);
97
98 Self {
99 dtype: DType::List(element_dtype, nullability),
100 elements_builder,
101 offsets_builder,
102 sizes_builder,
103 nulls,
104 }
105 }
106
107 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
112 let Some(elements) = value.elements() else {
113 vortex_ensure!(
115 self.dtype.is_nullable(),
116 "Cannot append null value to non-nullable list builder"
117 );
118 self.append_null();
119 return Ok(());
120 };
121
122 let curr_offset = self.elements_builder.len();
123 let num_elements = elements.len();
124
125 for scalar in elements {
126 self.elements_builder.append_scalar(&scalar)?;
129 }
130 self.nulls.append_non_null();
131
132 self.offsets_builder.append_value(
133 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
134 );
135 self.sizes_builder.append_value(
136 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
137 );
138
139 Ok(())
140 }
141
142 pub fn finish_into_listview(&mut self) -> ListViewArray {
144 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
145 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
146
147 let elements = self.elements_builder.finish();
148 let offsets = self.offsets_builder.finish();
149 let sizes = self.sizes_builder.finish();
150 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
151
152 ListViewArray::try_new(elements, offsets, sizes, validity)
153 .vortex_expect("Failed to create ListViewArray")
154 }
155
156 pub fn element_dtype(&self) -> &DType {
159 let DType::List(element_dtype, ..) = &self.dtype else {
160 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
161 };
162
163 element_dtype
164 }
165}
166
167impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
168 fn as_any(&self) -> &dyn std::any::Any {
169 self
170 }
171
172 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
173 self
174 }
175
176 fn dtype(&self) -> &DType {
177 &self.dtype
178 }
179
180 fn len(&self) -> usize {
181 self.offsets_builder.len()
182 }
183
184 fn append_zeros(&mut self, n: usize) {
185 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
186 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
187
188 let curr_offset = self.elements_builder.len();
190
191 for _ in 0..n {
194 self.offsets_builder.append_value(
195 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
196 );
197 self.sizes_builder.append_value(S::zero());
198 }
199
200 self.nulls.append_n_non_nulls(n);
201 }
202
203 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
204 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
205 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
206
207 let curr_offset = self.elements_builder.len();
209
210 for _ in 0..n {
212 self.offsets_builder.append_value(
213 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
214 );
215 self.sizes_builder.append_value(S::zero());
216 }
217
218 self.nulls.append_n_nulls(n);
220 }
221
222 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
223 vortex_ensure!(
224 scalar.dtype() == self.dtype(),
225 "ListViewBuilder expected scalar with dtype {:?}, got {:?}",
226 self.dtype(),
227 scalar.dtype()
228 );
229
230 let list_scalar = scalar.as_list();
231 self.append_value(list_scalar)
232 }
233
234 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
235 let listview_array = array.to_listview();
236 if listview_array.is_empty() {
237 return;
238 }
239
240 for i in 0..listview_array.len() {
249 let list = listview_array.scalar_at(i);
250
251 self.append_scalar(&list)
252 .vortex_expect("was unable to extend the `ListViewBuilder`")
253 }
254 }
255
256 fn reserve_exact(&mut self, capacity: usize) {
257 self.elements_builder.reserve_exact(capacity * 2);
258 self.offsets_builder.reserve_exact(capacity);
259 self.sizes_builder.reserve_exact(capacity);
260 self.nulls.reserve_exact(capacity);
261 }
262
263 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
264 self.nulls = LazyNullBufferBuilder::new(validity.len());
265 self.nulls.append_validity_mask(validity);
266 }
267
268 fn finish(&mut self) -> ArrayRef {
269 self.finish_into_listview().into_array()
270 }
271
272 fn finish_into_canonical(&mut self) -> Canonical {
273 Canonical::List(self.finish_into_listview())
274 }
275}
276
277#[cfg(test)]
278mod tests {
279 use std::sync::Arc;
280
281 use vortex_dtype::DType;
282 use vortex_dtype::Nullability::{NonNullable, Nullable};
283 use vortex_dtype::PType::I32;
284 use vortex_scalar::Scalar;
285
286 use super::ListViewBuilder;
287 use crate::IntoArray;
288 use crate::array::Array;
289 use crate::arrays::ListArray;
290 use crate::builders::ArrayBuilder;
291 use crate::vtable::ValidityHelper;
292
293 #[test]
294 fn test_empty() {
295 let mut builder =
296 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
297
298 let listview = builder.finish();
299 assert_eq!(listview.len(), 0);
300 }
301
302 #[test]
303 fn test_basic_append_and_nulls() {
304 let dtype: Arc<DType> = Arc::new(I32.into());
305 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
306
307 builder
309 .append_value(
310 Scalar::list(
311 dtype.clone(),
312 vec![1i32.into(), 2i32.into(), 3i32.into()],
313 NonNullable,
314 )
315 .as_list(),
316 )
317 .unwrap();
318
319 builder
321 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
322 .unwrap();
323
324 builder.append_null();
326
327 builder
329 .append_value(
330 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
331 )
332 .unwrap();
333
334 let listview = builder.finish_into_listview();
335 assert_eq!(listview.len(), 4);
336
337 let first_list = listview.list_elements_at(0);
339 assert_eq!(first_list.len(), 3);
340 assert_eq!(first_list.scalar_at(0), 1i32.into());
341 assert_eq!(first_list.scalar_at(1), 2i32.into());
342 assert_eq!(first_list.scalar_at(2), 3i32.into());
343
344 let empty_list = listview.list_elements_at(1);
346 assert_eq!(empty_list.len(), 0);
347
348 assert!(!listview.validity().is_valid(2));
350
351 let last_list = listview.list_elements_at(3);
353 assert_eq!(last_list.len(), 2);
354 assert_eq!(last_list.scalar_at(0), 4i32.into());
355 assert_eq!(last_list.scalar_at(1), 5i32.into());
356 }
357
358 #[test]
359 fn test_different_offset_size_types() {
360 let dtype: Arc<DType> = Arc::new(I32.into());
362 let mut builder =
363 ListViewBuilder::<u32, u8>::with_capacity(dtype.clone(), NonNullable, 0, 0);
364
365 builder
366 .append_value(
367 Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], NonNullable).as_list(),
368 )
369 .unwrap();
370
371 builder
372 .append_value(
373 Scalar::list(
374 dtype,
375 vec![3i32.into(), 4i32.into(), 5i32.into()],
376 NonNullable,
377 )
378 .as_list(),
379 )
380 .unwrap();
381
382 let listview = builder.finish_into_listview();
383 assert_eq!(listview.len(), 2);
384
385 let first = listview.list_elements_at(0);
387 assert_eq!(first.scalar_at(0), 1i32.into());
388 assert_eq!(first.scalar_at(1), 2i32.into());
389
390 let second = listview.list_elements_at(1);
392 assert_eq!(second.scalar_at(0), 3i32.into());
393 assert_eq!(second.scalar_at(1), 4i32.into());
394 assert_eq!(second.scalar_at(2), 5i32.into());
395
396 let dtype2: Arc<DType> = Arc::new(I32.into());
398 let mut builder2 =
399 ListViewBuilder::<u64, u16>::with_capacity(dtype2.clone(), NonNullable, 0, 0);
400
401 for i in 0..5 {
402 builder2
403 .append_value(
404 Scalar::list(dtype2.clone(), vec![(i * 10).into()], NonNullable).as_list(),
405 )
406 .unwrap();
407 }
408
409 let listview2 = builder2.finish_into_listview();
410 assert_eq!(listview2.len(), 5);
411
412 for i in 0..5i32 {
414 let list = listview2.list_elements_at(i as usize);
415 assert_eq!(list.len(), 1);
416 assert_eq!(list.scalar_at(0), (i * 10).into());
417 }
418 }
419
420 #[test]
421 fn test_builder_trait_methods() {
422 let dtype: Arc<DType> = Arc::new(I32.into());
423 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
424
425 builder.append_zeros(2);
427 assert_eq!(builder.len(), 2);
428
429 unsafe {
431 builder.append_nulls_unchecked(2);
432 }
433 assert_eq!(builder.len(), 4);
434
435 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
437 builder.append_scalar(&list_scalar).unwrap();
438 assert_eq!(builder.len(), 5);
439
440 let listview = builder.finish_into_listview();
441 assert_eq!(listview.len(), 5);
442
443 assert_eq!(listview.list_elements_at(0).len(), 0);
445 assert_eq!(listview.list_elements_at(1).len(), 0);
446
447 assert!(!listview.validity().is_valid(2));
449 assert!(!listview.validity().is_valid(3));
450
451 let last_list = listview.list_elements_at(4);
453 assert_eq!(last_list.len(), 2);
454 assert_eq!(last_list.scalar_at(0), 10i32.into());
455 assert_eq!(last_list.scalar_at(1), 20i32.into());
456 }
457
458 #[test]
459 fn test_extend_from_array() {
460 let dtype: Arc<DType> = Arc::new(I32.into());
461
462 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
464 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
465 Arc::new(I32.into()),
466 )
467 .unwrap();
468
469 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
470
471 builder
473 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
474 .unwrap();
475
476 unsafe {
478 builder.extend_from_array_unchecked(&source.into_array());
479 }
480
481 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
483 std::iter::empty::<Option<Vec<i32>>>(),
484 Arc::new(I32.into()),
485 )
486 .unwrap();
487 unsafe {
488 builder.extend_from_array_unchecked(&empty_source.into_array());
489 }
490
491 let listview = builder.finish_into_listview();
492 assert_eq!(listview.len(), 4);
493
494 let first = listview.list_elements_at(0);
497 assert_eq!(first.len(), 1);
498 assert_eq!(first.scalar_at(0), 0i32.into());
499
500 let second = listview.list_elements_at(1);
502 assert_eq!(second.len(), 3);
503 assert_eq!(second.scalar_at(0), 1i32.into());
504 assert_eq!(second.scalar_at(1), 2i32.into());
505 assert_eq!(second.scalar_at(2), 3i32.into());
506
507 assert!(!listview.validity().is_valid(2));
509
510 let fourth = listview.list_elements_at(3);
512 assert_eq!(fourth.len(), 2);
513 assert_eq!(fourth.scalar_at(0), 4i32.into());
514 assert_eq!(fourth.scalar_at(1), 5i32.into());
515 }
516
517 #[test]
518 fn test_error_append_null_to_non_nullable() {
519 let dtype: Arc<DType> = Arc::new(I32.into());
520 let mut builder =
521 ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
522
523 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
525 let null_list = null_scalar.as_list();
526
527 let result = builder.append_value(null_list);
529 assert!(result.is_err());
530 assert!(
531 result
532 .unwrap_err()
533 .to_string()
534 .contains("null value to non-nullable")
535 );
536 }
537
538 #[test]
539 #[should_panic(
540 expected = "Size type I32 (max offset 2147483647) must fit within offset type I16 (max offset 32767)"
541 )]
542 fn test_error_invalid_type_combination() {
543 let dtype: Arc<DType> = Arc::new(I32.into());
544 let _builder = ListViewBuilder::<i16, i32>::with_capacity(dtype, NonNullable, 0, 0);
546 }
547}