1use crate::builder::ArrayBuilder;
19use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
20use arrow_buffer::NullBufferBuilder;
21use arrow_buffer::{Buffer, OffsetBuffer};
22use arrow_schema::{Field, FieldRef};
23use std::any::Any;
24use std::sync::Arc;
25
26#[derive(Debug)]
88pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
89 offsets_builder: Vec<OffsetSize>,
90 null_buffer_builder: NullBufferBuilder,
91 values_builder: T,
92 field: Option<FieldRef>,
93}
94
95impl<O: OffsetSizeTrait, T: ArrayBuilder + Default> Default for GenericListBuilder<O, T> {
96 fn default() -> Self {
97 Self::new(T::default())
98 }
99}
100
101impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T> {
102 pub fn new(values_builder: T) -> Self {
104 let capacity = values_builder.len();
105 Self::with_capacity(values_builder, capacity)
106 }
107
108 pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
111 let mut offsets_builder = Vec::with_capacity(capacity + 1);
112 offsets_builder.push(OffsetSize::zero());
113 Self {
114 offsets_builder,
115 null_buffer_builder: NullBufferBuilder::new(capacity),
116 values_builder,
117 field: None,
118 }
119 }
120
121 pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
128 Self {
129 field: Some(field.into()),
130 ..self
131 }
132 }
133}
134
135impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> ArrayBuilder
136 for GenericListBuilder<OffsetSize, T>
137where
138 T: 'static,
139{
140 fn as_any(&self) -> &dyn Any {
142 self
143 }
144
145 fn as_any_mut(&mut self) -> &mut dyn Any {
147 self
148 }
149
150 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
152 self
153 }
154
155 fn len(&self) -> usize {
157 self.null_buffer_builder.len()
158 }
159
160 fn finish(&mut self) -> ArrayRef {
162 Arc::new(self.finish())
163 }
164
165 fn finish_cloned(&self) -> ArrayRef {
167 Arc::new(self.finish_cloned())
168 }
169
170 fn finish_preserve_values(&mut self) -> ArrayRef {
171 Arc::new(self.finish_preserve_values())
172 }
173}
174
175impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize, T>
176where
177 T: 'static,
178{
179 pub fn values(&mut self) -> &mut T {
184 &mut self.values_builder
185 }
186
187 pub fn values_ref(&self) -> &T {
189 &self.values_builder
190 }
191
192 #[inline]
198 pub fn append(&mut self, is_valid: bool) {
199 self.offsets_builder.push(self.next_offset());
200 self.null_buffer_builder.append(is_valid);
201 }
202
203 #[inline]
209 fn next_offset(&self) -> OffsetSize {
210 OffsetSize::from_usize(self.values_builder.len()).unwrap()
211 }
212
213 #[inline]
260 pub fn append_value<I, V>(&mut self, i: I)
261 where
262 T: Extend<Option<V>>,
263 I: IntoIterator<Item = Option<V>>,
264 {
265 self.extend(std::iter::once(Some(i)))
266 }
267
268 #[inline]
272 pub fn append_null(&mut self) {
273 self.offsets_builder.push(self.next_offset());
274 self.null_buffer_builder.append_null();
275 }
276
277 #[inline]
279 pub fn append_nulls(&mut self, n: usize) {
280 let next_offset = self.next_offset();
281 self.offsets_builder
282 .extend(std::iter::repeat_n(next_offset, n));
283 self.null_buffer_builder.append_n_nulls(n);
284 }
285
286 #[inline]
290 pub fn append_option<I, V>(&mut self, i: Option<I>)
291 where
292 T: Extend<Option<V>>,
293 I: IntoIterator<Item = Option<V>>,
294 {
295 match i {
296 Some(i) => self.append_value(i),
297 None => self.append_null(),
298 }
299 }
300
301 pub fn finish(&mut self) -> GenericListArray<OffsetSize> {
303 let values = self.values_builder.finish();
304 let nulls = self.null_buffer_builder.finish();
305
306 let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
307 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
309 self.offsets_builder.push(OffsetSize::zero());
310
311 let field = match &self.field {
312 Some(f) => f.clone(),
313 None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
314 };
315
316 GenericListArray::new(field, offsets, values, nulls)
317 }
318
319 pub fn finish_cloned(&self) -> GenericListArray<OffsetSize> {
321 let values = self.values_builder.finish_cloned();
322 let nulls = self.null_buffer_builder.finish_cloned();
323
324 let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice());
325 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
327
328 let field = match &self.field {
329 Some(f) => f.clone(),
330 None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
331 };
332
333 GenericListArray::new(field, offsets, values, nulls)
334 }
335
336 fn finish_preserve_values(&mut self) -> GenericListArray<OffsetSize> {
337 let values = self.values_builder.finish_preserve_values();
338 let nulls = self.null_buffer_builder.finish();
339
340 let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
341 let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
343 self.offsets_builder.push(OffsetSize::zero());
344
345 let field = match &self.field {
346 Some(f) => f.clone(),
347 None => Arc::new(Field::new_list_field(values.data_type().clone(), true)),
348 };
349
350 GenericListArray::new(field, offsets, values, nulls)
351 }
352
353 pub fn offsets_slice(&self) -> &[OffsetSize] {
355 self.offsets_builder.as_slice()
356 }
357
358 pub fn validity_slice(&self) -> Option<&[u8]> {
360 self.null_buffer_builder.as_slice()
361 }
362}
363
364impl<O, B, V, E> Extend<Option<V>> for GenericListBuilder<O, B>
365where
366 O: OffsetSizeTrait,
367 B: ArrayBuilder + Extend<E>,
368 V: IntoIterator<Item = E>,
369{
370 #[inline]
371 fn extend<T: IntoIterator<Item = Option<V>>>(&mut self, iter: T) {
372 for v in iter {
373 match v {
374 Some(elements) => {
375 self.values_builder.extend(elements);
376 self.append(true);
377 }
378 None => self.append(false),
379 }
380 }
381 }
382}
383
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Int32Array;
    use crate::builder::{Int32Builder, ListBuilder, make_builder, tests::PreserveValuesMock};
    use crate::cast::AsArray;
    use crate::types::Int32Type;
    use arrow_schema::DataType;

    /// Builds three valid lists and checks values, offsets and validity.
    fn _test_generic_list_array_builder<O: OffsetSizeTrait>() {
        let values_builder = Int32Builder::with_capacity(10);
        let mut builder = GenericListBuilder::<O, _>::new(values_builder);

        // [[0, 1, 2], [3, 4, 5], [6, 7]]
        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.values().append_value(3);
        builder.values().append_value(4);
        builder.values().append_value(5);
        builder.append(true);
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.append(true);
        let list_array = builder.finish();

        let list_values = list_array.values().as_primitive::<Int32Type>();
        assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(list_array.value_offsets(), [0, 3, 6, 8].map(O::usize_as));
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(O::from_usize(6).unwrap(), list_array.value_offsets()[2]);
        assert_eq!(O::from_usize(2).unwrap(), list_array.value_length(2));
        for i in 0..3 {
            assert!(list_array.is_valid(i));
            assert!(!list_array.is_null(i));
        }
    }

    #[test]
    fn test_list_array_builder() {
        _test_generic_list_array_builder::<i32>()
    }

    #[test]
    fn test_large_list_array_builder() {
        _test_generic_list_array_builder::<i64>()
    }

    /// Mixes valid lists, null lists and a null child element.
    fn _test_generic_list_array_builder_nulls<O: OffsetSizeTrait>() {
        let values_builder = Int32Builder::with_capacity(10);
        let mut builder = GenericListBuilder::<O, _>::new(values_builder);

        // [[0, 1, 2], null, [3, null, 5], [6, 7], null, null, [8]]
        builder.values().append_value(0);
        builder.values().append_value(1);
        builder.values().append_value(2);
        builder.append(true);
        builder.append(false);
        builder.values().append_value(3);
        builder.values().append_null();
        builder.values().append_value(5);
        builder.append(true);
        builder.values().append_value(6);
        builder.values().append_value(7);
        builder.append(true);
        builder.append_nulls(2);
        builder.values().append_value(8);
        builder.append(true);

        let list_array = builder.finish();

        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(7, list_array.len());
        assert_eq!(3, list_array.null_count());
        assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]);
        assert_eq!(O::from_usize(9).unwrap(), list_array.value_offsets()[7]);
        assert_eq!(O::from_usize(3).unwrap(), list_array.value_length(2));
        assert!(list_array.is_null(4));
        assert!(list_array.is_null(5));
    }

    #[test]
    fn test_list_array_builder_nulls() {
        _test_generic_list_array_builder_nulls::<i32>()
    }

    #[test]
    fn test_large_list_array_builder_nulls() {
        _test_generic_list_array_builder_nulls::<i64>()
    }

    #[test]
    fn test_list_array_builder_finish() {
        let values_builder = Int32Array::builder(5);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        // `finish` must leave the builder empty and reusable.
        let mut arr = builder.finish();
        assert_eq!(2, arr.len());
        assert!(builder.is_empty());

        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(1, arr.len());
        assert!(builder.is_empty());
    }

    #[test]
    fn test_list_array_builder_finish_cloned() {
        let values_builder = Int32Array::builder(5);
        let mut builder = ListBuilder::new(values_builder);

        builder.values().append_slice(&[1, 2, 3]);
        builder.append(true);
        builder.values().append_slice(&[4, 5, 6]);
        builder.append(true);

        // `finish_cloned` must not reset the builder.
        let mut arr = builder.finish_cloned();
        assert_eq!(2, arr.len());
        assert!(!builder.is_empty());

        builder.values().append_slice(&[7, 8, 9]);
        builder.append(true);
        arr = builder.finish();
        assert_eq!(3, arr.len());
        assert!(builder.is_empty());
    }

    #[test]
    fn test_list_list_array_builder() {
        let primitive_builder = Int32Builder::with_capacity(10);
        let values_builder = ListBuilder::new(primitive_builder);
        let mut builder = ListBuilder::new(values_builder);

        // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]]
        builder.values().values().append_value(1);
        builder.values().values().append_value(2);
        builder.values().append(true);
        builder.values().values().append_value(3);
        builder.values().values().append_value(4);
        builder.values().append(true);
        builder.append(true);

        builder.values().values().append_value(5);
        builder.values().values().append_value(6);
        builder.values().values().append_value(7);
        builder.values().append(true);
        builder.values().append(false);
        builder.values().values().append_value(8);
        builder.values().append(true);
        builder.append(true);

        builder.append(false);

        builder.values().values().append_value(9);
        builder.values().values().append_value(10);
        builder.values().append(true);
        builder.append(true);

        let l1 = builder.finish();

        assert_eq!(4, l1.len());
        assert_eq!(1, l1.null_count());

        assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6]);
        let l2 = l1.values().as_list::<i32>();

        assert_eq!(6, l2.len());
        assert_eq!(1, l2.null_count());
        assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10]);

        let i1 = l2.values().as_primitive::<Int32Type>();
        assert_eq!(10, i1.len());
        assert_eq!(0, i1.null_count());
        assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    #[test]
    fn test_extend() {
        let mut builder = ListBuilder::new(Int32Builder::new());
        builder.extend([
            Some(vec![Some(1), Some(2), Some(7), None]),
            Some(vec![]),
            Some(vec![Some(4), Some(5)]),
            None,
        ]);

        let array = builder.finish();
        assert_eq!(array.value_offsets(), [0, 4, 4, 6, 6]);
        assert_eq!(array.null_count(), 1);
        assert_eq!(array.logical_null_count(), 1);
        assert!(array.is_null(3));
        let elements = array.values().as_primitive::<Int32Type>();
        assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]);
        assert_eq!(elements.null_count(), 1);
        assert_eq!(elements.logical_null_count(), 1);
        assert!(elements.is_null(3));
    }

    #[test]
    fn test_boxed_primitive_array_builder() {
        let values_builder = make_builder(&DataType::Int32, 5);
        let mut builder = ListBuilder::new(values_builder);

        builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
            .append_slice(&[1, 2, 3]);
        builder.append(true);

        builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
            .append_slice(&[4, 5, 6]);
        builder.append(true);

        let arr = builder.finish();
        assert_eq!(2, arr.len());

        let elements = arr.values().as_primitive::<Int32Type>();
        assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn test_boxed_list_list_array_builder() {
        let values_builder = make_builder(
            &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
            10,
        );
        test_boxed_generic_list_generic_list_array_builder::<i32>(values_builder);
    }

    #[test]
    fn test_boxed_large_list_large_list_array_builder() {
        let values_builder = make_builder(
            &DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))),
            10,
        );
        test_boxed_generic_list_generic_list_array_builder::<i64>(values_builder);
    }

    /// Downcasts the type-erased child of `builder` to the nested list builder.
    fn boxed_inner_list<O: OffsetSizeTrait>(
        builder: &mut GenericListBuilder<O, Box<dyn ArrayBuilder>>,
    ) -> &mut GenericListBuilder<O, Box<dyn ArrayBuilder>> {
        builder
            .values()
            .as_any_mut()
            .downcast_mut::<GenericListBuilder<O, Box<dyn ArrayBuilder>>>()
            .expect("should be an (Large)ListBuilder")
    }

    /// Downcasts the type-erased grandchild of `builder` to the `Int32Builder`
    /// that holds the innermost values.
    fn boxed_inner_ints<O: OffsetSizeTrait>(
        builder: &mut GenericListBuilder<O, Box<dyn ArrayBuilder>>,
    ) -> &mut Int32Builder {
        boxed_inner_list(builder)
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder")
    }

    /// Same scenario as `test_list_list_array_builder`, but every nested
    /// builder is reached through `Box<dyn ArrayBuilder>` downcasts.
    fn test_boxed_generic_list_generic_list_array_builder<O: OffsetSizeTrait + PartialEq>(
        values_builder: Box<dyn ArrayBuilder>,
    ) {
        let mut builder: GenericListBuilder<O, Box<dyn ArrayBuilder>> =
            GenericListBuilder::<O, Box<dyn ArrayBuilder>>::new(values_builder);

        // [[1, 2], [3, 4]]
        boxed_inner_ints(&mut builder).append_value(1);
        boxed_inner_ints(&mut builder).append_value(2);
        boxed_inner_list(&mut builder).append(true);
        boxed_inner_ints(&mut builder).append_value(3);
        boxed_inner_ints(&mut builder).append_value(4);
        boxed_inner_list(&mut builder).append(true);
        builder.append(true);

        // [[5, 6, 7], null, [8]]
        boxed_inner_ints(&mut builder).append_value(5);
        boxed_inner_ints(&mut builder).append_value(6);
        boxed_inner_ints(&mut builder).append_value(7);
        boxed_inner_list(&mut builder).append(true);
        boxed_inner_list(&mut builder).append(false);
        boxed_inner_ints(&mut builder).append_value(8);
        boxed_inner_list(&mut builder).append(true);
        builder.append(true);

        // null
        builder.append(false);

        // [[9, 10]]
        boxed_inner_ints(&mut builder).append_value(9);
        boxed_inner_ints(&mut builder).append_value(10);
        boxed_inner_list(&mut builder).append(true);
        builder.append(true);

        let l1 = builder.finish();

        assert_eq!(4, l1.len());
        assert_eq!(1, l1.null_count());

        assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6].map(O::usize_as));
        let l2 = l1.values().as_list::<O>();

        assert_eq!(6, l2.len());
        assert_eq!(1, l2.null_count());
        assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10].map(O::usize_as));

        let i1 = l2.values().as_primitive::<Int32Type>();
        assert_eq!(10, i1.len());
        assert_eq!(0, i1.null_count());
        assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    }

    #[test]
    fn test_with_field() {
        let field = Arc::new(Field::new("bar", DataType::Int32, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1), Some(2), Some(3)]);
        // A null list slot is accepted even though the item field is non-nullable.
        builder.append_null();
        builder.append_value([Some(4)]);
        let array = builder.finish();
        assert_eq!(array.len(), 3);
        assert_eq!(array.data_type(), &DataType::List(field.clone()));

        // The configured field must survive `finish` and apply to the next array too.
        builder.append_value([Some(4), Some(5)]);
        let array = builder.finish();
        assert_eq!(array.data_type(), &DataType::List(field));
        assert_eq!(array.len(), 1);
    }

    #[test]
    #[should_panic(expected = "Non-nullable field of ListArray \\\"item\\\" cannot contain nulls")]
    fn test_checks_nullability() {
        let field = Arc::new(Field::new_list_field(DataType::Int32, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1), None]);
        builder.finish();
    }

    #[test]
    #[should_panic(expected = "ListArray expected data type Int64 got Int32")]
    fn test_checks_data_type() {
        let field = Arc::new(Field::new_list_field(DataType::Int64, false));
        let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone());
        builder.append_value([Some(1)]);
        builder.finish();
    }

    #[test]
    fn test_finish_preserve_values() {
        let mut builder = ListBuilder::new(PreserveValuesMock::default());

        builder.values().inner.append_value(1);
        builder.append(true);

        let arr = builder.finish_preserve_values();

        assert_eq!(1, arr.len());
        // The mock counts calls; exactly one must have reached the child builder.
        assert_eq!(1, builder.values().called);
    }
}