1use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
22 ArrowPrimitiveType, FixedSizeListArray,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
41 const IS_LARGE: bool;
43 const PREFIX: &'static str;
45 const MAX_OFFSET: usize;
47}
48
49impl OffsetSizeTrait for i32 {
50 const IS_LARGE: bool = false;
51 const PREFIX: &'static str = "";
52 const MAX_OFFSET: usize = i32::MAX as usize;
53}
54
55impl OffsetSizeTrait for i64 {
56 const IS_LARGE: bool = true;
57 const PREFIX: &'static str = "Large";
58 const MAX_OFFSET: usize = i64::MAX as usize;
59}
60
61pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
170 data_type: DataType,
171 nulls: Option<NullBuffer>,
172 values: ArrayRef,
173 value_offsets: OffsetBuffer<OffsetSize>,
174}
175
176impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
177 fn clone(&self) -> Self {
178 Self {
179 data_type: self.data_type.clone(),
180 nulls: self.nulls.clone(),
181 values: self.values.clone(),
182 value_offsets: self.value_offsets.clone(),
183 }
184 }
185}
186
187impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
188 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
192 DataType::LargeList
193 } else {
194 DataType::List
195 };
196
197 pub fn try_new(
208 field: FieldRef,
209 offsets: OffsetBuffer<OffsetSize>,
210 values: ArrayRef,
211 nulls: Option<NullBuffer>,
212 ) -> Result<Self, ArrowError> {
213 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
215 if end_offset > values.len() {
218 return Err(ArrowError::InvalidArgumentError(format!(
219 "Max offset of {end_offset} exceeds length of values {}",
220 values.len()
221 )));
222 }
223
224 if let Some(n) = nulls.as_ref() {
225 if n.len() != len {
226 return Err(ArrowError::InvalidArgumentError(format!(
227 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
228 OffsetSize::PREFIX,
229 n.len(),
230 )));
231 }
232 }
233 if !field.is_nullable() && values.is_nullable() {
234 return Err(ArrowError::InvalidArgumentError(format!(
235 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
236 OffsetSize::PREFIX,
237 field.name()
238 )));
239 }
240
241 if field.data_type() != values.data_type() {
242 return Err(ArrowError::InvalidArgumentError(format!(
243 "{}ListArray expected data type {} got {} for {:?}",
244 OffsetSize::PREFIX,
245 field.data_type(),
246 values.data_type(),
247 field.name()
248 )));
249 }
250
251 Ok(Self {
252 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
253 nulls,
254 values,
255 value_offsets: offsets,
256 })
257 }
258
259 pub fn new(
265 field: FieldRef,
266 offsets: OffsetBuffer<OffsetSize>,
267 values: ArrayRef,
268 nulls: Option<NullBuffer>,
269 ) -> Self {
270 Self::try_new(field, offsets, values, nulls).unwrap()
271 }
272
273 pub fn new_null(field: FieldRef, len: usize) -> Self {
275 let values = new_empty_array(field.data_type());
276 Self {
277 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
278 nulls: Some(NullBuffer::new_null(len)),
279 value_offsets: OffsetBuffer::new_zeroed(len),
280 values,
281 }
282 }
283
284 pub fn into_parts(
286 self,
287 ) -> (
288 FieldRef,
289 OffsetBuffer<OffsetSize>,
290 ArrayRef,
291 Option<NullBuffer>,
292 ) {
293 let f = match self.data_type {
294 DataType::List(f) | DataType::LargeList(f) => f,
295 _ => unreachable!(),
296 };
297 (f, self.value_offsets, self.values, self.nulls)
298 }
299
300 #[inline]
309 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
310 &self.value_offsets
311 }
312
313 #[inline]
320 pub fn values(&self) -> &ArrayRef {
321 &self.values
322 }
323
324 pub fn value_type(&self) -> DataType {
326 self.values.data_type().clone()
327 }
328
329 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
337 let end = self.value_offsets().get_unchecked(i + 1).as_usize();
338 let start = self.value_offsets().get_unchecked(i).as_usize();
339 self.values.slice(start, end - start)
340 }
341
342 pub fn value(&self, i: usize) -> ArrayRef {
350 let end = self.value_offsets()[i + 1].as_usize();
351 let start = self.value_offsets()[i].as_usize();
352 self.values.slice(start, end - start)
353 }
354
355 #[inline]
359 pub fn value_offsets(&self) -> &[OffsetSize] {
360 &self.value_offsets
361 }
362
363 #[inline]
365 pub fn value_length(&self, i: usize) -> OffsetSize {
366 let offsets = self.value_offsets();
367 offsets[i + 1] - offsets[i]
368 }
369
370 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
372 GenericListArrayIter::<'a, OffsetSize>::new(self)
373 }
374
375 #[inline]
376 fn get_type(data_type: &DataType) -> Option<&DataType> {
377 match (OffsetSize::IS_LARGE, data_type) {
378 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
379 Some(child.data_type())
380 }
381 _ => None,
382 }
383 }
384
385 pub fn slice(&self, offset: usize, length: usize) -> Self {
391 Self {
392 data_type: self.data_type.clone(),
393 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
394 values: self.values.clone(),
395 value_offsets: self.value_offsets.slice(offset, length),
396 }
397 }
398
399 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
415 where
416 T: ArrowPrimitiveType,
417 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
418 I: IntoIterator<Item = Option<P>>,
419 {
420 let iter = iter.into_iter();
421 let size_hint = iter.size_hint().0;
422 let mut builder =
423 GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
424
425 for i in iter {
426 match i {
427 Some(p) => {
428 for t in p {
429 builder.values().append_option(t);
430 }
431 builder.append(true);
432 }
433 None => builder.append(false),
434 }
435 }
436 builder.finish()
437 }
438}
439
440impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
441 fn from(data: ArrayData) -> Self {
442 Self::try_new_from_array_data(data)
443 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
444 }
445}
446
447impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
448 fn from(array: GenericListArray<OffsetSize>) -> Self {
449 let len = array.len();
450 let builder = ArrayDataBuilder::new(array.data_type)
451 .len(len)
452 .nulls(array.nulls)
453 .buffers(vec![array.value_offsets.into_inner().into_inner()])
454 .child_data(vec![array.values.to_data()]);
455
456 unsafe { builder.build_unchecked() }
457 }
458}
459
460impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
461 fn from(value: FixedSizeListArray) -> Self {
462 let (field, size) = match value.data_type() {
463 DataType::FixedSizeList(f, size) => (f, *size as usize),
464 _ => unreachable!(),
465 };
466
467 let offsets = OffsetBuffer::from_lengths(std::iter::repeat_n(size, value.len()));
468
469 Self {
470 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
471 nulls: value.nulls().cloned(),
472 values: value.values().clone(),
473 value_offsets: offsets,
474 }
475 }
476}
477
478impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
479 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
480 if data.buffers().len() != 1 {
481 return Err(ArrowError::InvalidArgumentError(format!(
482 "ListArray data should contain a single buffer only (value offsets), had {}",
483 data.buffers().len()
484 )));
485 }
486
487 if data.child_data().len() != 1 {
488 return Err(ArrowError::InvalidArgumentError(format!(
489 "ListArray should contain a single child array (values array), had {}",
490 data.child_data().len()
491 )));
492 }
493
494 let values = data.child_data()[0].clone();
495
496 if let Some(child_data_type) = Self::get_type(data.data_type()) {
497 if values.data_type() != child_data_type {
498 return Err(ArrowError::InvalidArgumentError(format!(
499 "[Large]ListArray's child datatype {:?} does not \
500 correspond to the List's datatype {:?}",
501 values.data_type(),
502 child_data_type
503 )));
504 }
505 } else {
506 return Err(ArrowError::InvalidArgumentError(format!(
507 "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
508 data.data_type()
509 )));
510 }
511
512 let values = make_array(values);
513 let value_offsets = unsafe { get_offsets(&data) };
516
517 Ok(Self {
518 data_type: data.data_type().clone(),
519 nulls: data.nulls().cloned(),
520 values,
521 value_offsets,
522 })
523 }
524}
525
526impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
527 fn as_any(&self) -> &dyn Any {
528 self
529 }
530
531 fn to_data(&self) -> ArrayData {
532 self.clone().into()
533 }
534
535 fn into_data(self) -> ArrayData {
536 self.into()
537 }
538
539 fn data_type(&self) -> &DataType {
540 &self.data_type
541 }
542
543 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
544 Arc::new(self.slice(offset, length))
545 }
546
547 fn len(&self) -> usize {
548 self.value_offsets.len() - 1
549 }
550
551 fn is_empty(&self) -> bool {
552 self.value_offsets.len() <= 1
553 }
554
555 fn shrink_to_fit(&mut self) {
556 if let Some(nulls) = &mut self.nulls {
557 nulls.shrink_to_fit();
558 }
559 self.values.shrink_to_fit();
560 self.value_offsets.shrink_to_fit();
561 }
562
563 fn offset(&self) -> usize {
564 0
565 }
566
567 fn nulls(&self) -> Option<&NullBuffer> {
568 self.nulls.as_ref()
569 }
570
571 fn logical_null_count(&self) -> usize {
572 self.null_count()
574 }
575
576 fn get_buffer_memory_size(&self) -> usize {
577 let mut size = self.values.get_buffer_memory_size();
578 size += self.value_offsets.inner().inner().capacity();
579 if let Some(n) = self.nulls.as_ref() {
580 size += n.buffer().capacity();
581 }
582 size
583 }
584
585 fn get_array_memory_size(&self) -> usize {
586 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
587 size += self.value_offsets.inner().inner().capacity();
588 if let Some(n) = self.nulls.as_ref() {
589 size += n.buffer().capacity();
590 }
591 size
592 }
593}
594
595impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
596 type Item = ArrayRef;
597
598 fn value(&self, index: usize) -> Self::Item {
599 GenericListArray::value(self, index)
600 }
601
602 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
603 GenericListArray::value(self, index)
604 }
605}
606
607impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
608 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
609 let prefix = OffsetSize::PREFIX;
610
611 write!(f, "{prefix}ListArray\n[\n")?;
612 print_long_array(self, f, |array, index, f| {
613 std::fmt::Debug::fmt(&array.value(index), f)
614 })?;
615 write!(f, "]")
616 }
617}
618
619pub type ListArray = GenericListArray<i32>;
623
624pub type LargeListArray = GenericListArray<i64>;
628
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
    use crate::cast::AsArray;
    use crate::types::Int32Type;
    use crate::{Int32Array, Int64Array};
    use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
    use arrow_schema::Field;

    /// Nested optional rows used by the equality test.
    type Rows = Vec<Option<Vec<Option<i32>>>>;

    /// Validated child data holding the `Int32` values `0..len`.
    fn int32_value_data(len: i32) -> ArrayData {
        let values: Vec<i32> = (0..len).collect();
        ArrayData::builder(DataType::Int32)
            .len(values.len())
            .add_buffer(Buffer::from_slice_ref(&values))
            .build()
            .unwrap()
    }

    /// `List<Int32>` data type whose item field is non-nullable.
    fn int32_list_type() -> DataType {
        DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)))
    }

    /// Validity bitmap with bits 0, 3, 4, 6 and 8 set (all others cleared).
    fn validity_bits() -> [u8; 2] {
        let mut bits = [0u8; 2];
        for idx in [0, 3, 4, 6, 8] {
            bit_util::set_bit(&mut bits, idx);
        }
        bits
    }

    /// `[[0, 1, 2], [3, 4, 5], [6, 7]]` built directly from buffers.
    fn create_from_buffers() -> ListArray {
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
        let values = Arc::new(Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]));
        ListArray::new(field, offsets, values, None)
    }

    #[test]
    fn test_from_iter_primitive() {
        let rows = vec![
            Some(vec![Some(0), Some(1), Some(2)]),
            Some(vec![Some(3), Some(4), Some(5)]),
            Some(vec![Some(6), Some(7)]),
        ];
        let from_iter = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);

        let from_buffers = create_from_buffers();
        assert_eq!(from_iter, from_buffers)
    }

    #[test]
    fn test_empty_list_array() {
        let empty_values = ArrayData::builder(DataType::Int32)
            .len(0)
            .add_buffer(Buffer::from([]))
            .build()
            .unwrap();

        let empty_offsets = Buffer::from([]);

        let list_data = ArrayData::builder(int32_list_type())
            .len(0)
            .add_buffer(empty_offsets)
            .add_child_data(empty_values)
            .build()
            .unwrap();

        let list_array = ListArray::from(list_data);
        assert_eq!(list_array.len(), 0)
    }

    #[test]
    fn test_list_array() {
        let value_data = int32_value_data(8);
        let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
        let list_data_type = int32_list_type();

        // Whole array: [[0, 1, 2], [3, 4, 5], [6, 7]]
        let list_array = ListArray::from(
            ArrayData::builder(list_data_type.clone())
                .len(3)
                .add_buffer(value_offsets.clone())
                .add_child_data(value_data.clone())
                .build()
                .unwrap(),
        );

        assert_eq!(value_data, list_array.values().to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[2]);
        assert_eq!(2, list_array.value_length(2));
        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
        assert_eq!(
            0,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
        for slot in 0..3 {
            assert!(list_array.is_valid(slot));
            assert!(!list_array.is_null(slot));
        }

        // Same buffers viewed with an offset of one list.
        let list_array = ListArray::from(
            ArrayData::builder(list_data_type)
                .len(2)
                .offset(1)
                .add_buffer(value_offsets)
                .add_child_data(value_data.clone())
                .build()
                .unwrap(),
        );

        assert_eq!(value_data, list_array.values().to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(2, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[1]);
        assert_eq!(2, list_array.value_length(1));
        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
        assert_eq!(
            3,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
    }

    #[test]
    fn test_large_list_array() {
        let value_data = int32_value_data(8);
        let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
        let list_data_type = DataType::new_large_list(DataType::Int32, false);

        // Whole array: [[0, 1, 2], [3, 4, 5], [6, 7]]
        let list_array = LargeListArray::from(
            ArrayData::builder(list_data_type.clone())
                .len(3)
                .add_buffer(value_offsets.clone())
                .add_child_data(value_data.clone())
                .build()
                .unwrap(),
        );

        assert_eq!(value_data, list_array.values().to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(3, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[2]);
        assert_eq!(2, list_array.value_length(2));
        assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
        assert_eq!(
            0,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
        for slot in 0..3 {
            assert!(list_array.is_valid(slot));
            assert!(!list_array.is_null(slot));
        }

        // Same buffers viewed with an offset of one list.
        let list_array = LargeListArray::from(
            ArrayData::builder(list_data_type)
                .len(2)
                .offset(1)
                .add_buffer(value_offsets)
                .add_child_data(value_data.clone())
                .build()
                .unwrap(),
        );

        assert_eq!(value_data, list_array.values().to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(2, list_array.len());
        assert_eq!(0, list_array.null_count());
        assert_eq!(6, list_array.value_offsets()[1]);
        assert_eq!(2, list_array.value_length(1));
        assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
        assert_eq!(
            3,
            unsafe { list_array.value_unchecked(0) }
                .as_primitive::<Int32Type>()
                .value(0)
        );
    }

    #[test]
    fn test_list_array_slice() {
        let value_data = int32_value_data(10);
        let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
        let null_bits = validity_bits();

        let list_array = ListArray::from(
            ArrayData::builder(int32_list_type())
                .len(9)
                .add_buffer(value_offsets)
                .add_child_data(value_data.clone())
                .null_bit_buffer(Some(Buffer::from(null_bits)))
                .build()
                .unwrap(),
        );

        assert_eq!(value_data, list_array.values().to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(9, list_array.len());
        assert_eq!(4, list_array.null_count());
        assert_eq!(2, list_array.value_offsets()[3]);
        assert_eq!(2, list_array.value_length(3));

        let sliced_array = list_array.slice(1, 6);
        assert_eq!(6, sliced_array.len());
        assert_eq!(3, sliced_array.null_count());

        // Validity of the slice must mirror the source bitmap shifted by one.
        for i in 0..sliced_array.len() {
            let source_valid = bit_util::get_bit(&null_bits, 1 + i);
            if source_valid {
                assert!(sliced_array.is_valid(i));
            } else {
                assert!(sliced_array.is_null(i));
            }
        }

        let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(2, sliced_list_array.value_offsets()[2]);
        assert_eq!(2, sliced_list_array.value_length(2));
        assert_eq!(4, sliced_list_array.value_offsets()[3]);
        assert_eq!(2, sliced_list_array.value_length(3));
        assert_eq!(6, sliced_list_array.value_offsets()[5]);
        assert_eq!(3, sliced_list_array.value_length(5));
    }

    #[test]
    fn test_large_list_array_slice() {
        let value_data = int32_value_data(10);
        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
        let null_bits = validity_bits();

        let list_array = LargeListArray::from(
            ArrayData::builder(DataType::new_large_list(DataType::Int32, false))
                .len(9)
                .add_buffer(value_offsets)
                .add_child_data(value_data.clone())
                .null_bit_buffer(Some(Buffer::from(null_bits)))
                .build()
                .unwrap(),
        );

        assert_eq!(value_data, list_array.values().to_data());
        assert_eq!(DataType::Int32, list_array.value_type());
        assert_eq!(9, list_array.len());
        assert_eq!(4, list_array.null_count());
        assert_eq!(2, list_array.value_offsets()[3]);
        assert_eq!(2, list_array.value_length(3));

        let sliced_array = list_array.slice(1, 6);
        assert_eq!(6, sliced_array.len());
        assert_eq!(3, sliced_array.null_count());

        // Validity of the slice must mirror the source bitmap shifted by one.
        for i in 0..sliced_array.len() {
            let source_valid = bit_util::get_bit(&null_bits, 1 + i);
            if source_valid {
                assert!(sliced_array.is_valid(i));
            } else {
                assert!(sliced_array.is_null(i));
            }
        }

        let sliced_list_array = sliced_array
            .as_any()
            .downcast_ref::<LargeListArray>()
            .unwrap();
        assert_eq!(2, sliced_list_array.value_offsets()[2]);
        assert_eq!(2, sliced_list_array.value_length(2));
        assert_eq!(4, sliced_list_array.value_offsets()[3]);
        assert_eq!(2, sliced_list_array.value_length(3));
        assert_eq!(6, sliced_list_array.value_offsets()[5]);
        assert_eq!(3, sliced_list_array.value_length(5));
    }

    #[test]
    #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
    fn test_list_array_index_out_of_bound() {
        let value_data = int32_value_data(10);
        let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
        let null_bits = validity_bits();

        let list_array = LargeListArray::from(
            ArrayData::builder(DataType::new_large_list(DataType::Int32, false))
                .len(9)
                .add_buffer(value_offsets)
                .add_child_data(value_data)
                .null_bit_buffer(Some(Buffer::from(null_bits)))
                .build()
                .unwrap(),
        );
        assert_eq!(9, list_array.len());

        // Ten offsets exist, so looking up index 10 + 1 must panic.
        list_array.value(10);
    }

    #[test]
    #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
    #[cfg(not(feature = "force_validate"))]
    fn test_list_array_invalid_buffer_len() {
        let child = unsafe {
            ArrayData::builder(DataType::Int32)
                .len(8)
                .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
                .build_unchecked()
        };
        // No offsets buffer is attached on purpose.
        let list_data = unsafe {
            ArrayData::builder(int32_list_type())
                .len(3)
                .add_child_data(child)
                .build_unchecked()
        };
        drop(ListArray::from(list_data));
    }

    #[test]
    #[should_panic(expected = "ListArray should contain a single child array (values array)")]
    #[cfg(not(feature = "force_validate"))]
    fn test_list_array_invalid_child_array_len() {
        let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
        // No child array is attached on purpose.
        let list_data = unsafe {
            ArrayData::builder(int32_list_type())
                .len(3)
                .add_buffer(value_offsets)
                .build_unchecked()
        };
        drop(ListArray::from(list_data));
    }

    #[test]
    #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
    fn test_from_array_data_validation() {
        let mut builder = ListBuilder::new(Int32Builder::new());
        builder.values().append_value(1);
        builder.append(true);
        let small_list = builder.finish();
        // `List` data must be rejected by the `LargeList` conversion.
        let _ = LargeListArray::from(small_list.into_data());
    }

    #[test]
    fn test_list_array_offsets_need_not_start_at_zero() {
        let value_data = int32_value_data(8);
        let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);

        let list_data = ArrayData::builder(int32_list_type())
            .len(3)
            .add_buffer(value_offsets)
            .add_child_data(value_data)
            .build()
            .unwrap();

        let list_array = ListArray::from(list_data);
        assert_eq!(list_array.value_length(0), 0);
        assert_eq!(list_array.value_length(1), 3);
        assert_eq!(list_array.value_length(2), 2);
    }

    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    #[cfg(not(feature = "force_validate"))]
    fn test_primitive_array_alignment() {
        // Slicing off one byte leaves a buffer misaligned for `i32`.
        let misaligned = Buffer::from_slice_ref([0_u64]).slice(1);
        let array_data = unsafe {
            ArrayData::builder(DataType::Int32)
                .add_buffer(misaligned)
                .build_unchecked()
        };
        drop(Int32Array::from(array_data));
    }

    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    #[cfg(not(feature = "force_validate"))]
    fn test_list_array_alignment() {
        // Slicing off one byte leaves an offsets buffer misaligned for `i32`.
        let misaligned = Buffer::from_slice_ref([0_u64]).slice(1);

        let zeros: [i32; 8] = [0; 8];
        let child = unsafe {
            ArrayData::builder(DataType::Int32)
                .add_buffer(Buffer::from_slice_ref(zeros))
                .build_unchecked()
        };

        let list_data = unsafe {
            ArrayData::builder(int32_list_type())
                .add_buffer(misaligned)
                .add_child_data(child)
                .build_unchecked()
        };
        drop(ListArray::from(list_data));
    }

    #[test]
    fn list_array_equality() {
        /// Compares both the `List` and `LargeList` arrays built from the rows.
        fn do_comparison(lhs_data: Rows, rhs_data: Rows, should_equal: bool) {
            let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
            let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
            assert_eq!(lhs == rhs, should_equal);

            let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
            let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
            assert_eq!(lhs == rhs, should_equal);
        }

        let cases: Vec<(Rows, Rows, bool)> = vec![
            // Identical content.
            (
                vec![
                    Some(vec![Some(0), Some(1), Some(2)]),
                    None,
                    Some(vec![Some(3), None, Some(5)]),
                    Some(vec![Some(6), Some(7)]),
                ],
                vec![
                    Some(vec![Some(0), Some(1), Some(2)]),
                    None,
                    Some(vec![Some(3), None, Some(5)]),
                    Some(vec![Some(6), Some(7)]),
                ],
                true,
            ),
            // First list is null on one side only.
            (
                vec![
                    None,
                    None,
                    Some(vec![Some(3), None, Some(5)]),
                    Some(vec![Some(6), Some(7)]),
                ],
                vec![
                    Some(vec![Some(0), Some(1), Some(2)]),
                    None,
                    Some(vec![Some(3), None, Some(5)]),
                    Some(vec![Some(6), Some(7)]),
                ],
                false,
            ),
            // Last list differs in its values.
            (
                vec![
                    None,
                    None,
                    Some(vec![Some(3), None, Some(5)]),
                    Some(vec![Some(6), Some(7)]),
                ],
                vec![
                    None,
                    None,
                    Some(vec![Some(3), None, Some(5)]),
                    Some(vec![Some(0), Some(0)]),
                ],
                false,
            ),
            // Single-item lists with different items.
            (
                vec![None, None, Some(vec![Some(1)])],
                vec![None, None, Some(vec![Some(2)])],
                false,
            ),
        ];

        for (lhs, rhs, expected) in cases {
            do_comparison(lhs, rhs, expected);
        }
    }

    #[test]
    fn test_empty_offsets() {
        let item = Arc::new(Field::new("element", DataType::Int32, true));

        let small = ListArray::from(
            ArrayData::builder(DataType::List(item.clone()))
                .buffers(vec![Buffer::from(&[])])
                .add_child_data(ArrayData::new_empty(&DataType::Int32))
                .build()
                .unwrap(),
        );
        assert_eq!(small.value_offsets(), &[0]);

        let large = LargeListArray::from(
            ArrayData::builder(DataType::LargeList(item))
                .buffers(vec![Buffer::from(&[])])
                .add_child_data(ArrayData::new_empty(&DataType::Int32))
                .build()
                .unwrap(),
        );
        assert_eq!(large.len(), 0);
        assert_eq!(large.value_offsets(), &[0]);
    }

    #[test]
    fn test_try_new() {
        let int32_values =
            Arc::new(Int32Array::new(vec![1, 2, 3, 4, 5].into(), None)) as ArrayRef;
        let int32_field = Arc::new(Field::new("element", DataType::Int32, false));

        // Valid construction, without and with a null buffer.
        let small_offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
        let _ = ListArray::new(
            int32_field.clone(),
            small_offsets.clone(),
            int32_values.clone(),
            None,
        );
        let _ = ListArray::new(
            int32_field.clone(),
            small_offsets,
            int32_values.clone(),
            Some(NullBuffer::new_null(3)),
        );

        // Null buffer shorter than the number of lists.
        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
        let err = LargeListArray::try_new(
            int32_field,
            offsets.clone(),
            int32_values.clone(),
            Some(NullBuffer::new_null(3)),
        )
        .unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
        );

        // Field type disagrees with the child array type.
        let int64_field = Arc::new(Field::new("element", DataType::Int64, false));
        let err = LargeListArray::try_new(
            int64_field.clone(),
            offsets.clone(),
            int32_values.clone(),
            None,
        )
        .unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
        );

        // Nullable child under a non-nullable field.
        let nullable_values = Arc::new(Int64Array::new(
            vec![0; 7].into(),
            Some(NullBuffer::new_null(7)),
        ));
        let err = LargeListArray::try_new(
            int64_field,
            offsets.clone(),
            nullable_values.clone(),
            None,
        )
        .unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
        );

        // The same child is accepted once the field is nullable.
        let nullable_field = Arc::new(Field::new("element", DataType::Int64, true));
        let _ = LargeListArray::new(
            nullable_field.clone(),
            offsets.clone(),
            nullable_values,
            None,
        );

        // Child array shorter than the last offset.
        let short_values = Int64Array::new(vec![0; 2].into(), None);
        let err = LargeListArray::try_new(nullable_field, offsets, Arc::new(short_values), None)
            .unwrap_err();
        assert_eq!(
            err.to_string(),
            "Invalid argument error: Max offset of 5 exceeds length of values 2"
        );
    }

    #[test]
    fn test_from_fixed_size_list() {
        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
        let rows: [(&[i32], bool); 3] = [
            (&[1, 2, 3], true),
            (&[0, 0, 0], false),
            (&[4, 5, 6], true),
        ];
        for (items, is_valid) in rows {
            builder.values().append_slice(items);
            builder.append(is_valid);
        }
        let list: ListArray = builder.finish().into();

        let collected: Vec<_> = list
            .iter()
            .map(|row| row.map(|items| items.as_primitive::<Int32Type>().values().to_vec()))
            .collect();
        assert_eq!(
            collected,
            vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]
        )
    }

    #[test]
    fn test_nullable_union() {
        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());

        let mut builder = UnionBuilder::new_dense();
        for (type_name, v) in [("a", 1), ("b", 2), ("b", 3), ("a", 4), ("a", 5)] {
            builder.append::<Int32Type>(type_name, v).unwrap();
        }
        let values = builder.build().unwrap();

        // A non-nullable item field over a union child must be accepted.
        let field = Arc::new(Field::new("element", values.data_type().clone(), false));
        let _ = ListArray::new(field, offsets, Arc::new(values), None);
    }
}