1use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
22 iterator::GenericListArrayIter, new_empty_array,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num_integer::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
/// Integer types that can serve as the offset type of a [`GenericListArray`].
///
/// This file implements the trait for `i32` and `i64`.
pub trait OffsetSizeTrait:
    ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd
{
    /// Whether this offset type selects the "large" list variant
    /// (`DataType::LargeList` rather than `DataType::List`).
    const IS_LARGE: bool;
    /// Text placed in front of `ListArray` when the array is named in messages.
    const PREFIX: &'static str;
    /// Maximum value of the offset type, converted to `usize`.
    const MAX_OFFSET: usize;
}
50
// 32-bit offsets: the regular (non-large) list variant.
impl OffsetSizeTrait for i32 {
    const IS_LARGE: bool = false;
    // Empty prefix, so messages read "ListArray".
    const PREFIX: &'static str = "";
    const MAX_OFFSET: usize = i32::MAX as usize;
}
56
// 64-bit offsets: the "large" list variant.
impl OffsetSizeTrait for i64 {
    const IS_LARGE: bool = true;
    // Messages read "LargeListArray".
    const PREFIX: &'static str = "Large";
    // NOTE(review): `as usize` truncates on targets where `usize` is narrower than 64 bits.
    const MAX_OFFSET: usize = i64::MAX as usize;
}
62
/// Array of variable-length lists whose element boundaries are stored as offsets of type
/// `OffsetSize` into a single child `values` array.
///
/// Element `i` covers the child range `value_offsets[i]..value_offsets[i + 1]`.
pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
    /// `DataType::List` or `DataType::LargeList` wrapping the child field.
    data_type: DataType,
    /// Optional validity information, one entry per list element.
    nulls: Option<NullBuffer>,
    /// Child array holding the contents of all list elements.
    values: ArrayRef,
    /// Start/end positions of the elements inside `values`; holds one entry more than
    /// the array has elements.
    value_offsets: OffsetBuffer<OffsetSize>,
}
177
178impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
179 fn clone(&self) -> Self {
180 Self {
181 data_type: self.data_type.clone(),
182 nulls: self.nulls.clone(),
183 values: self.values.clone(),
184 value_offsets: self.value_offsets.clone(),
185 }
186 }
187}
188
189impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
190 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
194 DataType::LargeList
195 } else {
196 DataType::List
197 };
198
199 pub fn try_new(
210 field: FieldRef,
211 offsets: OffsetBuffer<OffsetSize>,
212 values: ArrayRef,
213 nulls: Option<NullBuffer>,
214 ) -> Result<Self, ArrowError> {
215 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
217 if end_offset > values.len() {
220 return Err(ArrowError::InvalidArgumentError(format!(
221 "Max offset of {end_offset} exceeds length of values {}",
222 values.len()
223 )));
224 }
225
226 if let Some(n) = nulls.as_ref() {
227 if n.len() != len {
228 return Err(ArrowError::InvalidArgumentError(format!(
229 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
230 OffsetSize::PREFIX,
231 n.len(),
232 )));
233 }
234 }
235 if !field.is_nullable() && values.is_nullable() {
236 return Err(ArrowError::InvalidArgumentError(format!(
237 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
238 OffsetSize::PREFIX,
239 field.name()
240 )));
241 }
242
243 if field.data_type() != values.data_type() {
244 return Err(ArrowError::InvalidArgumentError(format!(
245 "{}ListArray expected data type {} got {} for {:?}",
246 OffsetSize::PREFIX,
247 field.data_type(),
248 values.data_type(),
249 field.name()
250 )));
251 }
252
253 Ok(Self {
254 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
255 nulls,
256 values,
257 value_offsets: offsets,
258 })
259 }
260
261 pub fn new(
267 field: FieldRef,
268 offsets: OffsetBuffer<OffsetSize>,
269 values: ArrayRef,
270 nulls: Option<NullBuffer>,
271 ) -> Self {
272 Self::try_new(field, offsets, values, nulls).unwrap()
273 }
274
275 pub fn new_null(field: FieldRef, len: usize) -> Self {
277 let values = new_empty_array(field.data_type());
278 Self {
279 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
280 nulls: Some(NullBuffer::new_null(len)),
281 value_offsets: OffsetBuffer::new_zeroed(len),
282 values,
283 }
284 }
285
286 pub fn into_parts(
288 self,
289 ) -> (
290 FieldRef,
291 OffsetBuffer<OffsetSize>,
292 ArrayRef,
293 Option<NullBuffer>,
294 ) {
295 let f = match self.data_type {
296 DataType::List(f) | DataType::LargeList(f) => f,
297 _ => unreachable!(),
298 };
299 (f, self.value_offsets, self.values, self.nulls)
300 }
301
302 #[inline]
311 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
312 &self.value_offsets
313 }
314
315 #[inline]
322 pub fn values(&self) -> &ArrayRef {
323 &self.values
324 }
325
326 pub fn value_type(&self) -> DataType {
328 self.values.data_type().clone()
329 }
330
331 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
339 let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() };
340 let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
341 self.values.slice(start, end - start)
342 }
343
344 pub fn value(&self, i: usize) -> ArrayRef {
352 let end = self.value_offsets()[i + 1].as_usize();
353 let start = self.value_offsets()[i].as_usize();
354 self.values.slice(start, end - start)
355 }
356
357 #[inline]
361 pub fn value_offsets(&self) -> &[OffsetSize] {
362 &self.value_offsets
363 }
364
365 #[inline]
367 pub fn value_length(&self, i: usize) -> OffsetSize {
368 let offsets = self.value_offsets();
369 offsets[i + 1] - offsets[i]
370 }
371
372 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
374 GenericListArrayIter::<'a, OffsetSize>::new(self)
375 }
376
377 #[inline]
378 fn get_type(data_type: &DataType) -> Option<&DataType> {
379 match (OffsetSize::IS_LARGE, data_type) {
380 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
381 Some(child.data_type())
382 }
383 _ => None,
384 }
385 }
386
387 pub fn slice(&self, offset: usize, length: usize) -> Self {
393 Self {
394 data_type: self.data_type.clone(),
395 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
396 values: self.values.clone(),
397 value_offsets: self.value_offsets.slice(offset, length),
398 }
399 }
400
401 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
417 where
418 T: ArrowPrimitiveType,
419 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
420 I: IntoIterator<Item = Option<P>>,
421 {
422 let iter = iter.into_iter();
423 let size_hint = iter.size_hint().0;
424 let mut builder =
425 GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
426
427 for i in iter {
428 match i {
429 Some(p) => {
430 for t in p {
431 builder.values().append_option(t);
432 }
433 builder.append(true);
434 }
435 None => builder.append(false),
436 }
437 }
438 builder.finish()
439 }
440}
441
442impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
443 fn from(data: ArrayData) -> Self {
444 Self::try_new_from_array_data(data)
445 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
446 }
447}
448
449impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
450 fn from(array: GenericListArray<OffsetSize>) -> Self {
451 let len = array.len();
452 let builder = ArrayDataBuilder::new(array.data_type)
453 .len(len)
454 .nulls(array.nulls)
455 .buffers(vec![array.value_offsets.into_inner().into_inner()])
456 .child_data(vec![array.values.to_data()]);
457
458 unsafe { builder.build_unchecked() }
459 }
460}
461
462impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
463 fn from(value: FixedSizeListArray) -> Self {
464 let (field, size) = match value.data_type() {
465 DataType::FixedSizeList(f, size) => (f, *size as usize),
466 _ => unreachable!(),
467 };
468
469 let offsets = OffsetBuffer::from_repeated_length(size, value.len());
470
471 Self {
472 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
473 nulls: value.nulls().cloned(),
474 values: value.values().clone(),
475 value_offsets: offsets,
476 }
477 }
478}
479
480impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
481 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
482 if data.buffers().len() != 1 {
483 return Err(ArrowError::InvalidArgumentError(format!(
484 "ListArray data should contain a single buffer only (value offsets), had {}",
485 data.buffers().len()
486 )));
487 }
488
489 if data.child_data().len() != 1 {
490 return Err(ArrowError::InvalidArgumentError(format!(
491 "ListArray should contain a single child array (values array), had {}",
492 data.child_data().len()
493 )));
494 }
495
496 let values = data.child_data()[0].clone();
497
498 if let Some(child_data_type) = Self::get_type(data.data_type()) {
499 if values.data_type() != child_data_type {
500 return Err(ArrowError::InvalidArgumentError(format!(
501 "[Large]ListArray's child datatype {:?} does not \
502 correspond to the List's datatype {:?}",
503 values.data_type(),
504 child_data_type
505 )));
506 }
507 } else {
508 return Err(ArrowError::InvalidArgumentError(format!(
509 "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
510 data.data_type()
511 )));
512 }
513
514 let values = make_array(values);
515 let value_offsets = unsafe { get_offsets(&data) };
518
519 Ok(Self {
520 data_type: data.data_type().clone(),
521 nulls: data.nulls().cloned(),
522 values,
523 value_offsets,
524 })
525 }
526}
527
// Marker impl of the module-private `Sealed` trait for list arrays.
impl<OffsetSize: OffsetSizeTrait> super::private::Sealed for GenericListArray<OffsetSize> {}
529
530impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
531 fn as_any(&self) -> &dyn Any {
532 self
533 }
534
535 fn to_data(&self) -> ArrayData {
536 self.clone().into()
537 }
538
539 fn into_data(self) -> ArrayData {
540 self.into()
541 }
542
543 fn data_type(&self) -> &DataType {
544 &self.data_type
545 }
546
547 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
548 Arc::new(self.slice(offset, length))
549 }
550
551 fn len(&self) -> usize {
552 self.value_offsets.len() - 1
553 }
554
555 fn is_empty(&self) -> bool {
556 self.value_offsets.len() <= 1
557 }
558
559 fn shrink_to_fit(&mut self) {
560 if let Some(nulls) = &mut self.nulls {
561 nulls.shrink_to_fit();
562 }
563 self.values.shrink_to_fit();
564 self.value_offsets.shrink_to_fit();
565 }
566
567 fn offset(&self) -> usize {
568 0
569 }
570
571 fn nulls(&self) -> Option<&NullBuffer> {
572 self.nulls.as_ref()
573 }
574
575 fn logical_null_count(&self) -> usize {
576 self.null_count()
578 }
579
580 fn get_buffer_memory_size(&self) -> usize {
581 let mut size = self.values.get_buffer_memory_size();
582 size += self.value_offsets.inner().inner().capacity();
583 if let Some(n) = self.nulls.as_ref() {
584 size += n.buffer().capacity();
585 }
586 size
587 }
588
589 fn get_array_memory_size(&self) -> usize {
590 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
591 size += self.value_offsets.inner().inner().capacity();
592 if let Some(n) = self.nulls.as_ref() {
593 size += n.buffer().capacity();
594 }
595 size
596 }
597}
598
599impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
600 type Item = ArrayRef;
601
602 fn value(&self, index: usize) -> Self::Item {
603 GenericListArray::value(self, index)
604 }
605
606 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
607 GenericListArray::value(self, index)
608 }
609}
610
611impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
612 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
613 let prefix = OffsetSize::PREFIX;
614
615 write!(f, "{prefix}ListArray\n[\n")?;
616 print_long_array(self, f, |array, index, f| {
617 std::fmt::Debug::fmt(&array.value(index), f)
618 })?;
619 write!(f, "]")
620 }
621}
622
/// A [`GenericListArray`] using `i32` offsets.
pub type ListArray = GenericListArray<i32>;
627
/// A [`GenericListArray`] using `i64` offsets.
pub type LargeListArray = GenericListArray<i64>;
632
633#[cfg(test)]
634mod tests {
635 use super::*;
636 use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
637 use crate::cast::AsArray;
638 use crate::types::Int32Type;
639 use crate::{Int32Array, Int64Array};
640 use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
641 use arrow_schema::Field;
642
643 fn create_from_buffers() -> ListArray {
644 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
646 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
647 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
648 ListArray::new(field, offsets, Arc::new(values), None)
649 }
650
651 #[test]
652 fn test_from_iter_primitive() {
653 let data = vec![
654 Some(vec![Some(0), Some(1), Some(2)]),
655 Some(vec![Some(3), Some(4), Some(5)]),
656 Some(vec![Some(6), Some(7)]),
657 ];
658 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
659
660 let another = create_from_buffers();
661 assert_eq!(list_array, another)
662 }
663
664 #[test]
665 fn test_empty_list_array() {
666 let value_data = ArrayData::builder(DataType::Int32)
668 .len(0)
669 .add_buffer(Buffer::from([]))
670 .build()
671 .unwrap();
672
673 let value_offsets = Buffer::from([]);
675
676 let list_data_type =
678 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
679 let list_data = ArrayData::builder(list_data_type)
680 .len(0)
681 .add_buffer(value_offsets)
682 .add_child_data(value_data)
683 .build()
684 .unwrap();
685
686 let list_array = ListArray::from(list_data);
687 assert_eq!(list_array.len(), 0)
688 }
689
690 #[test]
691 fn test_list_array() {
692 let value_data = ArrayData::builder(DataType::Int32)
694 .len(8)
695 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
696 .build()
697 .unwrap();
698
699 let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
702
703 let list_data_type =
705 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
706 let list_data = ArrayData::builder(list_data_type.clone())
707 .len(3)
708 .add_buffer(value_offsets.clone())
709 .add_child_data(value_data.clone())
710 .build()
711 .unwrap();
712 let list_array = ListArray::from(list_data);
713
714 let values = list_array.values();
715 assert_eq!(value_data, values.to_data());
716 assert_eq!(DataType::Int32, list_array.value_type());
717 assert_eq!(3, list_array.len());
718 assert_eq!(0, list_array.null_count());
719 assert_eq!(6, list_array.value_offsets()[2]);
720 assert_eq!(2, list_array.value_length(2));
721 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
722 assert_eq!(
723 0,
724 unsafe { list_array.value_unchecked(0) }
725 .as_primitive::<Int32Type>()
726 .value(0)
727 );
728 for i in 0..3 {
729 assert!(list_array.is_valid(i));
730 assert!(!list_array.is_null(i));
731 }
732
733 let list_data = ArrayData::builder(list_data_type)
736 .len(2)
737 .offset(1)
738 .add_buffer(value_offsets)
739 .add_child_data(value_data.clone())
740 .build()
741 .unwrap();
742 let list_array = ListArray::from(list_data);
743
744 let values = list_array.values();
745 assert_eq!(value_data, values.to_data());
746 assert_eq!(DataType::Int32, list_array.value_type());
747 assert_eq!(2, list_array.len());
748 assert_eq!(0, list_array.null_count());
749 assert_eq!(6, list_array.value_offsets()[1]);
750 assert_eq!(2, list_array.value_length(1));
751 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
752 assert_eq!(
753 3,
754 unsafe { list_array.value_unchecked(0) }
755 .as_primitive::<Int32Type>()
756 .value(0)
757 );
758 }
759
760 #[test]
761 fn test_large_list_array() {
762 let value_data = ArrayData::builder(DataType::Int32)
764 .len(8)
765 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
766 .build()
767 .unwrap();
768
769 let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
772
773 let list_data_type = DataType::new_large_list(DataType::Int32, false);
775 let list_data = ArrayData::builder(list_data_type.clone())
776 .len(3)
777 .add_buffer(value_offsets.clone())
778 .add_child_data(value_data.clone())
779 .build()
780 .unwrap();
781 let list_array = LargeListArray::from(list_data);
782
783 let values = list_array.values();
784 assert_eq!(value_data, values.to_data());
785 assert_eq!(DataType::Int32, list_array.value_type());
786 assert_eq!(3, list_array.len());
787 assert_eq!(0, list_array.null_count());
788 assert_eq!(6, list_array.value_offsets()[2]);
789 assert_eq!(2, list_array.value_length(2));
790 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
791 assert_eq!(
792 0,
793 unsafe { list_array.value_unchecked(0) }
794 .as_primitive::<Int32Type>()
795 .value(0)
796 );
797 for i in 0..3 {
798 assert!(list_array.is_valid(i));
799 assert!(!list_array.is_null(i));
800 }
801
802 let list_data = ArrayData::builder(list_data_type)
805 .len(2)
806 .offset(1)
807 .add_buffer(value_offsets)
808 .add_child_data(value_data.clone())
809 .build()
810 .unwrap();
811 let list_array = LargeListArray::from(list_data);
812
813 let values = list_array.values();
814 assert_eq!(value_data, values.to_data());
815 assert_eq!(DataType::Int32, list_array.value_type());
816 assert_eq!(2, list_array.len());
817 assert_eq!(0, list_array.null_count());
818 assert_eq!(6, list_array.value_offsets()[1]);
819 assert_eq!(2, list_array.value_length(1));
820 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
821 assert_eq!(
822 3,
823 unsafe { list_array.value_unchecked(0) }
824 .as_primitive::<Int32Type>()
825 .value(0)
826 );
827 }
828
829 #[test]
830 fn test_list_array_slice() {
831 let value_data = ArrayData::builder(DataType::Int32)
833 .len(10)
834 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
835 .build()
836 .unwrap();
837
838 let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
841 let mut null_bits: [u8; 2] = [0; 2];
843 bit_util::set_bit(&mut null_bits, 0);
844 bit_util::set_bit(&mut null_bits, 3);
845 bit_util::set_bit(&mut null_bits, 4);
846 bit_util::set_bit(&mut null_bits, 6);
847 bit_util::set_bit(&mut null_bits, 8);
848
849 let list_data_type =
851 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
852 let list_data = ArrayData::builder(list_data_type)
853 .len(9)
854 .add_buffer(value_offsets)
855 .add_child_data(value_data.clone())
856 .null_bit_buffer(Some(Buffer::from(null_bits)))
857 .build()
858 .unwrap();
859 let list_array = ListArray::from(list_data);
860
861 let values = list_array.values();
862 assert_eq!(value_data, values.to_data());
863 assert_eq!(DataType::Int32, list_array.value_type());
864 assert_eq!(9, list_array.len());
865 assert_eq!(4, list_array.null_count());
866 assert_eq!(2, list_array.value_offsets()[3]);
867 assert_eq!(2, list_array.value_length(3));
868
869 let sliced_array = list_array.slice(1, 6);
870 assert_eq!(6, sliced_array.len());
871 assert_eq!(3, sliced_array.null_count());
872
873 for i in 0..sliced_array.len() {
874 if bit_util::get_bit(&null_bits, 1 + i) {
875 assert!(sliced_array.is_valid(i));
876 } else {
877 assert!(sliced_array.is_null(i));
878 }
879 }
880
881 let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
883 assert_eq!(2, sliced_list_array.value_offsets()[2]);
884 assert_eq!(2, sliced_list_array.value_length(2));
885 assert_eq!(4, sliced_list_array.value_offsets()[3]);
886 assert_eq!(2, sliced_list_array.value_length(3));
887 assert_eq!(6, sliced_list_array.value_offsets()[5]);
888 assert_eq!(3, sliced_list_array.value_length(5));
889 }
890
891 #[test]
892 fn test_large_list_array_slice() {
893 let value_data = ArrayData::builder(DataType::Int32)
895 .len(10)
896 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
897 .build()
898 .unwrap();
899
900 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
903 let mut null_bits: [u8; 2] = [0; 2];
905 bit_util::set_bit(&mut null_bits, 0);
906 bit_util::set_bit(&mut null_bits, 3);
907 bit_util::set_bit(&mut null_bits, 4);
908 bit_util::set_bit(&mut null_bits, 6);
909 bit_util::set_bit(&mut null_bits, 8);
910
911 let list_data_type = DataType::new_large_list(DataType::Int32, false);
913 let list_data = ArrayData::builder(list_data_type)
914 .len(9)
915 .add_buffer(value_offsets)
916 .add_child_data(value_data.clone())
917 .null_bit_buffer(Some(Buffer::from(null_bits)))
918 .build()
919 .unwrap();
920 let list_array = LargeListArray::from(list_data);
921
922 let values = list_array.values();
923 assert_eq!(value_data, values.to_data());
924 assert_eq!(DataType::Int32, list_array.value_type());
925 assert_eq!(9, list_array.len());
926 assert_eq!(4, list_array.null_count());
927 assert_eq!(2, list_array.value_offsets()[3]);
928 assert_eq!(2, list_array.value_length(3));
929
930 let sliced_array = list_array.slice(1, 6);
931 assert_eq!(6, sliced_array.len());
932 assert_eq!(3, sliced_array.null_count());
933
934 for i in 0..sliced_array.len() {
935 if bit_util::get_bit(&null_bits, 1 + i) {
936 assert!(sliced_array.is_valid(i));
937 } else {
938 assert!(sliced_array.is_null(i));
939 }
940 }
941
942 let sliced_list_array = sliced_array
944 .as_any()
945 .downcast_ref::<LargeListArray>()
946 .unwrap();
947 assert_eq!(2, sliced_list_array.value_offsets()[2]);
948 assert_eq!(2, sliced_list_array.value_length(2));
949 assert_eq!(4, sliced_list_array.value_offsets()[3]);
950 assert_eq!(2, sliced_list_array.value_length(3));
951 assert_eq!(6, sliced_list_array.value_offsets()[5]);
952 assert_eq!(3, sliced_list_array.value_length(5));
953 }
954
955 #[test]
956 #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
957 fn test_list_array_index_out_of_bound() {
958 let value_data = ArrayData::builder(DataType::Int32)
960 .len(10)
961 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
962 .build()
963 .unwrap();
964
965 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
968 let mut null_bits: [u8; 2] = [0; 2];
970 bit_util::set_bit(&mut null_bits, 0);
971 bit_util::set_bit(&mut null_bits, 3);
972 bit_util::set_bit(&mut null_bits, 4);
973 bit_util::set_bit(&mut null_bits, 6);
974 bit_util::set_bit(&mut null_bits, 8);
975
976 let list_data_type = DataType::new_large_list(DataType::Int32, false);
978 let list_data = ArrayData::builder(list_data_type)
979 .len(9)
980 .add_buffer(value_offsets)
981 .add_child_data(value_data)
982 .null_bit_buffer(Some(Buffer::from(null_bits)))
983 .build()
984 .unwrap();
985 let list_array = LargeListArray::from(list_data);
986 assert_eq!(9, list_array.len());
987
988 list_array.value(10);
989 }
990 #[test]
991 #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
992 #[cfg(not(feature = "force_validate"))]
995 fn test_list_array_invalid_buffer_len() {
996 let value_data = unsafe {
997 ArrayData::builder(DataType::Int32)
998 .len(8)
999 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1000 .build_unchecked()
1001 };
1002 let list_data_type =
1003 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1004 let list_data = unsafe {
1005 ArrayData::builder(list_data_type)
1006 .len(3)
1007 .add_child_data(value_data)
1008 .build_unchecked()
1009 };
1010 drop(ListArray::from(list_data));
1011 }
1012
1013 #[test]
1014 #[should_panic(expected = "ListArray should contain a single child array (values array)")]
1015 #[cfg(not(feature = "force_validate"))]
1018 fn test_list_array_invalid_child_array_len() {
1019 let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
1020 let list_data_type =
1021 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1022 let list_data = unsafe {
1023 ArrayData::builder(list_data_type)
1024 .len(3)
1025 .add_buffer(value_offsets)
1026 .build_unchecked()
1027 };
1028 drop(ListArray::from(list_data));
1029 }
1030
1031 #[test]
1032 #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
1033 fn test_from_array_data_validation() {
1034 let mut builder = ListBuilder::new(Int32Builder::new());
1035 builder.values().append_value(1);
1036 builder.append(true);
1037 let array = builder.finish();
1038 let _ = LargeListArray::from(array.into_data());
1039 }
1040
1041 #[test]
1042 fn test_list_array_offsets_need_not_start_at_zero() {
1043 let value_data = ArrayData::builder(DataType::Int32)
1044 .len(8)
1045 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1046 .build()
1047 .unwrap();
1048
1049 let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
1050
1051 let list_data_type =
1052 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1053 let list_data = ArrayData::builder(list_data_type)
1054 .len(3)
1055 .add_buffer(value_offsets)
1056 .add_child_data(value_data)
1057 .build()
1058 .unwrap();
1059
1060 let list_array = ListArray::from(list_data);
1061 assert_eq!(list_array.value_length(0), 0);
1062 assert_eq!(list_array.value_length(1), 3);
1063 assert_eq!(list_array.value_length(2), 2);
1064 }
1065
1066 #[test]
1067 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1068 #[cfg(not(feature = "force_validate"))]
1071 fn test_primitive_array_alignment() {
1072 let buf = Buffer::from_slice_ref([0_u64]);
1073 let buf2 = buf.slice(1);
1074 let array_data = unsafe {
1075 ArrayData::builder(DataType::Int32)
1076 .add_buffer(buf2)
1077 .build_unchecked()
1078 };
1079 drop(Int32Array::from(array_data));
1080 }
1081
1082 #[test]
1083 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1084 #[cfg(not(feature = "force_validate"))]
1087 fn test_list_array_alignment() {
1088 let buf = Buffer::from_slice_ref([0_u64]);
1089 let buf2 = buf.slice(1);
1090
1091 let values: [i32; 8] = [0; 8];
1092 let value_data = unsafe {
1093 ArrayData::builder(DataType::Int32)
1094 .add_buffer(Buffer::from_slice_ref(values))
1095 .build_unchecked()
1096 };
1097
1098 let list_data_type =
1099 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1100 let list_data = unsafe {
1101 ArrayData::builder(list_data_type)
1102 .add_buffer(buf2)
1103 .add_child_data(value_data)
1104 .build_unchecked()
1105 };
1106 drop(ListArray::from(list_data));
1107 }
1108
1109 #[test]
1110 fn list_array_equality() {
1111 fn do_comparison(
1113 lhs_data: Vec<Option<Vec<Option<i32>>>>,
1114 rhs_data: Vec<Option<Vec<Option<i32>>>>,
1115 should_equal: bool,
1116 ) {
1117 let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
1118 let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
1119 assert_eq!(lhs == rhs, should_equal);
1120
1121 let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
1122 let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
1123 assert_eq!(lhs == rhs, should_equal);
1124 }
1125
1126 do_comparison(
1127 vec![
1128 Some(vec![Some(0), Some(1), Some(2)]),
1129 None,
1130 Some(vec![Some(3), None, Some(5)]),
1131 Some(vec![Some(6), Some(7)]),
1132 ],
1133 vec![
1134 Some(vec![Some(0), Some(1), Some(2)]),
1135 None,
1136 Some(vec![Some(3), None, Some(5)]),
1137 Some(vec![Some(6), Some(7)]),
1138 ],
1139 true,
1140 );
1141
1142 do_comparison(
1143 vec![
1144 None,
1145 None,
1146 Some(vec![Some(3), None, Some(5)]),
1147 Some(vec![Some(6), Some(7)]),
1148 ],
1149 vec![
1150 Some(vec![Some(0), Some(1), Some(2)]),
1151 None,
1152 Some(vec![Some(3), None, Some(5)]),
1153 Some(vec![Some(6), Some(7)]),
1154 ],
1155 false,
1156 );
1157
1158 do_comparison(
1159 vec![
1160 None,
1161 None,
1162 Some(vec![Some(3), None, Some(5)]),
1163 Some(vec![Some(6), Some(7)]),
1164 ],
1165 vec![
1166 None,
1167 None,
1168 Some(vec![Some(3), None, Some(5)]),
1169 Some(vec![Some(0), Some(0)]),
1170 ],
1171 false,
1172 );
1173
1174 do_comparison(
1175 vec![None, None, Some(vec![Some(1)])],
1176 vec![None, None, Some(vec![Some(2)])],
1177 false,
1178 );
1179 }
1180
1181 #[test]
1182 fn test_empty_offsets() {
1183 let f = Arc::new(Field::new("element", DataType::Int32, true));
1184 let string = ListArray::from(
1185 ArrayData::builder(DataType::List(f.clone()))
1186 .buffers(vec![Buffer::from(&[])])
1187 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1188 .build()
1189 .unwrap(),
1190 );
1191 assert_eq!(string.value_offsets(), &[0]);
1192 let string = LargeListArray::from(
1193 ArrayData::builder(DataType::LargeList(f))
1194 .buffers(vec![Buffer::from(&[])])
1195 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1196 .build()
1197 .unwrap(),
1198 );
1199 assert_eq!(string.len(), 0);
1200 assert_eq!(string.value_offsets(), &[0]);
1201 }
1202
1203 #[test]
1204 fn test_try_new() {
1205 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1206 let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
1207 let values = Arc::new(values) as ArrayRef;
1208
1209 let field = Arc::new(Field::new("element", DataType::Int32, false));
1210 ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
1211
1212 let nulls = NullBuffer::new_null(3);
1213 ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));
1214
1215 let nulls = NullBuffer::new_null(3);
1216 let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
1217 let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
1218 .unwrap_err();
1219
1220 assert_eq!(
1221 err.to_string(),
1222 "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
1223 );
1224
1225 let field = Arc::new(Field::new("element", DataType::Int64, false));
1226 let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
1227 .unwrap_err();
1228
1229 assert_eq!(
1230 err.to_string(),
1231 "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
1232 );
1233
1234 let nulls = NullBuffer::new_null(7);
1235 let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
1236 let values = Arc::new(values);
1237
1238 let err =
1239 LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
1240
1241 assert_eq!(
1242 err.to_string(),
1243 "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
1244 );
1245
1246 let field = Arc::new(Field::new("element", DataType::Int64, true));
1247 LargeListArray::new(field.clone(), offsets.clone(), values, None);
1248
1249 let values = Int64Array::new(vec![0; 2].into(), None);
1250 let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
1251
1252 assert_eq!(
1253 err.to_string(),
1254 "Invalid argument error: Max offset of 5 exceeds length of values 2"
1255 );
1256 }
1257
1258 #[test]
1259 fn test_from_fixed_size_list() {
1260 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1261 builder.values().append_slice(&[1, 2, 3]);
1262 builder.append(true);
1263 builder.values().append_slice(&[0, 0, 0]);
1264 builder.append(false);
1265 builder.values().append_slice(&[4, 5, 6]);
1266 builder.append(true);
1267 let list: ListArray = builder.finish().into();
1268
1269 let values: Vec<_> = list
1270 .iter()
1271 .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1272 .collect();
1273 assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1274 }
1275
1276 #[test]
1277 fn test_nullable_union() {
1278 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1279 let mut builder = UnionBuilder::new_dense();
1280 builder.append::<Int32Type>("a", 1).unwrap();
1281 builder.append::<Int32Type>("b", 2).unwrap();
1282 builder.append::<Int32Type>("b", 3).unwrap();
1283 builder.append::<Int32Type>("a", 4).unwrap();
1284 builder.append::<Int32Type>("a", 5).unwrap();
1285 let values = builder.build().unwrap();
1286 let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1287 ListArray::new(field.clone(), offsets, Arc::new(values), None);
1288 }
1289
1290 #[test]
1291 fn test_list_new_null_len() {
1292 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
1293 let array = ListArray::new_null(field, 5);
1294 assert_eq!(array.len(), 5);
1295 }
1296}