1use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
22 iterator::GenericListArrayIter, new_empty_array,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num_integer::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
/// An integer type usable as the offset type of a variable-length list array.
///
/// This file implements it for `i32` and `i64`.
pub trait OffsetSizeTrait:
    ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd
{
    /// `true` for the wide ("Large") offset type, `false` for the narrow one.
    const IS_LARGE: bool;
    /// Text placed in front of `ListArray` in messages; `"Large"` or empty.
    const PREFIX: &'static str;
    /// The largest value of the offset type, expressed as a `usize`.
    const MAX_OFFSET: usize;
}
50
/// 32-bit offsets: the narrow variant, with no name prefix.
impl OffsetSizeTrait for i32 {
    const IS_LARGE: bool = false;
    const PREFIX: &'static str = "";
    const MAX_OFFSET: usize = i32::MAX as usize;
}
56
/// 64-bit offsets: the wide variant, prefixed with `"Large"`.
impl OffsetSizeTrait for i64 {
    const IS_LARGE: bool = true;
    const PREFIX: &'static str = "Large";
    // NOTE: an `as` cast truncates on targets whose `usize` is narrower than 64 bits.
    const MAX_OFFSET: usize = i64::MAX as usize;
}
62
/// An array of variable-length lists, generic over the offset width.
///
/// List `i` is the slice of `values` spanning
/// `value_offsets[i]..value_offsets[i + 1]`.
pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
    /// The list data type, which wraps the child field.
    data_type: DataType,
    /// Optional validity mask over the lists.
    nulls: Option<NullBuffer>,
    /// The child array that all lists are slices of.
    values: ArrayRef,
    /// Start/end positions of each list within `values`.
    value_offsets: OffsetBuffer<OffsetSize>,
}
177
178impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
179 fn clone(&self) -> Self {
180 Self {
181 data_type: self.data_type.clone(),
182 nulls: self.nulls.clone(),
183 values: self.values.clone(),
184 value_offsets: self.value_offsets.clone(),
185 }
186 }
187}
188
189impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
190 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
194 DataType::LargeList
195 } else {
196 DataType::List
197 };
198
199 pub fn try_new(
210 field: FieldRef,
211 offsets: OffsetBuffer<OffsetSize>,
212 values: ArrayRef,
213 nulls: Option<NullBuffer>,
214 ) -> Result<Self, ArrowError> {
215 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
217 if end_offset > values.len() {
220 return Err(ArrowError::InvalidArgumentError(format!(
221 "Max offset of {end_offset} exceeds length of values {}",
222 values.len()
223 )));
224 }
225
226 if let Some(n) = nulls.as_ref() {
227 if n.len() != len {
228 return Err(ArrowError::InvalidArgumentError(format!(
229 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
230 OffsetSize::PREFIX,
231 n.len(),
232 )));
233 }
234 }
235 if !field.is_nullable() && values.is_nullable() {
236 return Err(ArrowError::InvalidArgumentError(format!(
237 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
238 OffsetSize::PREFIX,
239 field.name()
240 )));
241 }
242
243 if field.data_type() != values.data_type() {
244 return Err(ArrowError::InvalidArgumentError(format!(
245 "{}ListArray expected data type {} got {} for {:?}",
246 OffsetSize::PREFIX,
247 field.data_type(),
248 values.data_type(),
249 field.name()
250 )));
251 }
252
253 Ok(Self {
254 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
255 nulls,
256 values,
257 value_offsets: offsets,
258 })
259 }
260
261 pub fn new(
267 field: FieldRef,
268 offsets: OffsetBuffer<OffsetSize>,
269 values: ArrayRef,
270 nulls: Option<NullBuffer>,
271 ) -> Self {
272 Self::try_new(field, offsets, values, nulls).unwrap()
273 }
274
275 pub fn new_null(field: FieldRef, len: usize) -> Self {
277 let values = new_empty_array(field.data_type());
278 Self {
279 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
280 nulls: Some(NullBuffer::new_null(len)),
281 value_offsets: OffsetBuffer::new_zeroed(len),
282 values,
283 }
284 }
285
286 pub fn into_parts(
288 self,
289 ) -> (
290 FieldRef,
291 OffsetBuffer<OffsetSize>,
292 ArrayRef,
293 Option<NullBuffer>,
294 ) {
295 let f = match self.data_type {
296 DataType::List(f) | DataType::LargeList(f) => f,
297 _ => unreachable!(),
298 };
299 (f, self.value_offsets, self.values, self.nulls)
300 }
301
302 #[inline]
311 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
312 &self.value_offsets
313 }
314
315 #[inline]
322 pub fn values(&self) -> &ArrayRef {
323 &self.values
324 }
325
326 pub fn value_type(&self) -> DataType {
328 self.values.data_type().clone()
329 }
330
331 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
339 let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() };
340 let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
341 self.values.slice(start, end - start)
342 }
343
344 pub fn value(&self, i: usize) -> ArrayRef {
352 let end = self.value_offsets()[i + 1].as_usize();
353 let start = self.value_offsets()[i].as_usize();
354 self.values.slice(start, end - start)
355 }
356
357 #[inline]
361 pub fn value_offsets(&self) -> &[OffsetSize] {
362 &self.value_offsets
363 }
364
365 #[inline]
367 pub fn value_length(&self, i: usize) -> OffsetSize {
368 let offsets = self.value_offsets();
369 offsets[i + 1] - offsets[i]
370 }
371
372 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
374 GenericListArrayIter::<'a, OffsetSize>::new(self)
375 }
376
377 #[inline]
378 fn get_type(data_type: &DataType) -> Option<&DataType> {
379 match (OffsetSize::IS_LARGE, data_type) {
380 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
381 Some(child.data_type())
382 }
383 _ => None,
384 }
385 }
386
387 pub fn slice(&self, offset: usize, length: usize) -> Self {
393 Self {
394 data_type: self.data_type.clone(),
395 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
396 values: self.values.clone(),
397 value_offsets: self.value_offsets.slice(offset, length),
398 }
399 }
400
401 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
417 where
418 T: ArrowPrimitiveType,
419 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
420 I: IntoIterator<Item = Option<P>>,
421 {
422 let iter = iter.into_iter();
423 let size_hint = iter.size_hint().0;
424 let mut builder =
425 GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
426
427 for i in iter {
428 match i {
429 Some(p) => {
430 for t in p {
431 builder.values().append_option(t);
432 }
433 builder.append(true);
434 }
435 None => builder.append(false),
436 }
437 }
438 builder.finish()
439 }
440}
441
442impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
443 fn from(data: ArrayData) -> Self {
444 Self::try_new_from_array_data(data)
445 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
446 }
447}
448
449impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
450 fn from(array: GenericListArray<OffsetSize>) -> Self {
451 let len = array.len();
452 let builder = ArrayDataBuilder::new(array.data_type)
453 .len(len)
454 .nulls(array.nulls)
455 .buffers(vec![array.value_offsets.into_inner().into_inner()])
456 .child_data(vec![array.values.to_data()]);
457
458 unsafe { builder.build_unchecked() }
459 }
460}
461
462impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
463 fn from(value: FixedSizeListArray) -> Self {
464 let (field, size) = match value.data_type() {
465 DataType::FixedSizeList(f, size) => (f, *size as usize),
466 _ => unreachable!(),
467 };
468
469 let offsets = OffsetBuffer::from_repeated_length(size, value.len());
470
471 Self {
472 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
473 nulls: value.nulls().cloned(),
474 values: value.values().clone(),
475 value_offsets: offsets,
476 }
477 }
478}
479
480impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
481 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
482 if data.buffers().len() != 1 {
483 return Err(ArrowError::InvalidArgumentError(format!(
484 "ListArray data should contain a single buffer only (value offsets), had {}",
485 data.buffers().len()
486 )));
487 }
488
489 if data.child_data().len() != 1 {
490 return Err(ArrowError::InvalidArgumentError(format!(
491 "ListArray should contain a single child array (values array), had {}",
492 data.child_data().len()
493 )));
494 }
495
496 let values = data.child_data()[0].clone();
497
498 if let Some(child_data_type) = Self::get_type(data.data_type()) {
499 if values.data_type() != child_data_type {
500 return Err(ArrowError::InvalidArgumentError(format!(
501 "[Large]ListArray's child datatype {:?} does not \
502 correspond to the List's datatype {:?}",
503 values.data_type(),
504 child_data_type
505 )));
506 }
507 } else {
508 return Err(ArrowError::InvalidArgumentError(format!(
509 "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
510 data.data_type()
511 )));
512 }
513
514 let values = make_array(values);
515 let value_offsets = unsafe { get_offsets(&data) };
518
519 Ok(Self {
520 data_type: data.data_type().clone(),
521 nulls: data.nulls().cloned(),
522 values,
523 value_offsets,
524 })
525 }
526}
527
528unsafe impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
530 fn as_any(&self) -> &dyn Any {
531 self
532 }
533
534 fn to_data(&self) -> ArrayData {
535 self.clone().into()
536 }
537
538 fn into_data(self) -> ArrayData {
539 self.into()
540 }
541
542 fn data_type(&self) -> &DataType {
543 &self.data_type
544 }
545
546 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
547 Arc::new(self.slice(offset, length))
548 }
549
550 fn len(&self) -> usize {
551 self.value_offsets.len() - 1
552 }
553
554 fn is_empty(&self) -> bool {
555 self.value_offsets.len() <= 1
556 }
557
558 fn shrink_to_fit(&mut self) {
559 if let Some(nulls) = &mut self.nulls {
560 nulls.shrink_to_fit();
561 }
562 self.values.shrink_to_fit();
563 self.value_offsets.shrink_to_fit();
564 }
565
566 fn offset(&self) -> usize {
567 0
568 }
569
570 fn nulls(&self) -> Option<&NullBuffer> {
571 self.nulls.as_ref()
572 }
573
574 fn logical_null_count(&self) -> usize {
575 self.null_count()
577 }
578
579 fn get_buffer_memory_size(&self) -> usize {
580 let mut size = self.values.get_buffer_memory_size();
581 size += self.value_offsets.inner().inner().capacity();
582 if let Some(n) = self.nulls.as_ref() {
583 size += n.buffer().capacity();
584 }
585 size
586 }
587
588 fn get_array_memory_size(&self) -> usize {
589 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
590 size += self.value_offsets.inner().inner().capacity();
591 if let Some(n) = self.nulls.as_ref() {
592 size += n.buffer().capacity();
593 }
594 size
595 }
596}
597
598impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
599 type Item = ArrayRef;
600
601 fn value(&self, index: usize) -> Self::Item {
602 GenericListArray::value(self, index)
603 }
604
605 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
606 GenericListArray::value(self, index)
607 }
608}
609
610impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
611 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
612 let prefix = OffsetSize::PREFIX;
613
614 write!(f, "{prefix}ListArray\n[\n")?;
615 print_long_array(self, f, |array, index, f| {
616 std::fmt::Debug::fmt(&array.value(index), f)
617 })?;
618 write!(f, "]")
619 }
620}
621
/// A [`GenericListArray`] with `i32` offsets.
pub type ListArray = GenericListArray<i32>;
626
/// A [`GenericListArray`] with `i64` offsets.
pub type LargeListArray = GenericListArray<i64>;
631
632#[cfg(test)]
633mod tests {
634 use super::*;
635 use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
636 use crate::cast::AsArray;
637 use crate::types::Int32Type;
638 use crate::{Int32Array, Int64Array};
639 use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
640 use arrow_schema::Field;
641
642 fn create_from_buffers() -> ListArray {
643 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
645 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
646 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
647 ListArray::new(field, offsets, Arc::new(values), None)
648 }
649
650 #[test]
651 fn test_from_iter_primitive() {
652 let data = vec![
653 Some(vec![Some(0), Some(1), Some(2)]),
654 Some(vec![Some(3), Some(4), Some(5)]),
655 Some(vec![Some(6), Some(7)]),
656 ];
657 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
658
659 let another = create_from_buffers();
660 assert_eq!(list_array, another)
661 }
662
663 #[test]
664 fn test_empty_list_array() {
665 let value_data = ArrayData::builder(DataType::Int32)
667 .len(0)
668 .add_buffer(Buffer::from([]))
669 .build()
670 .unwrap();
671
672 let value_offsets = Buffer::from([]);
674
675 let list_data_type =
677 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
678 let list_data = ArrayData::builder(list_data_type)
679 .len(0)
680 .add_buffer(value_offsets)
681 .add_child_data(value_data)
682 .build()
683 .unwrap();
684
685 let list_array = ListArray::from(list_data);
686 assert_eq!(list_array.len(), 0)
687 }
688
689 #[test]
690 fn test_list_array() {
691 let value_data = ArrayData::builder(DataType::Int32)
693 .len(8)
694 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
695 .build()
696 .unwrap();
697
698 let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
701
702 let list_data_type =
704 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
705 let list_data = ArrayData::builder(list_data_type.clone())
706 .len(3)
707 .add_buffer(value_offsets.clone())
708 .add_child_data(value_data.clone())
709 .build()
710 .unwrap();
711 let list_array = ListArray::from(list_data);
712
713 let values = list_array.values();
714 assert_eq!(value_data, values.to_data());
715 assert_eq!(DataType::Int32, list_array.value_type());
716 assert_eq!(3, list_array.len());
717 assert_eq!(0, list_array.null_count());
718 assert_eq!(6, list_array.value_offsets()[2]);
719 assert_eq!(2, list_array.value_length(2));
720 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
721 assert_eq!(
722 0,
723 unsafe { list_array.value_unchecked(0) }
724 .as_primitive::<Int32Type>()
725 .value(0)
726 );
727 for i in 0..3 {
728 assert!(list_array.is_valid(i));
729 assert!(!list_array.is_null(i));
730 }
731
732 let list_data = ArrayData::builder(list_data_type)
735 .len(2)
736 .offset(1)
737 .add_buffer(value_offsets)
738 .add_child_data(value_data.clone())
739 .build()
740 .unwrap();
741 let list_array = ListArray::from(list_data);
742
743 let values = list_array.values();
744 assert_eq!(value_data, values.to_data());
745 assert_eq!(DataType::Int32, list_array.value_type());
746 assert_eq!(2, list_array.len());
747 assert_eq!(0, list_array.null_count());
748 assert_eq!(6, list_array.value_offsets()[1]);
749 assert_eq!(2, list_array.value_length(1));
750 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
751 assert_eq!(
752 3,
753 unsafe { list_array.value_unchecked(0) }
754 .as_primitive::<Int32Type>()
755 .value(0)
756 );
757 }
758
759 #[test]
760 fn test_large_list_array() {
761 let value_data = ArrayData::builder(DataType::Int32)
763 .len(8)
764 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
765 .build()
766 .unwrap();
767
768 let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
771
772 let list_data_type = DataType::new_large_list(DataType::Int32, false);
774 let list_data = ArrayData::builder(list_data_type.clone())
775 .len(3)
776 .add_buffer(value_offsets.clone())
777 .add_child_data(value_data.clone())
778 .build()
779 .unwrap();
780 let list_array = LargeListArray::from(list_data);
781
782 let values = list_array.values();
783 assert_eq!(value_data, values.to_data());
784 assert_eq!(DataType::Int32, list_array.value_type());
785 assert_eq!(3, list_array.len());
786 assert_eq!(0, list_array.null_count());
787 assert_eq!(6, list_array.value_offsets()[2]);
788 assert_eq!(2, list_array.value_length(2));
789 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
790 assert_eq!(
791 0,
792 unsafe { list_array.value_unchecked(0) }
793 .as_primitive::<Int32Type>()
794 .value(0)
795 );
796 for i in 0..3 {
797 assert!(list_array.is_valid(i));
798 assert!(!list_array.is_null(i));
799 }
800
801 let list_data = ArrayData::builder(list_data_type)
804 .len(2)
805 .offset(1)
806 .add_buffer(value_offsets)
807 .add_child_data(value_data.clone())
808 .build()
809 .unwrap();
810 let list_array = LargeListArray::from(list_data);
811
812 let values = list_array.values();
813 assert_eq!(value_data, values.to_data());
814 assert_eq!(DataType::Int32, list_array.value_type());
815 assert_eq!(2, list_array.len());
816 assert_eq!(0, list_array.null_count());
817 assert_eq!(6, list_array.value_offsets()[1]);
818 assert_eq!(2, list_array.value_length(1));
819 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
820 assert_eq!(
821 3,
822 unsafe { list_array.value_unchecked(0) }
823 .as_primitive::<Int32Type>()
824 .value(0)
825 );
826 }
827
828 #[test]
829 fn test_list_array_slice() {
830 let value_data = ArrayData::builder(DataType::Int32)
832 .len(10)
833 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
834 .build()
835 .unwrap();
836
837 let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
840 let mut null_bits: [u8; 2] = [0; 2];
842 bit_util::set_bit(&mut null_bits, 0);
843 bit_util::set_bit(&mut null_bits, 3);
844 bit_util::set_bit(&mut null_bits, 4);
845 bit_util::set_bit(&mut null_bits, 6);
846 bit_util::set_bit(&mut null_bits, 8);
847
848 let list_data_type =
850 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
851 let list_data = ArrayData::builder(list_data_type)
852 .len(9)
853 .add_buffer(value_offsets)
854 .add_child_data(value_data.clone())
855 .null_bit_buffer(Some(Buffer::from(null_bits)))
856 .build()
857 .unwrap();
858 let list_array = ListArray::from(list_data);
859
860 let values = list_array.values();
861 assert_eq!(value_data, values.to_data());
862 assert_eq!(DataType::Int32, list_array.value_type());
863 assert_eq!(9, list_array.len());
864 assert_eq!(4, list_array.null_count());
865 assert_eq!(2, list_array.value_offsets()[3]);
866 assert_eq!(2, list_array.value_length(3));
867
868 let sliced_array = list_array.slice(1, 6);
869 assert_eq!(6, sliced_array.len());
870 assert_eq!(3, sliced_array.null_count());
871
872 for i in 0..sliced_array.len() {
873 if bit_util::get_bit(&null_bits, 1 + i) {
874 assert!(sliced_array.is_valid(i));
875 } else {
876 assert!(sliced_array.is_null(i));
877 }
878 }
879
880 let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
882 assert_eq!(2, sliced_list_array.value_offsets()[2]);
883 assert_eq!(2, sliced_list_array.value_length(2));
884 assert_eq!(4, sliced_list_array.value_offsets()[3]);
885 assert_eq!(2, sliced_list_array.value_length(3));
886 assert_eq!(6, sliced_list_array.value_offsets()[5]);
887 assert_eq!(3, sliced_list_array.value_length(5));
888 }
889
890 #[test]
891 fn test_large_list_array_slice() {
892 let value_data = ArrayData::builder(DataType::Int32)
894 .len(10)
895 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
896 .build()
897 .unwrap();
898
899 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
902 let mut null_bits: [u8; 2] = [0; 2];
904 bit_util::set_bit(&mut null_bits, 0);
905 bit_util::set_bit(&mut null_bits, 3);
906 bit_util::set_bit(&mut null_bits, 4);
907 bit_util::set_bit(&mut null_bits, 6);
908 bit_util::set_bit(&mut null_bits, 8);
909
910 let list_data_type = DataType::new_large_list(DataType::Int32, false);
912 let list_data = ArrayData::builder(list_data_type)
913 .len(9)
914 .add_buffer(value_offsets)
915 .add_child_data(value_data.clone())
916 .null_bit_buffer(Some(Buffer::from(null_bits)))
917 .build()
918 .unwrap();
919 let list_array = LargeListArray::from(list_data);
920
921 let values = list_array.values();
922 assert_eq!(value_data, values.to_data());
923 assert_eq!(DataType::Int32, list_array.value_type());
924 assert_eq!(9, list_array.len());
925 assert_eq!(4, list_array.null_count());
926 assert_eq!(2, list_array.value_offsets()[3]);
927 assert_eq!(2, list_array.value_length(3));
928
929 let sliced_array = list_array.slice(1, 6);
930 assert_eq!(6, sliced_array.len());
931 assert_eq!(3, sliced_array.null_count());
932
933 for i in 0..sliced_array.len() {
934 if bit_util::get_bit(&null_bits, 1 + i) {
935 assert!(sliced_array.is_valid(i));
936 } else {
937 assert!(sliced_array.is_null(i));
938 }
939 }
940
941 let sliced_list_array = sliced_array
943 .as_any()
944 .downcast_ref::<LargeListArray>()
945 .unwrap();
946 assert_eq!(2, sliced_list_array.value_offsets()[2]);
947 assert_eq!(2, sliced_list_array.value_length(2));
948 assert_eq!(4, sliced_list_array.value_offsets()[3]);
949 assert_eq!(2, sliced_list_array.value_length(3));
950 assert_eq!(6, sliced_list_array.value_offsets()[5]);
951 assert_eq!(3, sliced_list_array.value_length(5));
952 }
953
954 #[test]
955 #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
956 fn test_list_array_index_out_of_bound() {
957 let value_data = ArrayData::builder(DataType::Int32)
959 .len(10)
960 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
961 .build()
962 .unwrap();
963
964 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
967 let mut null_bits: [u8; 2] = [0; 2];
969 bit_util::set_bit(&mut null_bits, 0);
970 bit_util::set_bit(&mut null_bits, 3);
971 bit_util::set_bit(&mut null_bits, 4);
972 bit_util::set_bit(&mut null_bits, 6);
973 bit_util::set_bit(&mut null_bits, 8);
974
975 let list_data_type = DataType::new_large_list(DataType::Int32, false);
977 let list_data = ArrayData::builder(list_data_type)
978 .len(9)
979 .add_buffer(value_offsets)
980 .add_child_data(value_data)
981 .null_bit_buffer(Some(Buffer::from(null_bits)))
982 .build()
983 .unwrap();
984 let list_array = LargeListArray::from(list_data);
985 assert_eq!(9, list_array.len());
986
987 list_array.value(10);
988 }
989 #[test]
990 #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
991 #[cfg(not(feature = "force_validate"))]
994 fn test_list_array_invalid_buffer_len() {
995 let value_data = unsafe {
996 ArrayData::builder(DataType::Int32)
997 .len(8)
998 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
999 .build_unchecked()
1000 };
1001 let list_data_type =
1002 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1003 let list_data = unsafe {
1004 ArrayData::builder(list_data_type)
1005 .len(3)
1006 .add_child_data(value_data)
1007 .build_unchecked()
1008 };
1009 drop(ListArray::from(list_data));
1010 }
1011
1012 #[test]
1013 #[should_panic(expected = "ListArray should contain a single child array (values array)")]
1014 #[cfg(not(feature = "force_validate"))]
1017 fn test_list_array_invalid_child_array_len() {
1018 let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
1019 let list_data_type =
1020 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1021 let list_data = unsafe {
1022 ArrayData::builder(list_data_type)
1023 .len(3)
1024 .add_buffer(value_offsets)
1025 .build_unchecked()
1026 };
1027 drop(ListArray::from(list_data));
1028 }
1029
1030 #[test]
1031 #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
1032 fn test_from_array_data_validation() {
1033 let mut builder = ListBuilder::new(Int32Builder::new());
1034 builder.values().append_value(1);
1035 builder.append(true);
1036 let array = builder.finish();
1037 let _ = LargeListArray::from(array.into_data());
1038 }
1039
1040 #[test]
1041 fn test_list_array_offsets_need_not_start_at_zero() {
1042 let value_data = ArrayData::builder(DataType::Int32)
1043 .len(8)
1044 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
1045 .build()
1046 .unwrap();
1047
1048 let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
1049
1050 let list_data_type =
1051 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1052 let list_data = ArrayData::builder(list_data_type)
1053 .len(3)
1054 .add_buffer(value_offsets)
1055 .add_child_data(value_data)
1056 .build()
1057 .unwrap();
1058
1059 let list_array = ListArray::from(list_data);
1060 assert_eq!(list_array.value_length(0), 0);
1061 assert_eq!(list_array.value_length(1), 3);
1062 assert_eq!(list_array.value_length(2), 2);
1063 }
1064
1065 #[test]
1066 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1067 #[cfg(not(feature = "force_validate"))]
1070 fn test_primitive_array_alignment() {
1071 let buf = Buffer::from_slice_ref([0_u64]);
1072 let buf2 = buf.slice(1);
1073 let array_data = unsafe {
1074 ArrayData::builder(DataType::Int32)
1075 .add_buffer(buf2)
1076 .build_unchecked()
1077 };
1078 drop(Int32Array::from(array_data));
1079 }
1080
1081 #[test]
1082 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1083 #[cfg(not(feature = "force_validate"))]
1086 fn test_list_array_alignment() {
1087 let buf = Buffer::from_slice_ref([0_u64]);
1088 let buf2 = buf.slice(1);
1089
1090 let values: [i32; 8] = [0; 8];
1091 let value_data = unsafe {
1092 ArrayData::builder(DataType::Int32)
1093 .add_buffer(Buffer::from_slice_ref(values))
1094 .build_unchecked()
1095 };
1096
1097 let list_data_type =
1098 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1099 let list_data = unsafe {
1100 ArrayData::builder(list_data_type)
1101 .add_buffer(buf2)
1102 .add_child_data(value_data)
1103 .build_unchecked()
1104 };
1105 drop(ListArray::from(list_data));
1106 }
1107
1108 #[test]
1109 fn list_array_equality() {
1110 fn do_comparison(
1112 lhs_data: Vec<Option<Vec<Option<i32>>>>,
1113 rhs_data: Vec<Option<Vec<Option<i32>>>>,
1114 should_equal: bool,
1115 ) {
1116 let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
1117 let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
1118 assert_eq!(lhs == rhs, should_equal);
1119
1120 let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
1121 let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
1122 assert_eq!(lhs == rhs, should_equal);
1123 }
1124
1125 do_comparison(
1126 vec![
1127 Some(vec![Some(0), Some(1), Some(2)]),
1128 None,
1129 Some(vec![Some(3), None, Some(5)]),
1130 Some(vec![Some(6), Some(7)]),
1131 ],
1132 vec![
1133 Some(vec![Some(0), Some(1), Some(2)]),
1134 None,
1135 Some(vec![Some(3), None, Some(5)]),
1136 Some(vec![Some(6), Some(7)]),
1137 ],
1138 true,
1139 );
1140
1141 do_comparison(
1142 vec![
1143 None,
1144 None,
1145 Some(vec![Some(3), None, Some(5)]),
1146 Some(vec![Some(6), Some(7)]),
1147 ],
1148 vec![
1149 Some(vec![Some(0), Some(1), Some(2)]),
1150 None,
1151 Some(vec![Some(3), None, Some(5)]),
1152 Some(vec![Some(6), Some(7)]),
1153 ],
1154 false,
1155 );
1156
1157 do_comparison(
1158 vec![
1159 None,
1160 None,
1161 Some(vec![Some(3), None, Some(5)]),
1162 Some(vec![Some(6), Some(7)]),
1163 ],
1164 vec![
1165 None,
1166 None,
1167 Some(vec![Some(3), None, Some(5)]),
1168 Some(vec![Some(0), Some(0)]),
1169 ],
1170 false,
1171 );
1172
1173 do_comparison(
1174 vec![None, None, Some(vec![Some(1)])],
1175 vec![None, None, Some(vec![Some(2)])],
1176 false,
1177 );
1178 }
1179
1180 #[test]
1181 fn test_empty_offsets() {
1182 let f = Arc::new(Field::new("element", DataType::Int32, true));
1183 let string = ListArray::from(
1184 ArrayData::builder(DataType::List(f.clone()))
1185 .buffers(vec![Buffer::from(&[])])
1186 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1187 .build()
1188 .unwrap(),
1189 );
1190 assert_eq!(string.value_offsets(), &[0]);
1191 let string = LargeListArray::from(
1192 ArrayData::builder(DataType::LargeList(f))
1193 .buffers(vec![Buffer::from(&[])])
1194 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1195 .build()
1196 .unwrap(),
1197 );
1198 assert_eq!(string.len(), 0);
1199 assert_eq!(string.value_offsets(), &[0]);
1200 }
1201
1202 #[test]
1203 fn test_try_new() {
1204 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1205 let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
1206 let values = Arc::new(values) as ArrayRef;
1207
1208 let field = Arc::new(Field::new("element", DataType::Int32, false));
1209 ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
1210
1211 let nulls = NullBuffer::new_null(3);
1212 ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));
1213
1214 let nulls = NullBuffer::new_null(3);
1215 let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
1216 let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
1217 .unwrap_err();
1218
1219 assert_eq!(
1220 err.to_string(),
1221 "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
1222 );
1223
1224 let field = Arc::new(Field::new("element", DataType::Int64, false));
1225 let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
1226 .unwrap_err();
1227
1228 assert_eq!(
1229 err.to_string(),
1230 "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
1231 );
1232
1233 let nulls = NullBuffer::new_null(7);
1234 let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
1235 let values = Arc::new(values);
1236
1237 let err =
1238 LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
1239
1240 assert_eq!(
1241 err.to_string(),
1242 "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
1243 );
1244
1245 let field = Arc::new(Field::new("element", DataType::Int64, true));
1246 LargeListArray::new(field.clone(), offsets.clone(), values, None);
1247
1248 let values = Int64Array::new(vec![0; 2].into(), None);
1249 let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
1250
1251 assert_eq!(
1252 err.to_string(),
1253 "Invalid argument error: Max offset of 5 exceeds length of values 2"
1254 );
1255 }
1256
1257 #[test]
1258 fn test_from_fixed_size_list() {
1259 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1260 builder.values().append_slice(&[1, 2, 3]);
1261 builder.append(true);
1262 builder.values().append_slice(&[0, 0, 0]);
1263 builder.append(false);
1264 builder.values().append_slice(&[4, 5, 6]);
1265 builder.append(true);
1266 let list: ListArray = builder.finish().into();
1267
1268 let values: Vec<_> = list
1269 .iter()
1270 .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1271 .collect();
1272 assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1273 }
1274
1275 #[test]
1276 fn test_nullable_union() {
1277 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1278 let mut builder = UnionBuilder::new_dense();
1279 builder.append::<Int32Type>("a", 1).unwrap();
1280 builder.append::<Int32Type>("b", 2).unwrap();
1281 builder.append::<Int32Type>("b", 3).unwrap();
1282 builder.append::<Int32Type>("a", 4).unwrap();
1283 builder.append::<Int32Type>("a", 5).unwrap();
1284 let values = builder.build().unwrap();
1285 let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1286 ListArray::new(field.clone(), offsets, Arc::new(values), None);
1287 }
1288
1289 #[test]
1290 fn test_list_new_null_len() {
1291 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
1292 let array = ListArray::new_null(field, 5);
1293 assert_eq!(array.len(), 5);
1294 }
1295}