1use crate::array::{get_offsets, make_array, print_long_array};
19use crate::builder::{GenericListBuilder, PrimitiveBuilder};
20use crate::{
21 iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
22 ArrowPrimitiveType, FixedSizeListArray,
23};
24use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
25use arrow_data::{ArrayData, ArrayDataBuilder};
26use arrow_schema::{ArrowError, DataType, FieldRef};
27use num::Integer;
28use std::any::Any;
29use std::sync::Arc;
30
31pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
41 const IS_LARGE: bool;
43 const PREFIX: &'static str;
45}
46
47impl OffsetSizeTrait for i32 {
48 const IS_LARGE: bool = false;
49 const PREFIX: &'static str = "";
50}
51
52impl OffsetSizeTrait for i64 {
53 const IS_LARGE: bool = true;
54 const PREFIX: &'static str = "Large";
55}
56
57pub struct GenericListArray<OffsetSize: OffsetSizeTrait> {
128 data_type: DataType,
129 nulls: Option<NullBuffer>,
130 values: ArrayRef,
131 value_offsets: OffsetBuffer<OffsetSize>,
132}
133
134impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> {
135 fn clone(&self) -> Self {
136 Self {
137 data_type: self.data_type.clone(),
138 nulls: self.nulls.clone(),
139 values: self.values.clone(),
140 value_offsets: self.value_offsets.clone(),
141 }
142 }
143}
144
145impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
146 pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE {
150 DataType::LargeList
151 } else {
152 DataType::List
153 };
154
155 pub fn try_new(
166 field: FieldRef,
167 offsets: OffsetBuffer<OffsetSize>,
168 values: ArrayRef,
169 nulls: Option<NullBuffer>,
170 ) -> Result<Self, ArrowError> {
171 let len = offsets.len() - 1; let end_offset = offsets.last().unwrap().as_usize();
173 if end_offset > values.len() {
176 return Err(ArrowError::InvalidArgumentError(format!(
177 "Max offset of {end_offset} exceeds length of values {}",
178 values.len()
179 )));
180 }
181
182 if let Some(n) = nulls.as_ref() {
183 if n.len() != len {
184 return Err(ArrowError::InvalidArgumentError(format!(
185 "Incorrect length of null buffer for {}ListArray, expected {len} got {}",
186 OffsetSize::PREFIX,
187 n.len(),
188 )));
189 }
190 }
191 if !field.is_nullable() && values.is_nullable() {
192 return Err(ArrowError::InvalidArgumentError(format!(
193 "Non-nullable field of {}ListArray {:?} cannot contain nulls",
194 OffsetSize::PREFIX,
195 field.name()
196 )));
197 }
198
199 if field.data_type() != values.data_type() {
200 return Err(ArrowError::InvalidArgumentError(format!(
201 "{}ListArray expected data type {} got {} for {:?}",
202 OffsetSize::PREFIX,
203 field.data_type(),
204 values.data_type(),
205 field.name()
206 )));
207 }
208
209 Ok(Self {
210 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
211 nulls,
212 values,
213 value_offsets: offsets,
214 })
215 }
216
217 pub fn new(
223 field: FieldRef,
224 offsets: OffsetBuffer<OffsetSize>,
225 values: ArrayRef,
226 nulls: Option<NullBuffer>,
227 ) -> Self {
228 Self::try_new(field, offsets, values, nulls).unwrap()
229 }
230
231 pub fn new_null(field: FieldRef, len: usize) -> Self {
233 let values = new_empty_array(field.data_type());
234 Self {
235 data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
236 nulls: Some(NullBuffer::new_null(len)),
237 value_offsets: OffsetBuffer::new_zeroed(len),
238 values,
239 }
240 }
241
242 pub fn into_parts(
244 self,
245 ) -> (
246 FieldRef,
247 OffsetBuffer<OffsetSize>,
248 ArrayRef,
249 Option<NullBuffer>,
250 ) {
251 let f = match self.data_type {
252 DataType::List(f) | DataType::LargeList(f) => f,
253 _ => unreachable!(),
254 };
255 (f, self.value_offsets, self.values, self.nulls)
256 }
257
258 #[inline]
263 pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
264 &self.value_offsets
265 }
266
267 #[inline]
269 pub fn values(&self) -> &ArrayRef {
270 &self.values
271 }
272
273 pub fn value_type(&self) -> DataType {
275 self.values.data_type().clone()
276 }
277
278 pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
282 let end = self.value_offsets().get_unchecked(i + 1).as_usize();
283 let start = self.value_offsets().get_unchecked(i).as_usize();
284 self.values.slice(start, end - start)
285 }
286
287 pub fn value(&self, i: usize) -> ArrayRef {
289 let end = self.value_offsets()[i + 1].as_usize();
290 let start = self.value_offsets()[i].as_usize();
291 self.values.slice(start, end - start)
292 }
293
294 #[inline]
296 pub fn value_offsets(&self) -> &[OffsetSize] {
297 &self.value_offsets
298 }
299
300 #[inline]
302 pub fn value_length(&self, i: usize) -> OffsetSize {
303 let offsets = self.value_offsets();
304 offsets[i + 1] - offsets[i]
305 }
306
307 pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
309 GenericListArrayIter::<'a, OffsetSize>::new(self)
310 }
311
312 #[inline]
313 fn get_type(data_type: &DataType) -> Option<&DataType> {
314 match (OffsetSize::IS_LARGE, data_type) {
315 (true, DataType::LargeList(child)) | (false, DataType::List(child)) => {
316 Some(child.data_type())
317 }
318 _ => None,
319 }
320 }
321
322 pub fn slice(&self, offset: usize, length: usize) -> Self {
324 Self {
325 data_type: self.data_type.clone(),
326 nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
327 values: self.values.clone(),
328 value_offsets: self.value_offsets.slice(offset, length),
329 }
330 }
331
332 pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
348 where
349 T: ArrowPrimitiveType,
350 P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
351 I: IntoIterator<Item = Option<P>>,
352 {
353 let iter = iter.into_iter();
354 let size_hint = iter.size_hint().0;
355 let mut builder =
356 GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint);
357
358 for i in iter {
359 match i {
360 Some(p) => {
361 for t in p {
362 builder.values().append_option(t);
363 }
364 builder.append(true);
365 }
366 None => builder.append(false),
367 }
368 }
369 builder.finish()
370 }
371}
372
373impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
374 fn from(data: ArrayData) -> Self {
375 Self::try_new_from_array_data(data)
376 .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
377 }
378}
379
380impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData {
381 fn from(array: GenericListArray<OffsetSize>) -> Self {
382 let len = array.len();
383 let builder = ArrayDataBuilder::new(array.data_type)
384 .len(len)
385 .nulls(array.nulls)
386 .buffers(vec![array.value_offsets.into_inner().into_inner()])
387 .child_data(vec![array.values.to_data()]);
388
389 unsafe { builder.build_unchecked() }
390 }
391}
392
393impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
394 fn from(value: FixedSizeListArray) -> Self {
395 let (field, size) = match value.data_type() {
396 DataType::FixedSizeList(f, size) => (f, *size as usize),
397 _ => unreachable!(),
398 };
399
400 let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
401
402 Self {
403 data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
404 nulls: value.nulls().cloned(),
405 values: value.values().clone(),
406 value_offsets: offsets,
407 }
408 }
409}
410
411impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
412 fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
413 if data.buffers().len() != 1 {
414 return Err(ArrowError::InvalidArgumentError(format!(
415 "ListArray data should contain a single buffer only (value offsets), had {}",
416 data.buffers().len()
417 )));
418 }
419
420 if data.child_data().len() != 1 {
421 return Err(ArrowError::InvalidArgumentError(format!(
422 "ListArray should contain a single child array (values array), had {}",
423 data.child_data().len()
424 )));
425 }
426
427 let values = data.child_data()[0].clone();
428
429 if let Some(child_data_type) = Self::get_type(data.data_type()) {
430 if values.data_type() != child_data_type {
431 return Err(ArrowError::InvalidArgumentError(format!(
432 "[Large]ListArray's child datatype {:?} does not \
433 correspond to the List's datatype {:?}",
434 values.data_type(),
435 child_data_type
436 )));
437 }
438 } else {
439 return Err(ArrowError::InvalidArgumentError(format!(
440 "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
441 data.data_type()
442 )));
443 }
444
445 let values = make_array(values);
446 let value_offsets = unsafe { get_offsets(&data) };
449
450 Ok(Self {
451 data_type: data.data_type().clone(),
452 nulls: data.nulls().cloned(),
453 values,
454 value_offsets,
455 })
456 }
457}
458
459impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
460 fn as_any(&self) -> &dyn Any {
461 self
462 }
463
464 fn to_data(&self) -> ArrayData {
465 self.clone().into()
466 }
467
468 fn into_data(self) -> ArrayData {
469 self.into()
470 }
471
472 fn data_type(&self) -> &DataType {
473 &self.data_type
474 }
475
476 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
477 Arc::new(self.slice(offset, length))
478 }
479
480 fn len(&self) -> usize {
481 self.value_offsets.len() - 1
482 }
483
484 fn is_empty(&self) -> bool {
485 self.value_offsets.len() <= 1
486 }
487
488 fn shrink_to_fit(&mut self) {
489 if let Some(nulls) = &mut self.nulls {
490 nulls.shrink_to_fit();
491 }
492 self.values.shrink_to_fit();
493 self.value_offsets.shrink_to_fit();
494 }
495
496 fn offset(&self) -> usize {
497 0
498 }
499
500 fn nulls(&self) -> Option<&NullBuffer> {
501 self.nulls.as_ref()
502 }
503
504 fn logical_null_count(&self) -> usize {
505 self.null_count()
507 }
508
509 fn get_buffer_memory_size(&self) -> usize {
510 let mut size = self.values.get_buffer_memory_size();
511 size += self.value_offsets.inner().inner().capacity();
512 if let Some(n) = self.nulls.as_ref() {
513 size += n.buffer().capacity();
514 }
515 size
516 }
517
518 fn get_array_memory_size(&self) -> usize {
519 let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
520 size += self.value_offsets.inner().inner().capacity();
521 if let Some(n) = self.nulls.as_ref() {
522 size += n.buffer().capacity();
523 }
524 size
525 }
526}
527
528impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> {
529 type Item = ArrayRef;
530
531 fn value(&self, index: usize) -> Self::Item {
532 GenericListArray::value(self, index)
533 }
534
535 unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
536 GenericListArray::value(self, index)
537 }
538}
539
540impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> {
541 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
542 let prefix = OffsetSize::PREFIX;
543
544 write!(f, "{prefix}ListArray\n[\n")?;
545 print_long_array(self, f, |array, index, f| {
546 std::fmt::Debug::fmt(&array.value(index), f)
547 })?;
548 write!(f, "]")
549 }
550}
551
552pub type ListArray = GenericListArray<i32>;
556
557pub type LargeListArray = GenericListArray<i64>;
561
562#[cfg(test)]
563mod tests {
564 use super::*;
565 use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder};
566 use crate::cast::AsArray;
567 use crate::types::Int32Type;
568 use crate::{Int32Array, Int64Array};
569 use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
570 use arrow_schema::Field;
571
572 fn create_from_buffers() -> ListArray {
573 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
575 let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8]));
576 let field = Arc::new(Field::new("item", DataType::Int32, true));
577 ListArray::new(field, offsets, Arc::new(values), None)
578 }
579
580 #[test]
581 fn test_from_iter_primitive() {
582 let data = vec![
583 Some(vec![Some(0), Some(1), Some(2)]),
584 Some(vec![Some(3), Some(4), Some(5)]),
585 Some(vec![Some(6), Some(7)]),
586 ];
587 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
588
589 let another = create_from_buffers();
590 assert_eq!(list_array, another)
591 }
592
593 #[test]
594 fn test_empty_list_array() {
595 let value_data = ArrayData::builder(DataType::Int32)
597 .len(0)
598 .add_buffer(Buffer::from([]))
599 .build()
600 .unwrap();
601
602 let value_offsets = Buffer::from([]);
604
605 let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
607 let list_data = ArrayData::builder(list_data_type)
608 .len(0)
609 .add_buffer(value_offsets)
610 .add_child_data(value_data)
611 .build()
612 .unwrap();
613
614 let list_array = ListArray::from(list_data);
615 assert_eq!(list_array.len(), 0)
616 }
617
618 #[test]
619 fn test_list_array() {
620 let value_data = ArrayData::builder(DataType::Int32)
622 .len(8)
623 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
624 .build()
625 .unwrap();
626
627 let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
630
631 let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
633 let list_data = ArrayData::builder(list_data_type.clone())
634 .len(3)
635 .add_buffer(value_offsets.clone())
636 .add_child_data(value_data.clone())
637 .build()
638 .unwrap();
639 let list_array = ListArray::from(list_data);
640
641 let values = list_array.values();
642 assert_eq!(value_data, values.to_data());
643 assert_eq!(DataType::Int32, list_array.value_type());
644 assert_eq!(3, list_array.len());
645 assert_eq!(0, list_array.null_count());
646 assert_eq!(6, list_array.value_offsets()[2]);
647 assert_eq!(2, list_array.value_length(2));
648 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
649 assert_eq!(
650 0,
651 unsafe { list_array.value_unchecked(0) }
652 .as_primitive::<Int32Type>()
653 .value(0)
654 );
655 for i in 0..3 {
656 assert!(list_array.is_valid(i));
657 assert!(!list_array.is_null(i));
658 }
659
660 let list_data = ArrayData::builder(list_data_type)
663 .len(2)
664 .offset(1)
665 .add_buffer(value_offsets)
666 .add_child_data(value_data.clone())
667 .build()
668 .unwrap();
669 let list_array = ListArray::from(list_data);
670
671 let values = list_array.values();
672 assert_eq!(value_data, values.to_data());
673 assert_eq!(DataType::Int32, list_array.value_type());
674 assert_eq!(2, list_array.len());
675 assert_eq!(0, list_array.null_count());
676 assert_eq!(6, list_array.value_offsets()[1]);
677 assert_eq!(2, list_array.value_length(1));
678 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
679 assert_eq!(
680 3,
681 unsafe { list_array.value_unchecked(0) }
682 .as_primitive::<Int32Type>()
683 .value(0)
684 );
685 }
686
687 #[test]
688 fn test_large_list_array() {
689 let value_data = ArrayData::builder(DataType::Int32)
691 .len(8)
692 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
693 .build()
694 .unwrap();
695
696 let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);
699
700 let list_data_type = DataType::new_large_list(DataType::Int32, false);
702 let list_data = ArrayData::builder(list_data_type.clone())
703 .len(3)
704 .add_buffer(value_offsets.clone())
705 .add_child_data(value_data.clone())
706 .build()
707 .unwrap();
708 let list_array = LargeListArray::from(list_data);
709
710 let values = list_array.values();
711 assert_eq!(value_data, values.to_data());
712 assert_eq!(DataType::Int32, list_array.value_type());
713 assert_eq!(3, list_array.len());
714 assert_eq!(0, list_array.null_count());
715 assert_eq!(6, list_array.value_offsets()[2]);
716 assert_eq!(2, list_array.value_length(2));
717 assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0));
718 assert_eq!(
719 0,
720 unsafe { list_array.value_unchecked(0) }
721 .as_primitive::<Int32Type>()
722 .value(0)
723 );
724 for i in 0..3 {
725 assert!(list_array.is_valid(i));
726 assert!(!list_array.is_null(i));
727 }
728
729 let list_data = ArrayData::builder(list_data_type)
732 .len(2)
733 .offset(1)
734 .add_buffer(value_offsets)
735 .add_child_data(value_data.clone())
736 .build()
737 .unwrap();
738 let list_array = LargeListArray::from(list_data);
739
740 let values = list_array.values();
741 assert_eq!(value_data, values.to_data());
742 assert_eq!(DataType::Int32, list_array.value_type());
743 assert_eq!(2, list_array.len());
744 assert_eq!(0, list_array.null_count());
745 assert_eq!(6, list_array.value_offsets()[1]);
746 assert_eq!(2, list_array.value_length(1));
747 assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0));
748 assert_eq!(
749 3,
750 unsafe { list_array.value_unchecked(0) }
751 .as_primitive::<Int32Type>()
752 .value(0)
753 );
754 }
755
756 #[test]
757 fn test_list_array_slice() {
758 let value_data = ArrayData::builder(DataType::Int32)
760 .len(10)
761 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
762 .build()
763 .unwrap();
764
765 let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
768 let mut null_bits: [u8; 2] = [0; 2];
770 bit_util::set_bit(&mut null_bits, 0);
771 bit_util::set_bit(&mut null_bits, 3);
772 bit_util::set_bit(&mut null_bits, 4);
773 bit_util::set_bit(&mut null_bits, 6);
774 bit_util::set_bit(&mut null_bits, 8);
775
776 let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
778 let list_data = ArrayData::builder(list_data_type)
779 .len(9)
780 .add_buffer(value_offsets)
781 .add_child_data(value_data.clone())
782 .null_bit_buffer(Some(Buffer::from(null_bits)))
783 .build()
784 .unwrap();
785 let list_array = ListArray::from(list_data);
786
787 let values = list_array.values();
788 assert_eq!(value_data, values.to_data());
789 assert_eq!(DataType::Int32, list_array.value_type());
790 assert_eq!(9, list_array.len());
791 assert_eq!(4, list_array.null_count());
792 assert_eq!(2, list_array.value_offsets()[3]);
793 assert_eq!(2, list_array.value_length(3));
794
795 let sliced_array = list_array.slice(1, 6);
796 assert_eq!(6, sliced_array.len());
797 assert_eq!(3, sliced_array.null_count());
798
799 for i in 0..sliced_array.len() {
800 if bit_util::get_bit(&null_bits, 1 + i) {
801 assert!(sliced_array.is_valid(i));
802 } else {
803 assert!(sliced_array.is_null(i));
804 }
805 }
806
807 let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
809 assert_eq!(2, sliced_list_array.value_offsets()[2]);
810 assert_eq!(2, sliced_list_array.value_length(2));
811 assert_eq!(4, sliced_list_array.value_offsets()[3]);
812 assert_eq!(2, sliced_list_array.value_length(3));
813 assert_eq!(6, sliced_list_array.value_offsets()[5]);
814 assert_eq!(3, sliced_list_array.value_length(5));
815 }
816
817 #[test]
818 fn test_large_list_array_slice() {
819 let value_data = ArrayData::builder(DataType::Int32)
821 .len(10)
822 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
823 .build()
824 .unwrap();
825
826 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
829 let mut null_bits: [u8; 2] = [0; 2];
831 bit_util::set_bit(&mut null_bits, 0);
832 bit_util::set_bit(&mut null_bits, 3);
833 bit_util::set_bit(&mut null_bits, 4);
834 bit_util::set_bit(&mut null_bits, 6);
835 bit_util::set_bit(&mut null_bits, 8);
836
837 let list_data_type = DataType::new_large_list(DataType::Int32, false);
839 let list_data = ArrayData::builder(list_data_type)
840 .len(9)
841 .add_buffer(value_offsets)
842 .add_child_data(value_data.clone())
843 .null_bit_buffer(Some(Buffer::from(null_bits)))
844 .build()
845 .unwrap();
846 let list_array = LargeListArray::from(list_data);
847
848 let values = list_array.values();
849 assert_eq!(value_data, values.to_data());
850 assert_eq!(DataType::Int32, list_array.value_type());
851 assert_eq!(9, list_array.len());
852 assert_eq!(4, list_array.null_count());
853 assert_eq!(2, list_array.value_offsets()[3]);
854 assert_eq!(2, list_array.value_length(3));
855
856 let sliced_array = list_array.slice(1, 6);
857 assert_eq!(6, sliced_array.len());
858 assert_eq!(3, sliced_array.null_count());
859
860 for i in 0..sliced_array.len() {
861 if bit_util::get_bit(&null_bits, 1 + i) {
862 assert!(sliced_array.is_valid(i));
863 } else {
864 assert!(sliced_array.is_null(i));
865 }
866 }
867
868 let sliced_list_array = sliced_array
870 .as_any()
871 .downcast_ref::<LargeListArray>()
872 .unwrap();
873 assert_eq!(2, sliced_list_array.value_offsets()[2]);
874 assert_eq!(2, sliced_list_array.value_length(2));
875 assert_eq!(4, sliced_list_array.value_offsets()[3]);
876 assert_eq!(2, sliced_list_array.value_length(3));
877 assert_eq!(6, sliced_list_array.value_offsets()[5]);
878 assert_eq!(3, sliced_list_array.value_length(5));
879 }
880
881 #[test]
882 #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")]
883 fn test_list_array_index_out_of_bound() {
884 let value_data = ArrayData::builder(DataType::Int32)
886 .len(10)
887 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
888 .build()
889 .unwrap();
890
891 let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]);
894 let mut null_bits: [u8; 2] = [0; 2];
896 bit_util::set_bit(&mut null_bits, 0);
897 bit_util::set_bit(&mut null_bits, 3);
898 bit_util::set_bit(&mut null_bits, 4);
899 bit_util::set_bit(&mut null_bits, 6);
900 bit_util::set_bit(&mut null_bits, 8);
901
902 let list_data_type = DataType::new_large_list(DataType::Int32, false);
904 let list_data = ArrayData::builder(list_data_type)
905 .len(9)
906 .add_buffer(value_offsets)
907 .add_child_data(value_data)
908 .null_bit_buffer(Some(Buffer::from(null_bits)))
909 .build()
910 .unwrap();
911 let list_array = LargeListArray::from(list_data);
912 assert_eq!(9, list_array.len());
913
914 list_array.value(10);
915 }
916 #[test]
917 #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
918 #[cfg(not(feature = "force_validate"))]
921 fn test_list_array_invalid_buffer_len() {
922 let value_data = unsafe {
923 ArrayData::builder(DataType::Int32)
924 .len(8)
925 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
926 .build_unchecked()
927 };
928 let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
929 let list_data = unsafe {
930 ArrayData::builder(list_data_type)
931 .len(3)
932 .add_child_data(value_data)
933 .build_unchecked()
934 };
935 drop(ListArray::from(list_data));
936 }
937
938 #[test]
939 #[should_panic(expected = "ListArray should contain a single child array (values array)")]
940 #[cfg(not(feature = "force_validate"))]
943 fn test_list_array_invalid_child_array_len() {
944 let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
945 let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
946 let list_data = unsafe {
947 ArrayData::builder(list_data_type)
948 .len(3)
949 .add_buffer(value_offsets)
950 .build_unchecked()
951 };
952 drop(ListArray::from(list_data));
953 }
954
955 #[test]
956 #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
957 fn test_from_array_data_validation() {
958 let mut builder = ListBuilder::new(Int32Builder::new());
959 builder.values().append_value(1);
960 builder.append(true);
961 let array = builder.finish();
962 let _ = LargeListArray::from(array.into_data());
963 }
964
965 #[test]
966 fn test_list_array_offsets_need_not_start_at_zero() {
967 let value_data = ArrayData::builder(DataType::Int32)
968 .len(8)
969 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
970 .build()
971 .unwrap();
972
973 let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
974
975 let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
976 let list_data = ArrayData::builder(list_data_type)
977 .len(3)
978 .add_buffer(value_offsets)
979 .add_child_data(value_data)
980 .build()
981 .unwrap();
982
983 let list_array = ListArray::from(list_data);
984 assert_eq!(list_array.value_length(0), 0);
985 assert_eq!(list_array.value_length(1), 3);
986 assert_eq!(list_array.value_length(2), 2);
987 }
988
989 #[test]
990 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
991 #[cfg(not(feature = "force_validate"))]
994 fn test_primitive_array_alignment() {
995 let buf = Buffer::from_slice_ref([0_u64]);
996 let buf2 = buf.slice(1);
997 let array_data = unsafe {
998 ArrayData::builder(DataType::Int32)
999 .add_buffer(buf2)
1000 .build_unchecked()
1001 };
1002 drop(Int32Array::from(array_data));
1003 }
1004
1005 #[test]
1006 #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
1007 #[cfg(not(feature = "force_validate"))]
1010 fn test_list_array_alignment() {
1011 let buf = Buffer::from_slice_ref([0_u64]);
1012 let buf2 = buf.slice(1);
1013
1014 let values: [i32; 8] = [0; 8];
1015 let value_data = unsafe {
1016 ArrayData::builder(DataType::Int32)
1017 .add_buffer(Buffer::from_slice_ref(values))
1018 .build_unchecked()
1019 };
1020
1021 let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
1022 let list_data = unsafe {
1023 ArrayData::builder(list_data_type)
1024 .add_buffer(buf2)
1025 .add_child_data(value_data)
1026 .build_unchecked()
1027 };
1028 drop(ListArray::from(list_data));
1029 }
1030
1031 #[test]
1032 fn list_array_equality() {
1033 fn do_comparison(
1035 lhs_data: Vec<Option<Vec<Option<i32>>>>,
1036 rhs_data: Vec<Option<Vec<Option<i32>>>>,
1037 should_equal: bool,
1038 ) {
1039 let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone());
1040 let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone());
1041 assert_eq!(lhs == rhs, should_equal);
1042
1043 let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data);
1044 let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data);
1045 assert_eq!(lhs == rhs, should_equal);
1046 }
1047
1048 do_comparison(
1049 vec![
1050 Some(vec![Some(0), Some(1), Some(2)]),
1051 None,
1052 Some(vec![Some(3), None, Some(5)]),
1053 Some(vec![Some(6), Some(7)]),
1054 ],
1055 vec![
1056 Some(vec![Some(0), Some(1), Some(2)]),
1057 None,
1058 Some(vec![Some(3), None, Some(5)]),
1059 Some(vec![Some(6), Some(7)]),
1060 ],
1061 true,
1062 );
1063
1064 do_comparison(
1065 vec![
1066 None,
1067 None,
1068 Some(vec![Some(3), None, Some(5)]),
1069 Some(vec![Some(6), Some(7)]),
1070 ],
1071 vec![
1072 Some(vec![Some(0), Some(1), Some(2)]),
1073 None,
1074 Some(vec![Some(3), None, Some(5)]),
1075 Some(vec![Some(6), Some(7)]),
1076 ],
1077 false,
1078 );
1079
1080 do_comparison(
1081 vec![
1082 None,
1083 None,
1084 Some(vec![Some(3), None, Some(5)]),
1085 Some(vec![Some(6), Some(7)]),
1086 ],
1087 vec![
1088 None,
1089 None,
1090 Some(vec![Some(3), None, Some(5)]),
1091 Some(vec![Some(0), Some(0)]),
1092 ],
1093 false,
1094 );
1095
1096 do_comparison(
1097 vec![None, None, Some(vec![Some(1)])],
1098 vec![None, None, Some(vec![Some(2)])],
1099 false,
1100 );
1101 }
1102
1103 #[test]
1104 fn test_empty_offsets() {
1105 let f = Arc::new(Field::new("element", DataType::Int32, true));
1106 let string = ListArray::from(
1107 ArrayData::builder(DataType::List(f.clone()))
1108 .buffers(vec![Buffer::from(&[])])
1109 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1110 .build()
1111 .unwrap(),
1112 );
1113 assert_eq!(string.value_offsets(), &[0]);
1114 let string = LargeListArray::from(
1115 ArrayData::builder(DataType::LargeList(f))
1116 .buffers(vec![Buffer::from(&[])])
1117 .add_child_data(ArrayData::new_empty(&DataType::Int32))
1118 .build()
1119 .unwrap(),
1120 );
1121 assert_eq!(string.len(), 0);
1122 assert_eq!(string.value_offsets(), &[0]);
1123 }
1124
1125 #[test]
1126 fn test_try_new() {
1127 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1128 let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
1129 let values = Arc::new(values) as ArrayRef;
1130
1131 let field = Arc::new(Field::new("element", DataType::Int32, false));
1132 ListArray::new(field.clone(), offsets.clone(), values.clone(), None);
1133
1134 let nulls = NullBuffer::new_null(3);
1135 ListArray::new(field.clone(), offsets, values.clone(), Some(nulls));
1136
1137 let nulls = NullBuffer::new_null(3);
1138 let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
1139 let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
1140 .unwrap_err();
1141
1142 assert_eq!(
1143 err.to_string(),
1144 "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3"
1145 );
1146
1147 let field = Arc::new(Field::new("element", DataType::Int64, false));
1148 let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
1149 .unwrap_err();
1150
1151 assert_eq!(
1152 err.to_string(),
1153 "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\""
1154 );
1155
1156 let nulls = NullBuffer::new_null(7);
1157 let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
1158 let values = Arc::new(values);
1159
1160 let err =
1161 LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
1162
1163 assert_eq!(
1164 err.to_string(),
1165 "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls"
1166 );
1167
1168 let field = Arc::new(Field::new("element", DataType::Int64, true));
1169 LargeListArray::new(field.clone(), offsets.clone(), values, None);
1170
1171 let values = Int64Array::new(vec![0; 2].into(), None);
1172 let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
1173
1174 assert_eq!(
1175 err.to_string(),
1176 "Invalid argument error: Max offset of 5 exceeds length of values 2"
1177 );
1178 }
1179
1180 #[test]
1181 fn test_from_fixed_size_list() {
1182 let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
1183 builder.values().append_slice(&[1, 2, 3]);
1184 builder.append(true);
1185 builder.values().append_slice(&[0, 0, 0]);
1186 builder.append(false);
1187 builder.values().append_slice(&[4, 5, 6]);
1188 builder.append(true);
1189 let list: ListArray = builder.finish().into();
1190
1191 let values: Vec<_> = list
1192 .iter()
1193 .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
1194 .collect();
1195 assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])])
1196 }
1197
1198 #[test]
1199 fn test_nullable_union() {
1200 let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
1201 let mut builder = UnionBuilder::new_dense();
1202 builder.append::<Int32Type>("a", 1).unwrap();
1203 builder.append::<Int32Type>("b", 2).unwrap();
1204 builder.append::<Int32Type>("b", 3).unwrap();
1205 builder.append::<Int32Type>("a", 4).unwrap();
1206 builder.append::<Int32Type>("a", 5).unwrap();
1207 let values = builder.build().unwrap();
1208 let field = Arc::new(Field::new("element", values.data_type().clone(), false));
1209 ListArray::new(field.clone(), offsets, Arc::new(values), None);
1210 }
1211}