1mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
// Private module holding the marker trait that `Array` lists as a supertrait.
// Because this module is not `pub`, the marker cannot be named from outside the crate.
mod private {
    /// Marker trait with no methods; required by the `Array` trait's supertrait bounds.
    pub trait Sealed {}

    // A shared reference to any sealed type is itself sealed.
    impl<T: Sealed> Sealed for &T {}
}
87
88pub trait Array: std::fmt::Debug + Send + Sync + private::Sealed {
93 fn as_any(&self) -> &dyn Any;
116
117 fn to_data(&self) -> ArrayData;
119
120 fn into_data(self) -> ArrayData;
124
125 fn data_type(&self) -> &DataType;
138
139 fn slice(&self, offset: usize, length: usize) -> ArrayRef;
153
154 fn len(&self) -> usize;
166
167 fn is_empty(&self) -> bool;
179
180 fn shrink_to_fit(&mut self) {}
185
186 fn offset(&self) -> usize;
202
203 fn nulls(&self) -> Option<&NullBuffer>;
216
217 fn logical_nulls(&self) -> Option<NullBuffer> {
236 self.nulls().cloned()
237 }
238
239 fn is_null(&self, index: usize) -> bool {
261 self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
262 }
263
264 fn is_valid(&self, index: usize) -> bool {
278 !self.is_null(index)
279 }
280
281 fn null_count(&self) -> usize {
297 self.nulls().map(|n| n.null_count()).unwrap_or_default()
298 }
299
300 fn logical_null_count(&self) -> usize {
317 self.logical_nulls()
318 .map(|n| n.null_count())
319 .unwrap_or_default()
320 }
321
322 fn is_nullable(&self) -> bool {
336 self.logical_null_count() != 0
337 }
338
339 fn get_buffer_memory_size(&self) -> usize;
344
345 fn get_array_memory_size(&self) -> usize;
349}
350
/// A reference-counted ([`Arc`]) handle to a dynamically typed [`Array`].
pub type ArrayRef = Arc<dyn Array>;
353
// `ArrayRef` needs the sealing marker so that it can implement `Array` itself.
impl private::Sealed for ArrayRef {}
355
356impl Array for ArrayRef {
358 fn as_any(&self) -> &dyn Any {
359 self.as_ref().as_any()
360 }
361
362 fn to_data(&self) -> ArrayData {
363 self.as_ref().to_data()
364 }
365
366 fn into_data(self) -> ArrayData {
367 self.to_data()
368 }
369
370 fn data_type(&self) -> &DataType {
371 self.as_ref().data_type()
372 }
373
374 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
375 self.as_ref().slice(offset, length)
376 }
377
378 fn len(&self) -> usize {
379 self.as_ref().len()
380 }
381
382 fn is_empty(&self) -> bool {
383 self.as_ref().is_empty()
384 }
385
386 fn shrink_to_fit(&mut self) {
388 if let Some(slf) = Arc::get_mut(self) {
389 slf.shrink_to_fit();
390 } else {
391 }
393 }
394
395 fn offset(&self) -> usize {
396 self.as_ref().offset()
397 }
398
399 fn nulls(&self) -> Option<&NullBuffer> {
400 self.as_ref().nulls()
401 }
402
403 fn logical_nulls(&self) -> Option<NullBuffer> {
404 self.as_ref().logical_nulls()
405 }
406
407 fn is_null(&self, index: usize) -> bool {
408 self.as_ref().is_null(index)
409 }
410
411 fn is_valid(&self, index: usize) -> bool {
412 self.as_ref().is_valid(index)
413 }
414
415 fn null_count(&self) -> usize {
416 self.as_ref().null_count()
417 }
418
419 fn logical_null_count(&self) -> usize {
420 self.as_ref().logical_null_count()
421 }
422
423 fn is_nullable(&self) -> bool {
424 self.as_ref().is_nullable()
425 }
426
427 fn get_buffer_memory_size(&self) -> usize {
428 self.as_ref().get_buffer_memory_size()
429 }
430
431 fn get_array_memory_size(&self) -> usize {
432 self.as_ref().get_array_memory_size()
433 }
434}
435
436impl<T: Array> Array for &T {
437 fn as_any(&self) -> &dyn Any {
438 T::as_any(self)
439 }
440
441 fn to_data(&self) -> ArrayData {
442 T::to_data(self)
443 }
444
445 fn into_data(self) -> ArrayData {
446 self.to_data()
447 }
448
449 fn data_type(&self) -> &DataType {
450 T::data_type(self)
451 }
452
453 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
454 T::slice(self, offset, length)
455 }
456
457 fn len(&self) -> usize {
458 T::len(self)
459 }
460
461 fn is_empty(&self) -> bool {
462 T::is_empty(self)
463 }
464
465 fn offset(&self) -> usize {
466 T::offset(self)
467 }
468
469 fn nulls(&self) -> Option<&NullBuffer> {
470 T::nulls(self)
471 }
472
473 fn logical_nulls(&self) -> Option<NullBuffer> {
474 T::logical_nulls(self)
475 }
476
477 fn is_null(&self, index: usize) -> bool {
478 T::is_null(self, index)
479 }
480
481 fn is_valid(&self, index: usize) -> bool {
482 T::is_valid(self, index)
483 }
484
485 fn null_count(&self) -> usize {
486 T::null_count(self)
487 }
488
489 fn logical_null_count(&self) -> usize {
490 T::logical_null_count(self)
491 }
492
493 fn is_nullable(&self) -> bool {
494 T::is_nullable(self)
495 }
496
497 fn get_buffer_memory_size(&self) -> usize {
498 T::get_buffer_memory_size(self)
499 }
500
501 fn get_array_memory_size(&self) -> usize {
502 T::get_array_memory_size(self)
503 }
504}
505
/// An [`Array`] whose elements can be read individually by index.
pub trait ArrayAccessor: Array {
    /// The type produced for each element; it must be `Send + Sync`.
    type Item: Send + Sync;

    /// Returns the element at `index`.
    ///
    /// NOTE(review): behavior for an out-of-range `index` is defined by each
    /// implementor (presumably a panic) — confirm.
    fn value(&self, index: usize) -> Self::Item;

    /// Returns the element at `index` without the checks `value` may perform.
    ///
    /// # Safety
    ///
    /// NOTE(review): the exact contract is defined by each implementor; presumably
    /// the caller must guarantee `index` is within bounds — confirm.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
}
600
/// Abstraction over array accessors that yield `&'a str` items, so generic code
/// can be written once for every string array representation.
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
    /// Returns whether the array's string data is ASCII.
    ///
    /// NOTE(review): precise semantics (e.g. treatment of nulls) come from the
    /// implementing array type — confirm there.
    fn is_ascii(&self) -> bool;

    /// Returns an [`ArrayIter`] over this accessor.
    fn iter(&self) -> ArrayIter<Self>;
}
615
616impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
617 fn is_ascii(&self) -> bool {
618 GenericStringArray::<O>::is_ascii(self)
619 }
620
621 fn iter(&self) -> ArrayIter<Self> {
622 GenericStringArray::<O>::iter(self)
623 }
624}
625impl<'a> StringArrayType<'a> for &'a StringViewArray {
626 fn is_ascii(&self) -> bool {
627 StringViewArray::is_ascii(self)
628 }
629
630 fn iter(&self) -> ArrayIter<Self> {
631 StringViewArray::iter(self)
632 }
633}
634
/// Abstraction over array accessors that yield `&'a [u8]` items, so generic code
/// can be written once for every binary array representation.
pub trait BinaryArrayType<'a>: ArrayAccessor<Item = &'a [u8]> + Sized {
    /// Returns an [`ArrayIter`] over this accessor.
    fn iter(&self) -> ArrayIter<Self>;
}
647
648impl<'a, O: OffsetSizeTrait> BinaryArrayType<'a> for &'a GenericBinaryArray<O> {
649 fn iter(&self) -> ArrayIter<Self> {
650 GenericBinaryArray::<O>::iter(self)
651 }
652}
653impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
654 fn iter(&self) -> ArrayIter<Self> {
655 BinaryViewArray::iter(self)
656 }
657}
658impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
659 fn iter(&self) -> ArrayIter<Self> {
660 FixedSizeBinaryArray::iter(self)
661 }
662}
663
664impl PartialEq for dyn Array + '_ {
665 fn eq(&self, other: &Self) -> bool {
666 self.to_data().eq(&other.to_data())
667 }
668}
669
670impl<T: Array> PartialEq<T> for dyn Array + '_ {
671 fn eq(&self, other: &T) -> bool {
672 self.to_data().eq(&other.to_data())
673 }
674}
675
676impl PartialEq for NullArray {
677 fn eq(&self, other: &NullArray) -> bool {
678 self.to_data().eq(&other.to_data())
679 }
680}
681
682impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
683 fn eq(&self, other: &PrimitiveArray<T>) -> bool {
684 self.to_data().eq(&other.to_data())
685 }
686}
687
688impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
689 fn eq(&self, other: &Self) -> bool {
690 self.to_data().eq(&other.to_data())
691 }
692}
693
694impl PartialEq for BooleanArray {
695 fn eq(&self, other: &BooleanArray) -> bool {
696 self.to_data().eq(&other.to_data())
697 }
698}
699
700impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
701 fn eq(&self, other: &Self) -> bool {
702 self.to_data().eq(&other.to_data())
703 }
704}
705
706impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
707 fn eq(&self, other: &Self) -> bool {
708 self.to_data().eq(&other.to_data())
709 }
710}
711
712impl PartialEq for FixedSizeBinaryArray {
713 fn eq(&self, other: &Self) -> bool {
714 self.to_data().eq(&other.to_data())
715 }
716}
717
718impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
719 fn eq(&self, other: &Self) -> bool {
720 self.to_data().eq(&other.to_data())
721 }
722}
723
724impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
725 fn eq(&self, other: &Self) -> bool {
726 self.to_data().eq(&other.to_data())
727 }
728}
729
730impl PartialEq for MapArray {
731 fn eq(&self, other: &Self) -> bool {
732 self.to_data().eq(&other.to_data())
733 }
734}
735
736impl PartialEq for FixedSizeListArray {
737 fn eq(&self, other: &Self) -> bool {
738 self.to_data().eq(&other.to_data())
739 }
740}
741
742impl PartialEq for StructArray {
743 fn eq(&self, other: &Self) -> bool {
744 self.to_data().eq(&other.to_data())
745 }
746}
747
748impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
749 fn eq(&self, other: &Self) -> bool {
750 self.to_data().eq(&other.to_data())
751 }
752}
753
754impl<R: RunEndIndexType> PartialEq for RunArray<R> {
755 fn eq(&self, other: &Self) -> bool {
756 self.to_data().eq(&other.to_data())
757 }
758}
759
760pub fn make_array(data: ArrayData) -> ArrayRef {
763 match data.data_type() {
764 DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
765 DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
766 DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
767 DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
768 DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
769 DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
770 DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
771 DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
772 DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
773 DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
774 DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
775 DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
776 DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
777 DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
778 DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
779 DataType::Time32(TimeUnit::Millisecond) => {
780 Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
781 }
782 DataType::Time64(TimeUnit::Microsecond) => {
783 Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
784 }
785 DataType::Time64(TimeUnit::Nanosecond) => {
786 Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
787 }
788 DataType::Timestamp(TimeUnit::Second, _) => {
789 Arc::new(TimestampSecondArray::from(data)) as ArrayRef
790 }
791 DataType::Timestamp(TimeUnit::Millisecond, _) => {
792 Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
793 }
794 DataType::Timestamp(TimeUnit::Microsecond, _) => {
795 Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
796 }
797 DataType::Timestamp(TimeUnit::Nanosecond, _) => {
798 Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
799 }
800 DataType::Interval(IntervalUnit::YearMonth) => {
801 Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
802 }
803 DataType::Interval(IntervalUnit::DayTime) => {
804 Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
805 }
806 DataType::Interval(IntervalUnit::MonthDayNano) => {
807 Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
808 }
809 DataType::Duration(TimeUnit::Second) => {
810 Arc::new(DurationSecondArray::from(data)) as ArrayRef
811 }
812 DataType::Duration(TimeUnit::Millisecond) => {
813 Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
814 }
815 DataType::Duration(TimeUnit::Microsecond) => {
816 Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
817 }
818 DataType::Duration(TimeUnit::Nanosecond) => {
819 Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
820 }
821 DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
822 DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
823 DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
824 DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
825 DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
826 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
827 DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
828 DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
829 DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
830 DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
831 DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
832 DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
833 DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
834 DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
835 DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
836 DataType::Dictionary(key_type, _) => match key_type.as_ref() {
837 DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
838 DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
839 DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
840 DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
841 DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
842 DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
843 DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
844 DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
845 dt => unimplemented!("Unexpected dictionary key type {dt}"),
846 },
847 DataType::RunEndEncoded(run_ends_type, _) => match run_ends_type.data_type() {
848 DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
849 DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
850 DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
851 dt => unimplemented!("Unexpected data type for run_ends array {dt}"),
852 },
853 DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
854 DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
855 DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
856 DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
857 DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
858 dt => unimplemented!("Unexpected data type {dt}"),
859 }
860}
861
862pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
875 let data = ArrayData::new_empty(data_type);
876 make_array(data)
877}
878
879pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
893 make_array(ArrayData::new_null(data_type, length))
894}
895
896unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
902 match data.is_empty() && data.buffers()[0].is_empty() {
903 true => OffsetBuffer::new_empty(),
904 false => {
905 let buffer =
906 ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len() + 1);
907 unsafe { OffsetBuffer::new_unchecked(buffer) }
910 }
911 }
912}
913
914fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
916where
917 A: Array,
918 F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
919{
920 let head = std::cmp::min(10, array.len());
921
922 for i in 0..head {
923 if array.is_null(i) {
924 writeln!(f, " null,")?;
925 } else {
926 write!(f, " ")?;
927 print_item(array, i, f)?;
928 writeln!(f, ",")?;
929 }
930 }
931 if array.len() > 10 {
932 if array.len() > 20 {
933 writeln!(f, " ...{} elements...,", array.len() - 20)?;
934 }
935
936 let tail = std::cmp::max(head, array.len() - 10);
937
938 for i in tail..array.len() {
939 if array.is_null(i) {
940 writeln!(f, " null,")?;
941 } else {
942 write!(f, " ")?;
943 print_item(array, i, f)?;
944 writeln!(f, ",")?;
945 }
946 }
947 }
948 Ok(())
949}
950
// Unit tests for the free functions and trait plumbing defined in this module:
// empty/null array construction, memory-size accounting, and use of `ArrayRef`
// as an `Array`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::{as_union_array, downcast_array};
    use crate::downcast_run_array;
    use arrow_buffer::MutableBuffer;
    use arrow_schema::{Field, Fields, UnionFields, UnionMode};

    // An empty Int32 array downcasts to `Int32Array` and has no values.
    #[test]
    fn test_empty_primitive() {
        let array = new_empty_array(&DataType::Int32);
        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(a.len(), 0);
        let expected: &[i32] = &[];
        assert_eq!(a.values(), expected);
    }

    // An empty Utf8 array still exposes a first offset, and it is zero.
    #[test]
    fn test_empty_variable_sized() {
        let array = new_empty_array(&DataType::Utf8);
        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(a.len(), 0);
        assert_eq!(a.value_offsets()[0], 0i32);
    }

    // Same as above, for an empty list of Int32.
    #[test]
    fn test_empty_list_primitive() {
        let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
        let array = new_empty_array(&data_type);
        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(a.len(), 0);
        assert_eq!(a.value_offsets()[0], 0i32);
    }

    // Every slot of a null Boolean array reports null.
    #[test]
    fn test_null_boolean() {
        let array = new_null_array(&DataType::Boolean, 9);
        let a = array.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(a.len(), 9);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // Every slot of a null Int32 array reports null.
    #[test]
    fn test_null_primitive() {
        let array = new_null_array(&DataType::Int32, 9);
        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(a.len(), 9);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // A null struct array has a child column of matching length and all-null slots.
    #[test]
    fn test_null_struct() {
        let struct_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
        let array = new_null_array(&struct_type, 9);

        let a = array.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.column(0).len(), 9);
        for i in 0..9 {
            assert!(a.is_null(i));
        }

        // Exercise slicing of the resulting array; the slice itself is discarded.
        a.slice(0, 5);
    }

    // A null Utf8 array has a final offset of zero and all-null slots.
    #[test]
    fn test_null_variable_sized() {
        let array = new_null_array(&DataType::Utf8, 9);
        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.value_offsets()[9], 0i32);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // A null list array has a final offset of zero and all-null slots.
    #[test]
    fn test_null_list_primitive() {
        let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
        let array = new_null_array(&data_type, 9);
        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.value_offsets()[9], 0i32);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // A null map array has a final offset of zero and all-null slots.
    #[test]
    fn test_null_map() {
        let data_type = DataType::Map(
            Arc::new(Field::new(
                "entry",
                DataType::Struct(Fields::from(vec![
                    Field::new("key", DataType::Utf8, false),
                    Field::new("value", DataType::Int32, true),
                ])),
                false,
            )),
            false,
        );
        let array = new_null_array(&data_type, 9);
        let a = array.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.value_offsets()[9], 0i32);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    // A dictionary collected from nine `None`s equals `new_null_array` of the same
    // type, and both have first buffers of the same length.
    #[test]
    fn test_null_dictionary() {
        let values =
            vec![None, None, None, None, None, None, None, None, None] as Vec<Option<&str>>;

        let array: DictionaryArray<Int8Type> = values.into_iter().collect();
        let array = Arc::new(array) as ArrayRef;

        let null_array = new_null_array(array.data_type(), 9);
        assert_eq!(&array, &null_array);
        assert_eq!(
            array.to_data().buffers()[0].len(),
            null_array.to_data().buffers()[0].len()
        );
    }

    // For both union modes, a null union array reports no physical nulls but four
    // logical nulls, each value is a one-element null array, and the data validates.
    #[test]
    fn test_null_union() {
        for mode in [UnionMode::Sparse, UnionMode::Dense] {
            let data_type = DataType::Union(
                UnionFields::try_new(
                    vec![2, 1],
                    vec![
                        Field::new("foo", DataType::Int32, true),
                        Field::new("bar", DataType::Int64, true),
                    ],
                )
                .unwrap(),
                mode,
            );
            let array = new_null_array(&data_type, 4);

            let array = as_union_array(array.as_ref());
            assert_eq!(array.len(), 4);
            assert_eq!(array.null_count(), 0);
            assert_eq!(array.logical_null_count(), 4);

            for i in 0..4 {
                let a = array.value(i);
                assert_eq!(a.len(), 1);
                assert_eq!(a.null_count(), 1);
                assert_eq!(a.logical_null_count(), 1);
                assert!(a.is_null(0))
            }

            array.to_data().validate_full().unwrap();
        }
    }

    // For each run-end type, a null run array reports no physical nulls but four
    // logical nulls, backed by a single null value.
    // NOTE(review): the `unused_parens` allowance is presumably for code expanded
    // by `downcast_run_array!` — confirm.
    #[test]
    #[allow(unused_parens)]
    fn test_null_runs() {
        for r in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let data_type = DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", r, false)),
                Arc::new(Field::new("values", DataType::Utf8, true)),
            );

            let array = new_null_array(&data_type, 4);
            let array = array.as_ref();

            downcast_run_array! {
                array => {
                    assert_eq!(array.len(), 4);
                    assert_eq!(array.null_count(), 0);
                    assert_eq!(array.logical_null_count(), 4);
                    assert_eq!(array.values().len(), 1);
                    assert_eq!(array.values().null_count(), 1);
                    assert_eq!(array.run_ends().len(), 4);
                    assert_eq!(array.run_ends().values(), &[4]);

                    // Every logical index maps to the single physical value.
                    let idx = array.get_physical_indices(&[0, 1, 2, 3]).unwrap();
                    assert_eq!(idx, &[0,0,0,0]);
                }
                d => unreachable!("{d}")
            }
        }
    }

    // Null fixed-size binary arrays of several widths are entirely null and
    // iterate as `None`.
    #[test]
    fn test_null_fixed_size_binary() {
        for size in [1, 2, 7] {
            let array = new_null_array(&DataType::FixedSizeBinary(size), 6);
            let array = array
                .as_ref()
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();

            assert_eq!(array.len(), 6);
            assert_eq!(array.null_count(), 6);
            assert_eq!(array.logical_null_count(), 6);
            array.iter().for_each(|x| assert!(x.is_none()));
        }
    }

    // A `NullArray` owns no buffer memory and its total size is one `usize`.
    #[test]
    fn test_memory_size_null() {
        let null_arr = NullArray::new(32);

        assert_eq!(0, null_arr.get_buffer_memory_size());
        assert_eq!(
            std::mem::size_of::<usize>(),
            null_arr.get_array_memory_size()
        );
    }

    // Relative to an empty array of the same type, 128 `i64` values add exactly
    // 128 * size_of::<i64>() bytes.
    #[test]
    fn test_memory_size_primitive() {
        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));

        assert_eq!(
            arr.get_array_memory_size() - empty.get_array_memory_size(),
            128 * std::mem::size_of::<i64>()
        );
    }

    // Slices report the same array memory size as the array they were cut from.
    #[test]
    fn test_memory_size_primitive_sliced() {
        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let slice1 = arr.slice(0, 64);
        let slice2 = arr.slice(64, 64);

        assert_eq!(slice1.get_array_memory_size(), arr.get_array_memory_size());
        assert_eq!(slice2.get_array_memory_size(), arr.get_array_memory_size());
    }

    // Memory accounting for a primitive array that carries a null bitmap.
    #[test]
    fn test_memory_size_primitive_nullable() {
        let arr: PrimitiveArray<Int64Type> = (0..128)
            .map(|i| if i % 20 == 0 { Some(i) } else { None })
            .collect();
        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(
            ArrayData::builder(arr.data_type().clone())
                .add_buffer(MutableBuffer::new(0).into())
                .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
                .build()
                .unwrap(),
        );

        // With zero-sized buffers, the array's size is just the size of the struct.
        assert_eq!(
            std::mem::size_of::<PrimitiveArray<Int64Type>>(),
            empty_with_bitmap.get_array_memory_size()
        );

        // NOTE(review): the extra 64 bytes are presumably the allocation backing
        // the validity bitmap — confirm.
        assert_eq!(
            arr.get_array_memory_size() - empty_with_bitmap.get_array_memory_size(),
            128 * std::mem::size_of::<i64>() + 64
        );
    }

    // A dictionary's memory size, relative to an empty one, is the sum of its
    // keys' and values' sizes.
    #[test]
    fn test_memory_size_dictionary() {
        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
            (0..256).map(|i| (i % values.len()) as i16),
        );

        let dict_data_type = DataType::Dictionary(
            Box::new(keys.data_type().clone()),
            Box::new(values.data_type().clone()),
        );
        let dict_data = keys
            .into_data()
            .into_builder()
            .data_type(dict_data_type)
            .child_data(vec![values.into_data()])
            .build()
            .unwrap();

        let empty_data = ArrayData::new_empty(&DataType::Dictionary(
            Box::new(DataType::Int16),
            Box::new(DataType::Int64),
        ));

        let arr = DictionaryArray::<Int16Type>::from(dict_data);
        let empty = DictionaryArray::<Int16Type>::from(empty_data);

        let expected_keys_size = 256 * std::mem::size_of::<i16>();
        assert_eq!(
            arr.keys().get_array_memory_size() - empty.keys().get_array_memory_size(),
            expected_keys_size
        );

        let expected_values_size = 16 * std::mem::size_of::<i64>();
        assert_eq!(
            arr.values().get_array_memory_size() - empty.values().get_array_memory_size(),
            expected_values_size
        );

        let expected_size = expected_keys_size + expected_values_size;
        assert_eq!(
            arr.get_array_memory_size() - empty.get_array_memory_size(),
            expected_size
        );
    }

    // Stand-in for an API that accepts any array as `&dyn Array`.
    fn compute_my_thing(arr: &dyn Array) -> bool {
        !arr.is_empty()
    }

    // A concrete array, an `ArrayRef`, and the `&dyn Array` inside an `ArrayRef`
    // can all be passed where `&dyn Array` is expected.
    #[test]
    fn test_array_ref_as_array() {
        let arr: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        // A reference to the concrete array coerces to `&dyn Array`.
        assert!(compute_my_thing(&arr));

        let arr: ArrayRef = Arc::new(arr);
        // `&ArrayRef` works too, because `ArrayRef` itself implements `Array`.
        assert!(compute_my_thing(&arr));
        assert!(compute_my_thing(arr.as_ref()));
    }

    // `downcast_array` recovers a concrete array equal to the one that was boxed.
    #[test]
    fn test_downcast_array() {
        let array: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        let boxed: ArrayRef = Arc::new(array);
        let array: Int32Array = downcast_array(&boxed);

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(array, expected);
    }
}