1mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
81pub trait Array: std::fmt::Debug + Send + Sync {
83 fn as_any(&self) -> &dyn Any;
106
107 fn to_data(&self) -> ArrayData;
109
110 fn into_data(self) -> ArrayData;
114
115 fn data_type(&self) -> &DataType;
128
129 fn slice(&self, offset: usize, length: usize) -> ArrayRef;
143
144 fn len(&self) -> usize;
156
157 fn is_empty(&self) -> bool;
169
170 fn shrink_to_fit(&mut self) {}
175
176 fn offset(&self) -> usize;
192
193 fn nulls(&self) -> Option<&NullBuffer>;
206
207 fn logical_nulls(&self) -> Option<NullBuffer> {
226 self.nulls().cloned()
227 }
228
229 fn is_null(&self, index: usize) -> bool {
251 self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
252 }
253
254 fn is_valid(&self, index: usize) -> bool {
268 !self.is_null(index)
269 }
270
271 fn null_count(&self) -> usize {
287 self.nulls().map(|n| n.null_count()).unwrap_or_default()
288 }
289
290 fn logical_null_count(&self) -> usize {
307 self.logical_nulls()
308 .map(|n| n.null_count())
309 .unwrap_or_default()
310 }
311
312 fn is_nullable(&self) -> bool {
326 self.null_count() != 0
328 }
329
330 fn get_buffer_memory_size(&self) -> usize;
335
336 fn get_array_memory_size(&self) -> usize;
340}
341
/// A reference-counted, dynamically typed handle to any [`Array`] implementation.
pub type ArrayRef = Arc<dyn Array>;
344
345impl Array for ArrayRef {
347 fn as_any(&self) -> &dyn Any {
348 self.as_ref().as_any()
349 }
350
351 fn to_data(&self) -> ArrayData {
352 self.as_ref().to_data()
353 }
354
355 fn into_data(self) -> ArrayData {
356 self.to_data()
357 }
358
359 fn data_type(&self) -> &DataType {
360 self.as_ref().data_type()
361 }
362
363 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
364 self.as_ref().slice(offset, length)
365 }
366
367 fn len(&self) -> usize {
368 self.as_ref().len()
369 }
370
371 fn is_empty(&self) -> bool {
372 self.as_ref().is_empty()
373 }
374
375 fn shrink_to_fit(&mut self) {
377 if let Some(slf) = Arc::get_mut(self) {
378 slf.shrink_to_fit();
379 } else {
380 }
382 }
383
384 fn offset(&self) -> usize {
385 self.as_ref().offset()
386 }
387
388 fn nulls(&self) -> Option<&NullBuffer> {
389 self.as_ref().nulls()
390 }
391
392 fn logical_nulls(&self) -> Option<NullBuffer> {
393 self.as_ref().logical_nulls()
394 }
395
396 fn is_null(&self, index: usize) -> bool {
397 self.as_ref().is_null(index)
398 }
399
400 fn is_valid(&self, index: usize) -> bool {
401 self.as_ref().is_valid(index)
402 }
403
404 fn null_count(&self) -> usize {
405 self.as_ref().null_count()
406 }
407
408 fn logical_null_count(&self) -> usize {
409 self.as_ref().logical_null_count()
410 }
411
412 fn is_nullable(&self) -> bool {
413 self.as_ref().is_nullable()
414 }
415
416 fn get_buffer_memory_size(&self) -> usize {
417 self.as_ref().get_buffer_memory_size()
418 }
419
420 fn get_array_memory_size(&self) -> usize {
421 self.as_ref().get_array_memory_size()
422 }
423}
424
425impl<T: Array> Array for &T {
426 fn as_any(&self) -> &dyn Any {
427 T::as_any(self)
428 }
429
430 fn to_data(&self) -> ArrayData {
431 T::to_data(self)
432 }
433
434 fn into_data(self) -> ArrayData {
435 self.to_data()
436 }
437
438 fn data_type(&self) -> &DataType {
439 T::data_type(self)
440 }
441
442 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
443 T::slice(self, offset, length)
444 }
445
446 fn len(&self) -> usize {
447 T::len(self)
448 }
449
450 fn is_empty(&self) -> bool {
451 T::is_empty(self)
452 }
453
454 fn offset(&self) -> usize {
455 T::offset(self)
456 }
457
458 fn nulls(&self) -> Option<&NullBuffer> {
459 T::nulls(self)
460 }
461
462 fn logical_nulls(&self) -> Option<NullBuffer> {
463 T::logical_nulls(self)
464 }
465
466 fn is_null(&self, index: usize) -> bool {
467 T::is_null(self, index)
468 }
469
470 fn is_valid(&self, index: usize) -> bool {
471 T::is_valid(self, index)
472 }
473
474 fn null_count(&self) -> usize {
475 T::null_count(self)
476 }
477
478 fn logical_null_count(&self) -> usize {
479 T::logical_null_count(self)
480 }
481
482 fn is_nullable(&self) -> bool {
483 T::is_nullable(self)
484 }
485
486 fn get_buffer_memory_size(&self) -> usize {
487 T::get_buffer_memory_size(self)
488 }
489
490 fn get_array_memory_size(&self) -> usize {
491 T::get_array_memory_size(self)
492 }
493}
494
/// An [`Array`] whose elements can be read individually by index.
pub trait ArrayAccessor: Array {
    /// The type produced when reading a single element.
    type Item: Send + Sync;

    /// Returns the element at `index`.
    // NOTE(review): presumably panics when `index` is out of bounds — confirm
    // against the implementations.
    fn value(&self, index: usize) -> Self::Item;

    /// Returns the element at `index` through the unchecked access path.
    ///
    /// # Safety
    ///
    // NOTE(review): the exact contract is set by each implementation;
    // presumably the caller must guarantee `index < self.len()` — confirm.
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
}
589
/// Abstraction over array accessors whose elements are `&str`, so generic code
/// can handle the different string array representations uniformly.
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
    /// Returns `true` if the array's string data is ASCII, as reported by the
    /// underlying array type.
    fn is_ascii(&self) -> bool;

    /// Returns an [`ArrayIter`] over the elements of this array.
    fn iter(&self) -> ArrayIter<Self>;
}
604
/// [`StringArrayType`] for references to offset-based string arrays.
///
/// The fully qualified paths select the methods of `GenericStringArray`
/// itself rather than the trait methods being defined here.
impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
    fn is_ascii(&self) -> bool {
        GenericStringArray::<O>::is_ascii(self)
    }

    fn iter(&self) -> ArrayIter<Self> {
        GenericStringArray::<O>::iter(self)
    }
}
/// [`StringArrayType`] for references to view-based string arrays.
///
/// The fully qualified paths select the methods of `StringViewArray` itself
/// rather than the trait methods being defined here.
impl<'a> StringArrayType<'a> for &'a StringViewArray {
    fn is_ascii(&self) -> bool {
        StringViewArray::is_ascii(self)
    }

    fn iter(&self) -> ArrayIter<Self> {
        StringViewArray::iter(self)
    }
}
623
624impl PartialEq for dyn Array + '_ {
625 fn eq(&self, other: &Self) -> bool {
626 self.to_data().eq(&other.to_data())
627 }
628}
629
630impl<T: Array> PartialEq<T> for dyn Array + '_ {
631 fn eq(&self, other: &T) -> bool {
632 self.to_data().eq(&other.to_data())
633 }
634}
635
636impl PartialEq for NullArray {
637 fn eq(&self, other: &NullArray) -> bool {
638 self.to_data().eq(&other.to_data())
639 }
640}
641
642impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
643 fn eq(&self, other: &PrimitiveArray<T>) -> bool {
644 self.to_data().eq(&other.to_data())
645 }
646}
647
648impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
649 fn eq(&self, other: &Self) -> bool {
650 self.to_data().eq(&other.to_data())
651 }
652}
653
654impl PartialEq for BooleanArray {
655 fn eq(&self, other: &BooleanArray) -> bool {
656 self.to_data().eq(&other.to_data())
657 }
658}
659
660impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
661 fn eq(&self, other: &Self) -> bool {
662 self.to_data().eq(&other.to_data())
663 }
664}
665
666impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
667 fn eq(&self, other: &Self) -> bool {
668 self.to_data().eq(&other.to_data())
669 }
670}
671
672impl PartialEq for FixedSizeBinaryArray {
673 fn eq(&self, other: &Self) -> bool {
674 self.to_data().eq(&other.to_data())
675 }
676}
677
678impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
679 fn eq(&self, other: &Self) -> bool {
680 self.to_data().eq(&other.to_data())
681 }
682}
683
684impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
685 fn eq(&self, other: &Self) -> bool {
686 self.to_data().eq(&other.to_data())
687 }
688}
689
690impl PartialEq for MapArray {
691 fn eq(&self, other: &Self) -> bool {
692 self.to_data().eq(&other.to_data())
693 }
694}
695
696impl PartialEq for FixedSizeListArray {
697 fn eq(&self, other: &Self) -> bool {
698 self.to_data().eq(&other.to_data())
699 }
700}
701
702impl PartialEq for StructArray {
703 fn eq(&self, other: &Self) -> bool {
704 self.to_data().eq(&other.to_data())
705 }
706}
707
708impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
709 fn eq(&self, other: &Self) -> bool {
710 self.to_data().eq(&other.to_data())
711 }
712}
713
714pub fn make_array(data: ArrayData) -> ArrayRef {
717 match data.data_type() {
718 DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
719 DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
720 DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
721 DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
722 DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
723 DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
724 DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
725 DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
726 DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
727 DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
728 DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
729 DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
730 DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
731 DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
732 DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
733 DataType::Time32(TimeUnit::Millisecond) => {
734 Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
735 }
736 DataType::Time64(TimeUnit::Microsecond) => {
737 Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
738 }
739 DataType::Time64(TimeUnit::Nanosecond) => {
740 Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
741 }
742 DataType::Timestamp(TimeUnit::Second, _) => {
743 Arc::new(TimestampSecondArray::from(data)) as ArrayRef
744 }
745 DataType::Timestamp(TimeUnit::Millisecond, _) => {
746 Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
747 }
748 DataType::Timestamp(TimeUnit::Microsecond, _) => {
749 Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
750 }
751 DataType::Timestamp(TimeUnit::Nanosecond, _) => {
752 Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
753 }
754 DataType::Interval(IntervalUnit::YearMonth) => {
755 Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
756 }
757 DataType::Interval(IntervalUnit::DayTime) => {
758 Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
759 }
760 DataType::Interval(IntervalUnit::MonthDayNano) => {
761 Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
762 }
763 DataType::Duration(TimeUnit::Second) => {
764 Arc::new(DurationSecondArray::from(data)) as ArrayRef
765 }
766 DataType::Duration(TimeUnit::Millisecond) => {
767 Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
768 }
769 DataType::Duration(TimeUnit::Microsecond) => {
770 Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
771 }
772 DataType::Duration(TimeUnit::Nanosecond) => {
773 Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
774 }
775 DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
776 DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
777 DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
778 DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
779 DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
780 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
781 DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
782 DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
783 DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
784 DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
785 DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
786 DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
787 DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
788 DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
789 DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
790 DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
791 DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
792 DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
793 DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
794 DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
795 DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
796 DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
797 DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
798 DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
799 dt => panic!("Unexpected dictionary key type {dt:?}"),
800 },
801 DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
802 DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
803 DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
804 DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
805 dt => panic!("Unexpected data type for run_ends array {dt:?}"),
806 },
807 DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
808 DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
809 DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
810 dt => panic!("Unexpected data type {dt:?}"),
811 }
812}
813
814pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
827 let data = ArrayData::new_empty(data_type);
828 make_array(data)
829}
830
831pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
845 make_array(ArrayData::new_null(data_type, length))
846}
847
848unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
854 match data.is_empty() && data.buffers()[0].is_empty() {
855 true => OffsetBuffer::new_empty(),
856 false => {
857 let buffer =
858 ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len() + 1);
859 unsafe { OffsetBuffer::new_unchecked(buffer) }
862 }
863 }
864}
865
866fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
868where
869 A: Array,
870 F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
871{
872 let head = std::cmp::min(10, array.len());
873
874 for i in 0..head {
875 if array.is_null(i) {
876 writeln!(f, " null,")?;
877 } else {
878 write!(f, " ")?;
879 print_item(array, i, f)?;
880 writeln!(f, ",")?;
881 }
882 }
883 if array.len() > 10 {
884 if array.len() > 20 {
885 writeln!(f, " ...{} elements...,", array.len() - 20)?;
886 }
887
888 let tail = std::cmp::max(head, array.len() - 10);
889
890 for i in tail..array.len() {
891 if array.is_null(i) {
892 writeln!(f, " null,")?;
893 } else {
894 write!(f, " ")?;
895 print_item(array, i, f)?;
896 writeln!(f, ",")?;
897 }
898 }
899 }
900 Ok(())
901}
902
/// Tests for the array constructors (`new_empty_array`, `new_null_array`),
/// the memory-size accessors, and `Array` usage through references and `ArrayRef`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::{as_union_array, downcast_array};
    use crate::downcast_run_array;
    use arrow_buffer::MutableBuffer;
    use arrow_schema::{Field, Fields, UnionFields, UnionMode};

    /// An empty primitive array has length zero and no values.
    #[test]
    fn test_empty_primitive() {
        let array = new_empty_array(&DataType::Int32);
        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(a.len(), 0);
        let expected: &[i32] = &[];
        assert_eq!(a.values(), expected);
    }

    /// An empty string array still carries a leading offset of zero.
    #[test]
    fn test_empty_variable_sized() {
        let array = new_empty_array(&DataType::Utf8);
        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(a.len(), 0);
        assert_eq!(a.value_offsets()[0], 0i32);
    }

    /// An empty list array still carries a leading offset of zero.
    #[test]
    fn test_empty_list_primitive() {
        let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
        let array = new_empty_array(&data_type);
        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(a.len(), 0);
        assert_eq!(a.value_offsets()[0], 0i32);
    }

    /// Every slot of a null boolean array is null.
    #[test]
    fn test_null_boolean() {
        let array = new_null_array(&DataType::Boolean, 9);
        let a = array.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(a.len(), 9);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    /// Every slot of a null primitive array is null.
    #[test]
    fn test_null_primitive() {
        let array = new_null_array(&DataType::Int32, 9);
        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(a.len(), 9);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    /// A null struct array can be built even though its only child field is
    /// declared non-nullable.
    #[test]
    fn test_null_struct() {
        let struct_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
        let array = new_null_array(&struct_type, 9);

        let a = array.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.column(0).len(), 9);
        for i in 0..9 {
            assert!(a.is_null(i));
        }

        // The result is discarded: this only checks that slicing does not panic.
        a.slice(0, 5);
    }

    /// A null string array has all-null slots and a final offset of zero.
    #[test]
    fn test_null_variable_sized() {
        let array = new_null_array(&DataType::Utf8, 9);
        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.value_offsets()[9], 0i32);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    /// A null list array has all-null slots and a final offset of zero.
    #[test]
    fn test_null_list_primitive() {
        let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
        let array = new_null_array(&data_type, 9);
        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.value_offsets()[9], 0i32);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    /// A null map array has all-null slots and a final offset of zero.
    #[test]
    fn test_null_map() {
        let data_type = DataType::Map(
            Arc::new(Field::new(
                "entry",
                DataType::Struct(Fields::from(vec![
                    Field::new("key", DataType::Utf8, false),
                    Field::new("value", DataType::Int32, true),
                ])),
                false,
            )),
            false,
        );
        let array = new_null_array(&data_type, 9);
        let a = array.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(a.len(), 9);
        assert_eq!(a.value_offsets()[9], 0i32);
        for i in 0..9 {
            assert!(a.is_null(i));
        }
    }

    /// A null dictionary array equals one collected from nine `None` values,
    /// and both have a first buffer of the same length.
    #[test]
    fn test_null_dictionary() {
        let values =
            vec![None, None, None, None, None, None, None, None, None] as Vec<Option<&str>>;

        let array: DictionaryArray<Int8Type> = values.into_iter().collect();
        let array = Arc::new(array) as ArrayRef;

        let null_array = new_null_array(array.data_type(), 9);
        assert_eq!(&array, &null_array);
        assert_eq!(
            array.to_data().buffers()[0].len(),
            null_array.to_data().buffers()[0].len()
        );
    }

    /// A null union array, sparse or dense, reports no physical nulls but
    /// reports every slot as logically null.
    #[test]
    fn test_null_union() {
        for mode in [UnionMode::Sparse, UnionMode::Dense] {
            let data_type = DataType::Union(
                UnionFields::new(
                    vec![2, 1],
                    vec![
                        Field::new("foo", DataType::Int32, true),
                        Field::new("bar", DataType::Int64, true),
                    ],
                ),
                mode,
            );
            let array = new_null_array(&data_type, 4);

            let array = as_union_array(array.as_ref());
            assert_eq!(array.len(), 4);
            // Physical and logical null counts differ for unions.
            assert_eq!(array.null_count(), 0);
            assert_eq!(array.logical_null_count(), 4);

            // Each slot resolves to a one-element child array holding a null.
            for i in 0..4 {
                let a = array.value(i);
                assert_eq!(a.len(), 1);
                assert_eq!(a.null_count(), 1);
                assert_eq!(a.logical_null_count(), 1);
                assert!(a.is_null(0))
            }

            array.to_data().validate_full().unwrap();
        }
    }

    /// A null run-end encoded array consists of a single run covering all
    /// logical slots, for each supported run-end type.
    #[test]
    // NOTE(review): presumably silences a lint raised by the
    // `downcast_run_array!` expansion — confirm.
    #[allow(unused_parens)]
    fn test_null_runs() {
        for r in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let data_type = DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", r, false)),
                Arc::new(Field::new("values", DataType::Utf8, true)),
            );

            let array = new_null_array(&data_type, 4);
            let array = array.as_ref();

            downcast_run_array! {
                array => {
                    assert_eq!(array.len(), 4);
                    // Physical and logical null counts differ for run arrays.
                    assert_eq!(array.null_count(), 0);
                    assert_eq!(array.logical_null_count(), 4);
                    assert_eq!(array.values().len(), 1);
                    assert_eq!(array.values().null_count(), 1);
                    assert_eq!(array.run_ends().len(), 4);
                    assert_eq!(array.run_ends().values(), &[4]);

                    // Every logical index maps to the single physical value.
                    let idx = array.get_physical_indices(&[0, 1, 2, 3]).unwrap();
                    assert_eq!(idx, &[0,0,0,0]);
                }
                d => unreachable!("{d}")
            }
        }
    }

    /// A null fixed-size binary array is all null for several byte widths.
    #[test]
    fn test_null_fixed_size_binary() {
        for size in [1, 2, 7] {
            let array = new_null_array(&DataType::FixedSizeBinary(size), 6);
            let array = array
                .as_ref()
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();

            assert_eq!(array.len(), 6);
            assert_eq!(array.null_count(), 6);
            assert_eq!(array.logical_null_count(), 6);
            array.iter().for_each(|x| assert!(x.is_none()));
        }
    }

    /// A `NullArray` reports zero buffer bytes and a total size of one `usize`.
    #[test]
    fn test_memory_size_null() {
        let null_arr = NullArray::new(32);

        assert_eq!(0, null_arr.get_buffer_memory_size());
        assert_eq!(
            std::mem::size_of::<usize>(),
            null_arr.get_array_memory_size()
        );
    }

    /// The size difference between a 128-element `i64` array and an empty one
    /// is exactly the bytes of the 128 values.
    #[test]
    fn test_memory_size_primitive() {
        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));

        // Subtracting the empty array's size removes the fixed overhead.
        assert_eq!(
            arr.get_array_memory_size() - empty.get_array_memory_size(),
            128 * std::mem::size_of::<i64>()
        );
    }

    /// Each half-slice reports the same memory size as the array it was cut from.
    #[test]
    fn test_memory_size_primitive_sliced() {
        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let slice1 = arr.slice(0, 64);
        let slice2 = arr.slice(64, 64);

        assert_eq!(slice1.get_array_memory_size(), arr.get_array_memory_size());
        assert_eq!(slice2.get_array_memory_size(), arr.get_array_memory_size());
    }

    /// Memory size of a primitive array that carries a validity bitmap.
    #[test]
    fn test_memory_size_primitive_nullable() {
        let arr: PrimitiveArray<Int64Type> = (0..128)
            .map(|i| if i % 20 == 0 { Some(i) } else { None })
            .collect();
        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(
            ArrayData::builder(arr.data_type().clone())
                .add_buffer(MutableBuffer::new(0).into())
                .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
                .build()
                .unwrap(),
        );

        // With empty buffers, the reported size is just the struct itself.
        assert_eq!(
            std::mem::size_of::<PrimitiveArray<Int64Type>>(),
            empty_with_bitmap.get_array_memory_size()
        );

        // 128 values plus 64 bytes; the 64 is presumably the allocation
        // backing the validity bitmap — TODO confirm the padding rule.
        assert_eq!(
            arr.get_array_memory_size() - empty_with_bitmap.get_array_memory_size(),
            128 * std::mem::size_of::<i64>() + 64
        );
    }

    /// A dictionary array's size, relative to an empty one, is the sum of its
    /// keys' and values' sizes.
    #[test]
    fn test_memory_size_dictionary() {
        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
            (0..256).map(|i| (i % values.len()) as i16),
        );

        let dict_data_type = DataType::Dictionary(
            Box::new(keys.data_type().clone()),
            Box::new(values.data_type().clone()),
        );
        // Re-type the keys' data as a dictionary and attach the values as its child.
        let dict_data = keys
            .into_data()
            .into_builder()
            .data_type(dict_data_type)
            .child_data(vec![values.into_data()])
            .build()
            .unwrap();

        let empty_data = ArrayData::new_empty(&DataType::Dictionary(
            Box::new(DataType::Int16),
            Box::new(DataType::Int64),
        ));

        let arr = DictionaryArray::<Int16Type>::from(dict_data);
        let empty = DictionaryArray::<Int16Type>::from(empty_data);

        let expected_keys_size = 256 * std::mem::size_of::<i16>();
        assert_eq!(
            arr.keys().get_array_memory_size() - empty.keys().get_array_memory_size(),
            expected_keys_size
        );

        let expected_values_size = 16 * std::mem::size_of::<i64>();
        assert_eq!(
            arr.values().get_array_memory_size() - empty.values().get_array_memory_size(),
            expected_values_size
        );

        let expected_size = expected_keys_size + expected_values_size;
        assert_eq!(
            arr.get_array_memory_size() - empty.get_array_memory_size(),
            expected_size
        );
    }

    /// Stand-in for an API that accepts any array as `&dyn Array`.
    fn compute_my_thing(arr: &dyn Array) -> bool {
        !arr.is_empty()
    }

    /// A concrete array, an `ArrayRef`, and the array behind an `ArrayRef` can
    /// all be passed as `&dyn Array`.
    #[test]
    fn test_array_ref_as_array() {
        let arr: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        // A reference to a concrete array.
        assert!(compute_my_thing(&arr));

        let arr: ArrayRef = Arc::new(arr);
        // A reference to the `ArrayRef` itself, then to the array it wraps.
        assert!(compute_my_thing(&arr));
        assert!(compute_my_thing(arr.as_ref()));
    }

    /// `downcast_array` recovers an owned concrete array from an `ArrayRef`.
    #[test]
    fn test_downcast_array() {
        let array: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        let boxed: ArrayRef = Arc::new(array);
        let array: Int32Array = downcast_array(&boxed);

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(array, expected);
    }
}