1use crate::bit_iterator::BitSliceIterator;
22use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
23use arrow_buffer::{
24 ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, bit_util, i256,
25};
26use arrow_schema::{ArrowError, DataType, UnionMode};
27use std::mem;
28use std::ops::Range;
29use std::sync::Arc;
30
31use crate::{equal, validate_binary_view, validate_string_view};
32
33#[inline]
34pub(crate) fn contains_nulls(
35 null_bit_buffer: Option<&NullBuffer>,
36 offset: usize,
37 len: usize,
38) -> bool {
39 match null_bit_buffer {
40 Some(buffer) => {
41 match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len).next() {
42 Some((start, end)) => start != 0 || end != len,
43 None => len != 0, }
45 }
46 None => false, }
48}
49
50#[inline]
51pub(crate) fn count_nulls(
52 null_bit_buffer: Option<&NullBuffer>,
53 offset: usize,
54 len: usize,
55) -> usize {
56 if let Some(buf) = null_bit_buffer {
57 let buffer = buf.buffer();
58 len - buffer.count_set_bits_offset(offset + buf.offset(), len)
59 } else {
60 0
61 }
62}
63
64#[inline]
66pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuffer; 2] {
67 let empty_buffer = MutableBuffer::new(0);
68 match data_type {
69 DataType::Null => [empty_buffer, MutableBuffer::new(0)],
70 DataType::Boolean => {
71 let bytes = bit_util::ceil(capacity, 8);
72 let buffer = MutableBuffer::new(bytes);
73 [buffer, empty_buffer]
74 }
75 DataType::UInt8
76 | DataType::UInt16
77 | DataType::UInt32
78 | DataType::UInt64
79 | DataType::Int8
80 | DataType::Int16
81 | DataType::Int32
82 | DataType::Int64
83 | DataType::Float16
84 | DataType::Float32
85 | DataType::Float64
86 | DataType::Decimal32(_, _)
87 | DataType::Decimal64(_, _)
88 | DataType::Decimal128(_, _)
89 | DataType::Decimal256(_, _)
90 | DataType::Date32
91 | DataType::Time32(_)
92 | DataType::Date64
93 | DataType::Time64(_)
94 | DataType::Duration(_)
95 | DataType::Timestamp(_, _)
96 | DataType::Interval(_) => [
97 MutableBuffer::new(capacity * data_type.primitive_width().unwrap()),
98 empty_buffer,
99 ],
100 DataType::Utf8 | DataType::Binary => {
101 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
102 buffer.push(0i32);
104 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
105 }
106 DataType::LargeUtf8 | DataType::LargeBinary => {
107 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
108 buffer.push(0i64);
110 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
111 }
112 DataType::BinaryView | DataType::Utf8View => [
113 MutableBuffer::new(capacity * mem::size_of::<u128>()),
114 empty_buffer,
115 ],
116 DataType::List(_) | DataType::Map(_, _) => {
117 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
119 buffer.push(0i32);
120 [buffer, empty_buffer]
121 }
122 DataType::ListView(_) => [
123 MutableBuffer::new(capacity * mem::size_of::<i32>()),
124 MutableBuffer::new(capacity * mem::size_of::<i32>()),
125 ],
126 DataType::LargeList(_) => {
127 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
129 buffer.push(0i64);
130 [buffer, empty_buffer]
131 }
132 DataType::LargeListView(_) => [
133 MutableBuffer::new(capacity * mem::size_of::<i64>()),
134 MutableBuffer::new(capacity * mem::size_of::<i64>()),
135 ],
136 DataType::FixedSizeBinary(size) => {
137 [MutableBuffer::new(capacity * *size as usize), empty_buffer]
138 }
139 DataType::Dictionary(k, _) => [
140 MutableBuffer::new(capacity * k.primitive_width().unwrap()),
141 empty_buffer,
142 ],
143 DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
144 [empty_buffer, MutableBuffer::new(0)]
145 }
146 DataType::Union(_, mode) => {
147 let type_ids = MutableBuffer::new(capacity * mem::size_of::<i8>());
148 match mode {
149 UnionMode::Sparse => [type_ids, empty_buffer],
150 UnionMode::Dense => {
151 let offsets = MutableBuffer::new(capacity * mem::size_of::<i32>());
152 [type_ids, offsets]
153 }
154 }
155 }
156 }
157}
158
159#[derive(Debug, Clone)]
205pub struct ArrayData {
206 data_type: DataType,
208
209 len: usize,
211
212 offset: usize,
217
218 buffers: Vec<Buffer>,
231
232 child_data: Vec<ArrayData>,
242
243 nulls: Option<NullBuffer>,
251}
252
253pub type ArrayDataRef = Arc<ArrayData>;
255
256fn checked_len_plus_offset(
257 data_type: &DataType,
258 len: usize,
259 offset: usize,
260) -> Result<usize, ArrowError> {
261 len.checked_add(offset).ok_or_else(|| {
262 ArrowError::InvalidArgumentError(format!(
263 "Length {len} with offset {offset} overflows usize for {data_type}"
264 ))
265 })
266}
267
268impl ArrayData {
269 pub unsafe fn new_unchecked(
286 data_type: DataType,
287 len: usize,
288 null_count: Option<usize>,
289 null_bit_buffer: Option<Buffer>,
290 offset: usize,
291 buffers: Vec<Buffer>,
292 child_data: Vec<ArrayData>,
293 ) -> Self {
294 let mut skip_validation = UnsafeFlag::new();
295 unsafe { skip_validation.set(true) };
297
298 ArrayDataBuilder {
299 data_type,
300 len,
301 null_count,
302 null_bit_buffer,
303 nulls: None,
304 offset,
305 buffers,
306 child_data,
307 align_buffers: false,
308 skip_validation,
309 }
310 .build()
311 .unwrap()
312 }
313
314 pub fn try_new(
325 data_type: DataType,
326 len: usize,
327 null_bit_buffer: Option<Buffer>,
328 offset: usize,
329 buffers: Vec<Buffer>,
330 child_data: Vec<ArrayData>,
331 ) -> Result<Self, ArrowError> {
332 if let Some(null_bit_buffer) = null_bit_buffer.as_ref() {
336 let len_plus_offset = checked_len_plus_offset(&data_type, len, offset)?;
337 let needed_len = bit_util::ceil(len_plus_offset, 8);
338 if null_bit_buffer.len() < needed_len {
339 return Err(ArrowError::InvalidArgumentError(format!(
340 "null_bit_buffer size too small. got {} needed {}",
341 null_bit_buffer.len(),
342 needed_len
343 )));
344 }
345 }
346 let new_self = unsafe {
348 Self::new_unchecked(
349 data_type,
350 len,
351 None,
352 null_bit_buffer,
353 offset,
354 buffers,
355 child_data,
356 )
357 };
358
359 new_self.validate_data()?;
364 Ok(new_self)
365 }
366
367 #[inline]
369 pub const fn builder(data_type: DataType) -> ArrayDataBuilder {
370 ArrayDataBuilder::new(data_type)
371 }
372
373 #[inline]
375 pub const fn data_type(&self) -> &DataType {
376 &self.data_type
377 }
378
379 pub fn buffers(&self) -> &[Buffer] {
381 &self.buffers
382 }
383
384 pub fn child_data(&self) -> &[ArrayData] {
387 &self.child_data[..]
388 }
389
390 #[inline]
392 pub fn is_null(&self, i: usize) -> bool {
393 match &self.nulls {
394 Some(v) => v.is_null(i),
395 None => false,
396 }
397 }
398
399 #[inline]
403 pub fn nulls(&self) -> Option<&NullBuffer> {
404 self.nulls.as_ref()
405 }
406
407 #[inline]
409 pub fn is_valid(&self, i: usize) -> bool {
410 !self.is_null(i)
411 }
412
413 #[inline]
415 pub const fn len(&self) -> usize {
416 self.len
417 }
418
419 #[inline]
421 pub const fn is_empty(&self) -> bool {
422 self.len == 0
423 }
424
425 #[inline]
427 pub const fn offset(&self) -> usize {
428 self.offset
429 }
430
431 #[inline]
433 pub fn null_count(&self) -> usize {
434 self.nulls
435 .as_ref()
436 .map(|x| x.null_count())
437 .unwrap_or_default()
438 }
439
440 pub fn get_buffer_memory_size(&self) -> usize {
452 let mut size = 0;
453 for buffer in &self.buffers {
454 size += buffer.capacity();
455 }
456 if let Some(bitmap) = &self.nulls {
457 size += bitmap.buffer().capacity()
458 }
459 for child in &self.child_data {
460 size += child.get_buffer_memory_size();
461 }
462 size
463 }
464
465 pub fn get_slice_memory_size(&self) -> Result<usize, ArrowError> {
478 let mut result: usize = 0;
479 let layout = layout(&self.data_type);
480
481 for spec in layout.buffers.iter() {
482 match spec {
483 BufferSpec::FixedWidth { byte_width, .. } => {
484 let buffer_size = self.len.checked_mul(*byte_width).ok_or_else(|| {
485 ArrowError::ComputeError(
486 "Integer overflow computing buffer size".to_string(),
487 )
488 })?;
489 result += buffer_size;
490 }
491 BufferSpec::VariableWidth => {
492 let buffer_len = match self.data_type {
493 DataType::Utf8 | DataType::Binary => {
494 let offsets = self.typed_offsets::<i32>()?;
495 (offsets[self.len] - offsets[0]) as usize
496 }
497 DataType::LargeUtf8 | DataType::LargeBinary => {
498 let offsets = self.typed_offsets::<i64>()?;
499 (offsets[self.len] - offsets[0]) as usize
500 }
501 _ => {
502 return Err(ArrowError::NotYetImplemented(format!(
503 "Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
504 self.data_type
505 )));
506 }
507 };
508 result += buffer_len;
509 }
510 BufferSpec::BitMap => {
511 let buffer_size = bit_util::ceil(self.len, 8);
512 result += buffer_size;
513 }
514 BufferSpec::AlwaysNull => {
515 }
517 }
518 }
519
520 if self.nulls().is_some() {
521 result += bit_util::ceil(self.len, 8);
522 }
523
524 for child in &self.child_data {
525 result += child.get_slice_memory_size()?;
526 }
527 Ok(result)
528 }
529
530 pub fn get_array_memory_size(&self) -> usize {
539 let mut size = mem::size_of_val(self);
540
541 for buffer in &self.buffers {
543 size += mem::size_of::<Buffer>();
544 size += buffer.capacity();
545 }
546 if let Some(nulls) = &self.nulls {
547 size += nulls.buffer().capacity();
548 }
549 for child in &self.child_data {
550 size += child.get_array_memory_size();
551 }
552
553 size
554 }
555
556 pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
564 let end = offset
565 .checked_add(length)
566 .expect("offset + length overflow");
567 assert!(end <= self.len());
568
569 if let DataType::Struct(_) = self.data_type() {
570 let new_offset = self.offset + offset;
572 ArrayData {
573 data_type: self.data_type().clone(),
574 len: length,
575 offset: new_offset,
576 buffers: self.buffers.clone(),
577 child_data: self
579 .child_data()
580 .iter()
581 .map(|data| data.slice(offset, length))
582 .collect(),
583 nulls: self.nulls.as_ref().map(|x| x.slice(offset, length)),
584 }
585 } else {
586 let mut new_data = self.clone();
587
588 new_data.len = length;
589 new_data.offset = offset + self.offset;
590 new_data.nulls = self.nulls.as_ref().map(|x| x.slice(offset, length));
591
592 new_data
593 }
594 }
595
596 pub fn buffer<T: ArrowNativeType>(&self, buffer: usize) -> &[T] {
603 &self.buffers()[buffer].typed_data()[self.offset..]
604 }
605
606 pub fn new_null(data_type: &DataType, len: usize) -> Self {
608 let bit_len = bit_util::ceil(len, 8);
609 let zeroed = |len: usize| Buffer::from(MutableBuffer::from_len_zeroed(len));
610
611 let (buffers, child_data, has_nulls) = match data_type.primitive_width() {
612 Some(width) => (vec![zeroed(width * len)], vec![], true),
613 None => match data_type {
614 DataType::Null => (vec![], vec![], false),
615 DataType::Boolean => (vec![zeroed(bit_len)], vec![], true),
616 DataType::Binary | DataType::Utf8 => {
617 (vec![zeroed((len + 1) * 4), zeroed(0)], vec![], true)
618 }
619 DataType::BinaryView | DataType::Utf8View => (vec![zeroed(len * 16)], vec![], true),
620 DataType::LargeBinary | DataType::LargeUtf8 => {
621 (vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true)
622 }
623 DataType::FixedSizeBinary(i) => (vec![zeroed(*i as usize * len)], vec![], true),
624 DataType::List(f) | DataType::Map(f, _) => (
625 vec![zeroed((len + 1) * 4)],
626 vec![ArrayData::new_empty(f.data_type())],
627 true,
628 ),
629 DataType::LargeList(f) => (
630 vec![zeroed((len + 1) * 8)],
631 vec![ArrayData::new_empty(f.data_type())],
632 true,
633 ),
634 DataType::ListView(f) => (
635 vec![zeroed(len * 4), zeroed(len * 4)],
636 vec![ArrayData::new_empty(f.data_type())],
637 true,
638 ),
639 DataType::LargeListView(f) => (
640 vec![zeroed(len * 8), zeroed(len * 8)],
641 vec![ArrayData::new_empty(f.data_type())],
642 true,
643 ),
644 DataType::FixedSizeList(f, list_len) => (
645 vec![],
646 vec![ArrayData::new_null(f.data_type(), *list_len as usize * len)],
647 true,
648 ),
649 DataType::Struct(fields) => (
650 vec![],
651 fields
652 .iter()
653 .map(|f| Self::new_null(f.data_type(), len))
654 .collect(),
655 true,
656 ),
657 DataType::Dictionary(k, v) => (
658 vec![zeroed(k.primitive_width().unwrap() * len)],
659 vec![ArrayData::new_empty(v.as_ref())],
660 true,
661 ),
662 DataType::Union(f, mode) => {
663 let (id, _) = f.iter().next().unwrap();
664 let ids = Buffer::from_iter(std::iter::repeat_n(id, len));
665 let buffers = match mode {
666 UnionMode::Sparse => vec![ids],
667 UnionMode::Dense => {
668 let end_offset = i32::from_usize(len).unwrap();
669 vec![ids, Buffer::from_iter(0_i32..end_offset)]
670 }
671 };
672
673 let children = f
674 .iter()
675 .enumerate()
676 .map(|(idx, (_, f))| {
677 if idx == 0 || *mode == UnionMode::Sparse {
678 Self::new_null(f.data_type(), len)
679 } else {
680 Self::new_empty(f.data_type())
681 }
682 })
683 .collect();
684
685 (buffers, children, false)
686 }
687 DataType::RunEndEncoded(r, v) => {
688 let runs = match r.data_type() {
689 DataType::Int16 => {
690 let i = i16::from_usize(len).expect("run overflow");
691 Buffer::from_slice_ref([i])
692 }
693 DataType::Int32 => {
694 let i = i32::from_usize(len).expect("run overflow");
695 Buffer::from_slice_ref([i])
696 }
697 DataType::Int64 => {
698 let i = i64::from_usize(len).expect("run overflow");
699 Buffer::from_slice_ref([i])
700 }
701 dt => unreachable!("Invalid run ends data type {dt}"),
702 };
703
704 let builder = ArrayData::builder(r.data_type().clone())
705 .len(1)
706 .buffers(vec![runs]);
707
708 let runs = unsafe { builder.build_unchecked() };
711 (
712 vec![],
713 vec![runs, ArrayData::new_null(v.data_type(), 1)],
714 false,
715 )
716 }
717 d => unreachable!("{d}"),
718 },
719 };
720
721 let mut builder = ArrayDataBuilder::new(data_type.clone())
722 .len(len)
723 .buffers(buffers)
724 .child_data(child_data);
725
726 if has_nulls {
727 builder = builder.nulls(Some(NullBuffer::new_null(len)))
728 }
729
730 unsafe { builder.build_unchecked() }
733 }
734
735 pub fn new_empty(data_type: &DataType) -> Self {
737 Self::new_null(data_type, 0)
738 }
739
740 pub fn align_buffers(&mut self) {
749 let layout = layout(&self.data_type);
750 for (buffer, spec) in self.buffers.iter_mut().zip(&layout.buffers) {
751 if let BufferSpec::FixedWidth { alignment, .. } = spec {
752 if buffer.as_ptr().align_offset(*alignment) != 0 {
753 *buffer = Buffer::from_slice_ref(buffer.as_ref());
754 }
755 }
756 }
757 for data in self.child_data.iter_mut() {
759 data.align_buffers()
760 }
761 }
762
763 pub fn validate(&self) -> Result<(), ArrowError> {
774 let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
776
777 let layout = layout(&self.data_type);
779
780 if !layout.can_contain_null_mask && self.nulls.is_some() {
781 return Err(ArrowError::InvalidArgumentError(format!(
782 "Arrays of type {:?} cannot contain a null bitmask",
783 self.data_type,
784 )));
785 }
786
787 if self.buffers.len() < layout.buffers.len()
789 || (!layout.variadic && self.buffers.len() != layout.buffers.len())
790 {
791 return Err(ArrowError::InvalidArgumentError(format!(
792 "Expected {} buffers in array of type {:?}, got {}",
793 layout.buffers.len(),
794 self.data_type,
795 self.buffers.len(),
796 )));
797 }
798
799 for (i, (buffer, spec)) in self.buffers.iter().zip(layout.buffers.iter()).enumerate() {
800 match spec {
801 BufferSpec::FixedWidth {
802 byte_width,
803 alignment,
804 } => {
805 let min_buffer_size = len_plus_offset.saturating_mul(*byte_width);
806
807 if buffer.len() < min_buffer_size {
808 return Err(ArrowError::InvalidArgumentError(format!(
809 "Need at least {} bytes in buffers[{}] in array of type {:?}, but got {}",
810 min_buffer_size,
811 i,
812 self.data_type,
813 buffer.len()
814 )));
815 }
816
817 let align_offset = buffer.as_ptr().align_offset(*alignment);
818 if align_offset != 0 {
819 return Err(ArrowError::InvalidArgumentError(format!(
820 "Misaligned buffers[{i}] in array of type {:?}, offset from expected alignment of {alignment} by {}",
821 self.data_type,
822 align_offset.min(alignment - align_offset)
823 )));
824 }
825 }
826 BufferSpec::VariableWidth => {
827 }
831 BufferSpec::BitMap => {
832 let min_buffer_size = bit_util::ceil(len_plus_offset, 8);
833 if buffer.len() < min_buffer_size {
834 return Err(ArrowError::InvalidArgumentError(format!(
835 "Need at least {} bytes for bitmap in buffers[{}] in array of type {:?}, but got {}",
836 min_buffer_size,
837 i,
838 self.data_type,
839 buffer.len()
840 )));
841 }
842 }
843 BufferSpec::AlwaysNull => {
844 }
846 }
847 }
848
849 if let Some(nulls) = self.nulls() {
851 if nulls.null_count() > self.len {
852 return Err(ArrowError::InvalidArgumentError(format!(
853 "null_count {} for an array exceeds length of {} elements",
854 nulls.null_count(),
855 self.len
856 )));
857 }
858
859 let actual_len = nulls.validity().len();
860 let needed_len = bit_util::ceil(len_plus_offset, 8);
861 if actual_len < needed_len {
862 return Err(ArrowError::InvalidArgumentError(format!(
863 "null_bit_buffer size too small. got {actual_len} needed {needed_len}",
864 )));
865 }
866
867 if nulls.len() != self.len {
868 return Err(ArrowError::InvalidArgumentError(format!(
869 "null buffer incorrect size. got {} expected {}",
870 nulls.len(),
871 self.len
872 )));
873 }
874 }
875
876 self.validate_child_data()?;
877
878 match &self.data_type {
880 DataType::Utf8 | DataType::Binary => {
881 self.validate_offsets::<i32>(self.buffers[1].len())?;
882 }
883 DataType::LargeUtf8 | DataType::LargeBinary => {
884 self.validate_offsets::<i64>(self.buffers[1].len())?;
885 }
886 DataType::Dictionary(key_type, _value_type) => {
887 if !DataType::is_dictionary_key_type(key_type) {
889 return Err(ArrowError::InvalidArgumentError(format!(
890 "Dictionary key type must be integer, but was {key_type}"
891 )));
892 }
893 }
894 DataType::RunEndEncoded(run_ends_type, _) => {
895 if run_ends_type.is_nullable() {
896 return Err(ArrowError::InvalidArgumentError(
897 "The nullable should be set to false for the field defining run_ends array.".to_string()
898 ));
899 }
900 if !DataType::is_run_ends_type(run_ends_type.data_type()) {
901 return Err(ArrowError::InvalidArgumentError(format!(
902 "RunArray run_ends types must be Int16, Int32 or Int64, but was {}",
903 run_ends_type.data_type()
904 )));
905 }
906 }
907 _ => {}
908 };
909
910 Ok(())
911 }
912
913 fn typed_offsets<T: ArrowNativeType + num_traits::Num>(&self) -> Result<&[T], ArrowError> {
920 if self.len == 0 && self.buffers[0].is_empty() {
922 return Ok(&[]);
923 }
924
925 let len = checked_len_plus_offset(&self.data_type, self.len, 1)?;
926
927 self.typed_buffer(0, len)
928 }
929
930 fn typed_buffer<T: ArrowNativeType + num_traits::Num>(
932 &self,
933 idx: usize,
934 len: usize,
935 ) -> Result<&[T], ArrowError> {
936 let buffer = &self.buffers[idx];
937
938 let required_elements = checked_len_plus_offset(&self.data_type, len, self.offset)?;
939 let byte_width = mem::size_of::<T>();
940 let required_len = required_elements.checked_mul(byte_width).ok_or_else(|| {
941 ArrowError::InvalidArgumentError(format!(
942 "Buffer {idx} of {} byte length overflow: {} elements of {} bytes exceeds usize",
943 self.data_type, required_elements, byte_width
944 ))
945 })?;
946
947 if buffer.len() < required_len {
948 return Err(ArrowError::InvalidArgumentError(format!(
949 "Buffer {} of {} isn't large enough. Expected {} bytes got {}",
950 idx,
951 self.data_type,
952 required_len,
953 buffer.len()
954 )));
955 }
956
957 Ok(&buffer.typed_data::<T>()[self.offset..required_elements])
958 }
959
960 fn validate_offsets<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
963 &self,
964 values_length: usize,
965 ) -> Result<(), ArrowError> {
966 let offsets = self.typed_offsets::<T>()?;
968 if offsets.is_empty() {
969 return Ok(());
970 }
971
972 let first_offset = offsets[0].to_usize().ok_or_else(|| {
973 ArrowError::InvalidArgumentError(format!(
974 "Error converting offset[0] ({}) to usize for {}",
975 offsets[0], self.data_type
976 ))
977 })?;
978
979 let last_offset = offsets[self.len].to_usize().ok_or_else(|| {
980 ArrowError::InvalidArgumentError(format!(
981 "Error converting offset[{}] ({}) to usize for {}",
982 self.len, offsets[self.len], self.data_type
983 ))
984 })?;
985
986 if first_offset > values_length {
987 return Err(ArrowError::InvalidArgumentError(format!(
988 "First offset {} of {} is larger than values length {}",
989 first_offset, self.data_type, values_length,
990 )));
991 }
992
993 if last_offset > values_length {
994 return Err(ArrowError::InvalidArgumentError(format!(
995 "Last offset {} of {} is larger than values length {}",
996 last_offset, self.data_type, values_length,
997 )));
998 }
999
1000 if first_offset > last_offset {
1001 return Err(ArrowError::InvalidArgumentError(format!(
1002 "First offset {} in {} is smaller than last offset {}",
1003 first_offset, self.data_type, last_offset,
1004 )));
1005 }
1006
1007 Ok(())
1008 }
1009
1010 fn validate_offsets_and_sizes<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
1013 &self,
1014 values_length: usize,
1015 ) -> Result<(), ArrowError> {
1016 let offsets: &[T] = self.typed_buffer(0, self.len)?;
1017 let sizes: &[T] = self.typed_buffer(1, self.len)?;
1018 if offsets.len() != sizes.len() {
1019 return Err(ArrowError::ComputeError(format!(
1020 "ListView offsets len {} does not match sizes len {}",
1021 offsets.len(),
1022 sizes.len()
1023 )));
1024 }
1025
1026 for i in 0..sizes.len() {
1027 let size = sizes[i].to_usize().ok_or_else(|| {
1028 ArrowError::InvalidArgumentError(format!(
1029 "Error converting size[{}] ({}) to usize for {}",
1030 i, sizes[i], self.data_type
1031 ))
1032 })?;
1033 let offset = offsets[i].to_usize().ok_or_else(|| {
1034 ArrowError::InvalidArgumentError(format!(
1035 "Error converting offset[{}] ({}) to usize for {}",
1036 i, offsets[i], self.data_type
1037 ))
1038 })?;
1039 if size
1040 .checked_add(offset)
1041 .expect("Offset and size have exceeded the usize boundary")
1042 > values_length
1043 {
1044 return Err(ArrowError::InvalidArgumentError(format!(
1045 "Size {} at index {} is larger than the remaining values for {}",
1046 size, i, self.data_type
1047 )));
1048 }
1049 }
1050 Ok(())
1051 }
1052
1053 fn validate_child_data(&self) -> Result<(), ArrowError> {
1055 match &self.data_type {
1056 DataType::List(field) | DataType::Map(field, _) => {
1057 let values_data = self.get_single_valid_child_data(field.data_type())?;
1058 self.validate_offsets::<i32>(values_data.len)?;
1059 Ok(())
1060 }
1061 DataType::LargeList(field) => {
1062 let values_data = self.get_single_valid_child_data(field.data_type())?;
1063 self.validate_offsets::<i64>(values_data.len)?;
1064 Ok(())
1065 }
1066 DataType::ListView(field) => {
1067 let values_data = self.get_single_valid_child_data(field.data_type())?;
1068 self.validate_offsets_and_sizes::<i32>(values_data.len)?;
1069 Ok(())
1070 }
1071 DataType::LargeListView(field) => {
1072 let values_data = self.get_single_valid_child_data(field.data_type())?;
1073 self.validate_offsets_and_sizes::<i64>(values_data.len)?;
1074 Ok(())
1075 }
1076 DataType::FixedSizeList(field, list_size) => {
1077 let values_data = self.get_single_valid_child_data(field.data_type())?;
1078
1079 let list_size: usize = (*list_size).try_into().map_err(|_| {
1080 ArrowError::InvalidArgumentError(format!(
1081 "{} has a negative list_size {}",
1082 self.data_type, list_size
1083 ))
1084 })?;
1085
1086 let expected_values_len = self.len
1087 .checked_mul(list_size)
1088 .expect("integer overflow computing expected number of expected values in FixedListSize");
1089
1090 if values_data.len < expected_values_len {
1091 return Err(ArrowError::InvalidArgumentError(format!(
1092 "Values length {} is less than the length ({}) multiplied by the value size ({}) for {}",
1093 values_data.len, self.len, list_size, self.data_type
1094 )));
1095 }
1096
1097 Ok(())
1098 }
1099 DataType::Struct(fields) => {
1100 self.validate_num_child_data(fields.len())?;
1101 for (i, field) in fields.iter().enumerate() {
1102 let field_data = self.get_valid_child_data(i, field.data_type())?;
1103
1104 if field_data.len < self.len {
1106 return Err(ArrowError::InvalidArgumentError(format!(
1107 "{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
1108 self.data_type,
1109 i,
1110 field.name(),
1111 field_data.len,
1112 self.len
1113 )));
1114 }
1115 }
1116 Ok(())
1117 }
1118 DataType::RunEndEncoded(run_ends_field, values_field) => {
1119 self.validate_num_child_data(2)?;
1120 let run_ends_data = self.get_valid_child_data(0, run_ends_field.data_type())?;
1121 let values_data = self.get_valid_child_data(1, values_field.data_type())?;
1122 if run_ends_data.len != values_data.len {
1123 return Err(ArrowError::InvalidArgumentError(format!(
1124 "The run_ends array length should be the same as values array length. Run_ends array length is {}, values array length is {}",
1125 run_ends_data.len, values_data.len
1126 )));
1127 }
1128 if run_ends_data.nulls.is_some() {
1129 return Err(ArrowError::InvalidArgumentError(
1130 "Found null values in run_ends array. The run_ends array should not have null values.".to_string(),
1131 ));
1132 }
1133 Ok(())
1134 }
1135 DataType::Union(fields, mode) => {
1136 self.validate_num_child_data(fields.len())?;
1137
1138 for (i, (_, field)) in fields.iter().enumerate() {
1139 let field_data = self.get_valid_child_data(i, field.data_type())?;
1140
1141 if mode == &UnionMode::Sparse {
1142 let len_plus_offset =
1143 checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1144 if field_data.len < len_plus_offset {
1145 return Err(ArrowError::InvalidArgumentError(format!(
1146 "Sparse union child array #{} has length smaller than expected for union array ({} < {})",
1147 i, field_data.len, len_plus_offset
1148 )));
1149 }
1150 }
1151 }
1152 Ok(())
1153 }
1154 DataType::Dictionary(_key_type, value_type) => {
1155 self.get_single_valid_child_data(value_type)?;
1156 Ok(())
1157 }
1158 _ => {
1159 if !self.child_data.is_empty() {
1161 return Err(ArrowError::InvalidArgumentError(format!(
1162 "Expected no child arrays for type {} but got {}",
1163 self.data_type,
1164 self.child_data.len()
1165 )));
1166 }
1167 Ok(())
1168 }
1169 }
1170 }
1171
1172 fn get_single_valid_child_data(
1176 &self,
1177 expected_type: &DataType,
1178 ) -> Result<&ArrayData, ArrowError> {
1179 self.validate_num_child_data(1)?;
1180 self.get_valid_child_data(0, expected_type)
1181 }
1182
1183 fn validate_num_child_data(&self, expected_len: usize) -> Result<(), ArrowError> {
1185 if self.child_data.len() != expected_len {
1186 Err(ArrowError::InvalidArgumentError(format!(
1187 "Value data for {} should contain {} child data array(s), had {}",
1188 self.data_type,
1189 expected_len,
1190 self.child_data.len()
1191 )))
1192 } else {
1193 Ok(())
1194 }
1195 }
1196
1197 fn get_valid_child_data(
1200 &self,
1201 i: usize,
1202 expected_type: &DataType,
1203 ) -> Result<&ArrayData, ArrowError> {
1204 let values_data = self.child_data.get(i).ok_or_else(|| {
1205 ArrowError::InvalidArgumentError(format!(
1206 "{} did not have enough child arrays. Expected at least {} but had only {}",
1207 self.data_type,
1208 i + 1,
1209 self.child_data.len()
1210 ))
1211 })?;
1212
1213 if expected_type != &values_data.data_type {
1214 return Err(ArrowError::InvalidArgumentError(format!(
1215 "Child type mismatch for {}. Expected {} but child data had {}",
1216 self.data_type, expected_type, values_data.data_type
1217 )));
1218 }
1219
1220 values_data.validate()?;
1221 Ok(values_data)
1222 }
1223
1224 pub fn validate_data(&self) -> Result<(), ArrowError> {
1240 self.validate()?;
1241
1242 self.validate_nulls()?;
1243 self.validate_values()?;
1244 Ok(())
1245 }
1246
1247 pub fn validate_full(&self) -> Result<(), ArrowError> {
1252 self.validate_data()?;
1253 self.child_data
1255 .iter()
1256 .enumerate()
1257 .try_for_each(|(i, child_data)| {
1258 child_data.validate_full().map_err(|e| {
1259 ArrowError::InvalidArgumentError(format!(
1260 "{} child #{} invalid: {}",
1261 self.data_type, i, e
1262 ))
1263 })
1264 })?;
1265 Ok(())
1266 }
1267
1268 pub fn validate_nulls(&self) -> Result<(), ArrowError> {
1278 if let Some(nulls) = &self.nulls {
1279 let actual = nulls.len() - nulls.inner().count_set_bits();
1280 if actual != nulls.null_count() {
1281 return Err(ArrowError::InvalidArgumentError(format!(
1282 "null_count value ({}) doesn't match actual number of nulls in array ({})",
1283 nulls.null_count(),
1284 actual
1285 )));
1286 }
1287 }
1288
1289 match &self.data_type {
1294 DataType::List(f) | DataType::LargeList(f) | DataType::Map(f, _) => {
1295 if !f.is_nullable() {
1296 self.validate_non_nullable(None, &self.child_data[0])?
1297 }
1298 }
1299 DataType::FixedSizeList(field, len) => {
1300 let child = &self.child_data[0];
1301 if !field.is_nullable() {
1302 match &self.nulls {
1303 Some(nulls) => {
1304 let element_len = *len as usize;
1305 let expanded = nulls.expand(element_len);
1306 self.validate_non_nullable(Some(&expanded), child)?;
1307 }
1308 None => self.validate_non_nullable(None, child)?,
1309 }
1310 }
1311 }
1312 DataType::Struct(fields) => {
1313 for (field, child) in fields.iter().zip(&self.child_data) {
1314 if !field.is_nullable() {
1315 self.validate_non_nullable(self.nulls(), child)?
1316 }
1317 }
1318 }
1319 _ => {}
1320 }
1321
1322 Ok(())
1323 }
1324
1325 fn validate_non_nullable(
1327 &self,
1328 mask: Option<&NullBuffer>,
1329 child: &ArrayData,
1330 ) -> Result<(), ArrowError> {
1331 let mask = match mask {
1332 Some(mask) => mask,
1333 None => {
1334 return match child.null_count() {
1335 0 => Ok(()),
1336 _ => Err(ArrowError::InvalidArgumentError(format!(
1337 "non-nullable child of type {} contains nulls not present in parent {}",
1338 child.data_type, self.data_type
1339 ))),
1340 };
1341 }
1342 };
1343
1344 match child.nulls() {
1345 Some(nulls) if !mask.contains(nulls) => Err(ArrowError::InvalidArgumentError(format!(
1346 "non-nullable child of type {} contains nulls not present in parent",
1347 child.data_type
1348 ))),
1349 _ => Ok(()),
1350 }
1351 }
1352
1353 pub fn validate_values(&self) -> Result<(), ArrowError> {
1359 match &self.data_type {
1360 DataType::Utf8 => self.validate_utf8::<i32>(),
1361 DataType::LargeUtf8 => self.validate_utf8::<i64>(),
1362 DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
1363 DataType::LargeBinary => self.validate_offsets_full::<i64>(self.buffers[1].len()),
1364 DataType::BinaryView => {
1365 let views = self.typed_buffer::<u128>(0, self.len)?;
1366 validate_binary_view(views, &self.buffers[1..])
1367 }
1368 DataType::Utf8View => {
1369 let views = self.typed_buffer::<u128>(0, self.len)?;
1370 validate_string_view(views, &self.buffers[1..])
1371 }
1372 DataType::List(_) | DataType::Map(_, _) => {
1373 let child = &self.child_data[0];
1374 self.validate_offsets_full::<i32>(child.len)
1375 }
1376 DataType::LargeList(_) => {
1377 let child = &self.child_data[0];
1378 self.validate_offsets_full::<i64>(child.len)
1379 }
1380 DataType::Union(_, _) => {
1381 Ok(())
1387 }
1388 DataType::Dictionary(key_type, _value_type) => {
1389 let dictionary_length: i64 = self.child_data[0].len.try_into().unwrap();
1390 let max_value = dictionary_length - 1;
1391 match key_type.as_ref() {
1392 DataType::UInt8 => self.check_bounds::<u8>(max_value),
1393 DataType::UInt16 => self.check_bounds::<u16>(max_value),
1394 DataType::UInt32 => self.check_bounds::<u32>(max_value),
1395 DataType::UInt64 => self.check_bounds::<u64>(max_value),
1396 DataType::Int8 => self.check_bounds::<i8>(max_value),
1397 DataType::Int16 => self.check_bounds::<i16>(max_value),
1398 DataType::Int32 => self.check_bounds::<i32>(max_value),
1399 DataType::Int64 => self.check_bounds::<i64>(max_value),
1400 _ => unreachable!(),
1401 }
1402 }
1403 DataType::RunEndEncoded(run_ends, _values) => {
1404 let run_ends_data = self.child_data()[0].clone();
1405 match run_ends.data_type() {
1406 DataType::Int16 => run_ends_data.check_run_ends::<i16>(),
1407 DataType::Int32 => run_ends_data.check_run_ends::<i32>(),
1408 DataType::Int64 => run_ends_data.check_run_ends::<i64>(),
1409 _ => unreachable!(),
1410 }
1411 }
1412 _ => {
1413 Ok(())
1415 }
1416 }
1417 }
1418
1419 fn validate_each_offset<T, V>(&self, offset_limit: usize, validate: V) -> Result<(), ArrowError>
1430 where
1431 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1432 V: Fn(usize, Range<usize>) -> Result<(), ArrowError>,
1433 {
1434 self.typed_offsets::<T>()?
1435 .iter()
1436 .enumerate()
1437 .map(|(i, x)| {
1438 let r = x.to_usize().ok_or_else(|| {
1440 ArrowError::InvalidArgumentError(format!(
1441 "Offset invariant failure: Could not convert offset {x} to usize at position {i}"))}
1442 );
1443 match r {
1445 Ok(n) if n <= offset_limit => Ok((i, n)),
1446 Ok(_) => Err(ArrowError::InvalidArgumentError(format!(
1447 "Offset invariant failure: offset at position {i} out of bounds: {x} > {offset_limit}"))
1448 ),
1449 Err(e) => Err(e),
1450 }
1451 })
1452 .scan(0_usize, |start, end| {
1453 match end {
1455 Ok((i, end)) if *start <= end => {
1456 let range = Some(Ok((i, *start..end)));
1457 *start = end;
1458 range
1459 }
1460 Ok((i, end)) => Some(Err(ArrowError::InvalidArgumentError(format!(
1461 "Offset invariant failure: non-monotonic offset at slot {}: {} > {}",
1462 i - 1, start, end))
1463 )),
1464 Err(err) => Some(Err(err)),
1465 }
1466 })
1467 .skip(1) .try_for_each(|res: Result<(usize, Range<usize>), ArrowError>| {
1469 let (item_index, range) = res?;
1470 validate(item_index-1, range)
1471 })
1472 }
1473
1474 fn validate_utf8<T>(&self) -> Result<(), ArrowError>
1477 where
1478 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1479 {
1480 let values_buffer = &self.buffers[1].as_slice();
1481 if let Ok(values_str) = std::str::from_utf8(values_buffer) {
1482 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1484 if !values_str.is_char_boundary(range.start)
1485 || !values_str.is_char_boundary(range.end)
1486 {
1487 return Err(ArrowError::InvalidArgumentError(format!(
1488 "incomplete utf-8 byte sequence from index {string_index}"
1489 )));
1490 }
1491 Ok(())
1492 })
1493 } else {
1494 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1496 std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
1497 ArrowError::InvalidArgumentError(format!(
1498 "Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}"
1499 ))
1500 })?;
1501 Ok(())
1502 })
1503 }
1504 }
1505
1506 fn validate_offsets_full<T>(&self, offset_limit: usize) -> Result<(), ArrowError>
1509 where
1510 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1511 {
1512 self.validate_each_offset::<T, _>(offset_limit, |_string_index, _range| {
1513 Ok(())
1516 })
1517 }
1518
1519 fn check_bounds<T>(&self, max_value: i64) -> Result<(), ArrowError>
1522 where
1523 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1524 {
1525 let required_len = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1526 let buffer = &self.buffers[0];
1527
1528 assert!(buffer.len() / mem::size_of::<T>() >= required_len);
1531
1532 let indexes: &[T] = &buffer.typed_data::<T>()[self.offset..required_len];
1534
1535 indexes.iter().enumerate().try_for_each(|(i, &dict_index)| {
1536 if self.is_null(i) {
1538 return Ok(());
1539 }
1540 let dict_index: i64 = dict_index.try_into().map_err(|_| {
1541 ArrowError::InvalidArgumentError(format!(
1542 "Value at position {i} out of bounds: {dict_index} (can not convert to i64)"
1543 ))
1544 })?;
1545
1546 if dict_index < 0 || dict_index > max_value {
1547 return Err(ArrowError::InvalidArgumentError(format!(
1548 "Value at position {i} out of bounds: {dict_index} (should be in [0, {max_value}])"
1549 )));
1550 }
1551 Ok(())
1552 })
1553 }
1554
1555 fn check_run_ends<T>(&self) -> Result<(), ArrowError>
1557 where
1558 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1559 {
1560 let values = self.typed_buffer::<T>(0, self.len)?;
1561 let mut prev_value: i64 = 0_i64;
1562 values.iter().enumerate().try_for_each(|(ix, &inp_value)| {
1563 let value: i64 = inp_value.try_into().map_err(|_| {
1564 ArrowError::InvalidArgumentError(format!(
1565 "Value at position {ix} out of bounds: {inp_value} (can not convert to i64)"
1566 ))
1567 })?;
1568 if value <= 0_i64 {
1569 return Err(ArrowError::InvalidArgumentError(format!(
1570 "The values in run_ends array should be strictly positive. Found value {value} at index {ix} that does not match the criteria."
1571 )));
1572 }
1573 if ix > 0 && value <= prev_value {
1574 return Err(ArrowError::InvalidArgumentError(format!(
1575 "The values in run_ends array should be strictly increasing. Found value {value} at index {ix} with previous value {prev_value} that does not match the criteria."
1576 )));
1577 }
1578
1579 prev_value = value;
1580 Ok(())
1581 })?;
1582
1583 let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1584 if prev_value.as_usize() < len_plus_offset {
1585 return Err(ArrowError::InvalidArgumentError(format!(
1586 "The offset + length of array should be less or equal to last value in the run_ends array. The last value of run_ends array is {prev_value} and offset + length of array is {}.",
1587 len_plus_offset
1588 )));
1589 }
1590 Ok(())
1591 }
1592
1593 pub fn ptr_eq(&self, other: &Self) -> bool {
1597 if self.offset != other.offset
1598 || self.len != other.len
1599 || self.data_type != other.data_type
1600 || self.buffers.len() != other.buffers.len()
1601 || self.child_data.len() != other.child_data.len()
1602 {
1603 return false;
1604 }
1605
1606 match (&self.nulls, &other.nulls) {
1607 (Some(a), Some(b)) if !a.inner().ptr_eq(b.inner()) => return false,
1608 (Some(_), None) | (None, Some(_)) => return false,
1609 _ => {}
1610 };
1611
1612 if !self
1613 .buffers
1614 .iter()
1615 .zip(other.buffers.iter())
1616 .all(|(a, b)| a.as_ptr() == b.as_ptr())
1617 {
1618 return false;
1619 }
1620
1621 self.child_data
1622 .iter()
1623 .zip(other.child_data.iter())
1624 .all(|(a, b)| a.ptr_eq(b))
1625 }
1626
1627 pub fn into_builder(self) -> ArrayDataBuilder {
1629 self.into()
1630 }
1631}
1632
1633pub fn layout(data_type: &DataType) -> DataTypeLayout {
1636 use arrow_schema::IntervalUnit::*;
1639
1640 match data_type {
1641 DataType::Null => DataTypeLayout {
1642 buffers: vec![],
1643 can_contain_null_mask: false,
1644 variadic: false,
1645 },
1646 DataType::Boolean => DataTypeLayout {
1647 buffers: vec![BufferSpec::BitMap],
1648 can_contain_null_mask: true,
1649 variadic: false,
1650 },
1651 DataType::Int8 => DataTypeLayout::new_fixed_width::<i8>(),
1652 DataType::Int16 => DataTypeLayout::new_fixed_width::<i16>(),
1653 DataType::Int32 => DataTypeLayout::new_fixed_width::<i32>(),
1654 DataType::Int64 => DataTypeLayout::new_fixed_width::<i64>(),
1655 DataType::UInt8 => DataTypeLayout::new_fixed_width::<u8>(),
1656 DataType::UInt16 => DataTypeLayout::new_fixed_width::<u16>(),
1657 DataType::UInt32 => DataTypeLayout::new_fixed_width::<u32>(),
1658 DataType::UInt64 => DataTypeLayout::new_fixed_width::<u64>(),
1659 DataType::Float16 => DataTypeLayout::new_fixed_width::<half::f16>(),
1660 DataType::Float32 => DataTypeLayout::new_fixed_width::<f32>(),
1661 DataType::Float64 => DataTypeLayout::new_fixed_width::<f64>(),
1662 DataType::Timestamp(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1663 DataType::Date32 => DataTypeLayout::new_fixed_width::<i32>(),
1664 DataType::Date64 => DataTypeLayout::new_fixed_width::<i64>(),
1665 DataType::Time32(_) => DataTypeLayout::new_fixed_width::<i32>(),
1666 DataType::Time64(_) => DataTypeLayout::new_fixed_width::<i64>(),
1667 DataType::Interval(YearMonth) => DataTypeLayout::new_fixed_width::<i32>(),
1668 DataType::Interval(DayTime) => DataTypeLayout::new_fixed_width::<IntervalDayTime>(),
1669 DataType::Interval(MonthDayNano) => {
1670 DataTypeLayout::new_fixed_width::<IntervalMonthDayNano>()
1671 }
1672 DataType::Duration(_) => DataTypeLayout::new_fixed_width::<i64>(),
1673 DataType::Decimal32(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1674 DataType::Decimal64(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1675 DataType::Decimal128(_, _) => DataTypeLayout::new_fixed_width::<i128>(),
1676 DataType::Decimal256(_, _) => DataTypeLayout::new_fixed_width::<i256>(),
1677 DataType::FixedSizeBinary(size) => {
1678 let spec = BufferSpec::FixedWidth {
1679 byte_width: (*size).try_into().unwrap(),
1680 alignment: mem::align_of::<u8>(),
1681 };
1682 DataTypeLayout {
1683 buffers: vec![spec],
1684 can_contain_null_mask: true,
1685 variadic: false,
1686 }
1687 }
1688 DataType::Binary => DataTypeLayout::new_binary::<i32>(),
1689 DataType::LargeBinary => DataTypeLayout::new_binary::<i64>(),
1690 DataType::Utf8 => DataTypeLayout::new_binary::<i32>(),
1691 DataType::LargeUtf8 => DataTypeLayout::new_binary::<i64>(),
1692 DataType::BinaryView | DataType::Utf8View => DataTypeLayout::new_view(),
1693 DataType::FixedSizeList(_, _) => DataTypeLayout::new_nullable_empty(), DataType::List(_) => DataTypeLayout::new_fixed_width::<i32>(),
1695 DataType::ListView(_) => DataTypeLayout::new_list_view::<i32>(),
1696 DataType::LargeListView(_) => DataTypeLayout::new_list_view::<i64>(),
1697 DataType::LargeList(_) => DataTypeLayout::new_fixed_width::<i64>(),
1698 DataType::Map(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1699 DataType::Struct(_) => DataTypeLayout::new_nullable_empty(), DataType::RunEndEncoded(_, _) => DataTypeLayout::new_empty(), DataType::Union(_, mode) => {
1702 let type_ids = BufferSpec::FixedWidth {
1703 byte_width: mem::size_of::<i8>(),
1704 alignment: mem::align_of::<i8>(),
1705 };
1706
1707 DataTypeLayout {
1708 buffers: match mode {
1709 UnionMode::Sparse => {
1710 vec![type_ids]
1711 }
1712 UnionMode::Dense => {
1713 vec![
1714 type_ids,
1715 BufferSpec::FixedWidth {
1716 byte_width: mem::size_of::<i32>(),
1717 alignment: mem::align_of::<i32>(),
1718 },
1719 ]
1720 }
1721 },
1722 can_contain_null_mask: false,
1723 variadic: false,
1724 }
1725 }
1726 DataType::Dictionary(key_type, _value_type) => layout(key_type),
1727 }
1728}
1729
/// Describes the buffers of an array and whether it may carry a null mask.
#[derive(Debug, PartialEq, Eq)]
pub struct DataTypeLayout {
    /// One [`BufferSpec`] per buffer, in buffer order.
    pub buffers: Vec<BufferSpec>,

    /// Whether arrays with this layout can contain a null mask.
    pub can_contain_null_mask: bool,

    /// Set to `true` only by [`DataTypeLayout::new_view`].
    /// NOTE(review): presumably means a variable number of additional data
    /// buffers may follow those listed in `buffers` — confirm.
    pub variadic: bool,
}

impl DataTypeLayout {
    /// Layout with one fixed-width buffer whose width and alignment are those
    /// of `T`; a null mask is allowed.
    pub fn new_fixed_width<T>() -> Self {
        let values = BufferSpec::FixedWidth {
            byte_width: mem::size_of::<T>(),
            alignment: mem::align_of::<T>(),
        };
        Self {
            buffers: vec![values],
            can_contain_null_mask: true,
            variadic: false,
        }
    }

    /// Layout with no buffers that still allows a null mask.
    pub fn new_nullable_empty() -> Self {
        Self {
            buffers: Vec::new(),
            can_contain_null_mask: true,
            variadic: false,
        }
    }

    /// Layout with no buffers and no null mask.
    pub fn new_empty() -> Self {
        Self {
            buffers: Vec::new(),
            can_contain_null_mask: false,
            variadic: false,
        }
    }

    /// Layout with a fixed-width buffer of `T` followed by a variable-width
    /// buffer; a null mask is allowed.
    pub fn new_binary<T>() -> Self {
        let first = BufferSpec::FixedWidth {
            byte_width: mem::size_of::<T>(),
            alignment: mem::align_of::<T>(),
        };
        Self {
            buffers: vec![first, BufferSpec::VariableWidth],
            can_contain_null_mask: true,
            variadic: false,
        }
    }

    /// Layout with one fixed-width `u128` buffer, marked as variadic; a null
    /// mask is allowed.
    pub fn new_view() -> Self {
        let views = BufferSpec::FixedWidth {
            byte_width: mem::size_of::<u128>(),
            alignment: mem::align_of::<u128>(),
        };
        Self {
            buffers: vec![views],
            can_contain_null_mask: true,
            variadic: true,
        }
    }

    /// Layout with two fixed-width buffers of `T`; a null mask is allowed.
    pub fn new_list_view<T>() -> Self {
        // `BufferSpec` is not `Clone`, so build each spec separately.
        let spec_of_t = || BufferSpec::FixedWidth {
            byte_width: mem::size_of::<T>(),
            alignment: mem::align_of::<T>(),
        };
        Self {
            buffers: vec![spec_of_t(), spec_of_t()],
            can_contain_null_mask: true,
            variadic: false,
        }
    }
}

/// Describes a single buffer of an array layout.
#[derive(Debug, PartialEq, Eq)]
pub enum BufferSpec {
    /// Each element occupies a fixed number of bytes.
    FixedWidth {
        /// Width of one element in bytes.
        byte_width: usize,
        /// Required alignment in bytes.
        alignment: usize,
    },
    /// Elements have variable width.
    VariableWidth,
    /// A bitmap buffer.
    BitMap,
    /// NOTE(review): not constructed anywhere in the visible code; presumably
    /// a buffer that is always null — confirm.
    #[allow(dead_code)]
    AlwaysNull,
}
1861
/// Equality for [`ArrayData`] is delegated entirely to `equal::equal`.
impl PartialEq for ArrayData {
    /// Returns the result of `equal::equal(self, other)`.
    fn eq(&self, other: &Self) -> bool {
        equal::equal(self, other)
    }
}
1867
/// A boolean flag that can be read safely but only changed through an
/// `unsafe` call.
///
/// `ArrayDataBuilder` uses it to record whether validation should be skipped
/// in `build`.
#[derive(Debug, Clone)]
#[doc(hidden)]
pub struct UnsafeFlag(bool);

impl UnsafeFlag {
    /// Creates a flag that is initially `false`.
    #[inline]
    pub const fn new() -> Self {
        UnsafeFlag(false)
    }

    /// Sets the flag to `val`.
    ///
    /// # Safety
    ///
    /// The operation itself only stores a `bool`. It is marked `unsafe` so
    /// that changing the flag requires an `unsafe` block at the call site.
    /// NOTE(review): the caller presumably takes responsibility for whatever
    /// checks the flag disables — confirm against users of this type.
    #[inline]
    pub unsafe fn set(&mut self, val: bool) {
        let Self(flag) = self;
        *flag = val;
    }

    /// Returns the current value of the flag.
    #[inline]
    pub fn get(&self) -> bool {
        let Self(flag) = self;
        *flag
    }
}

impl Default for UnsafeFlag {
    /// Same as [`UnsafeFlag::new`]: the flag starts out `false`.
    fn default() -> Self {
        UnsafeFlag::new()
    }
}
1926
1927#[derive(Debug)]
1929pub struct ArrayDataBuilder {
1930 data_type: DataType,
1931 len: usize,
1932 null_count: Option<usize>,
1933 null_bit_buffer: Option<Buffer>,
1934 nulls: Option<NullBuffer>,
1935 offset: usize,
1936 buffers: Vec<Buffer>,
1937 child_data: Vec<ArrayData>,
1938 align_buffers: bool,
1942 skip_validation: UnsafeFlag,
1952}
1953
1954impl ArrayDataBuilder {
1955 #[inline]
1956 pub const fn new(data_type: DataType) -> Self {
1958 Self {
1959 data_type,
1960 len: 0,
1961 null_count: None,
1962 null_bit_buffer: None,
1963 nulls: None,
1964 offset: 0,
1965 buffers: vec![],
1966 child_data: vec![],
1967 align_buffers: false,
1968 skip_validation: UnsafeFlag::new(),
1969 }
1970 }
1971
1972 pub fn data_type(self, data_type: DataType) -> Self {
1974 Self { data_type, ..self }
1975 }
1976
1977 #[inline]
1978 #[allow(clippy::len_without_is_empty)]
1979 pub const fn len(mut self, n: usize) -> Self {
1981 self.len = n;
1982 self
1983 }
1984
1985 pub fn nulls(mut self, nulls: Option<NullBuffer>) -> Self {
1987 self.nulls = nulls;
1988 self.null_count = None;
1989 self.null_bit_buffer = None;
1990 self
1991 }
1992
1993 pub fn null_count(mut self, null_count: usize) -> Self {
1995 self.null_count = Some(null_count);
1996 self
1997 }
1998
1999 pub fn null_bit_buffer(mut self, buf: Option<Buffer>) -> Self {
2001 self.nulls = None;
2002 self.null_bit_buffer = buf;
2003 self
2004 }
2005
2006 #[inline]
2008 pub const fn offset(mut self, n: usize) -> Self {
2009 self.offset = n;
2010 self
2011 }
2012
2013 pub fn buffers(mut self, v: Vec<Buffer>) -> Self {
2015 self.buffers = v;
2016 self
2017 }
2018
2019 pub fn add_buffer(mut self, b: Buffer) -> Self {
2021 self.buffers.push(b);
2022 self
2023 }
2024
2025 pub fn add_buffers<I: IntoIterator<Item = Buffer>>(mut self, bs: I) -> Self {
2027 self.buffers.extend(bs);
2028 self
2029 }
2030
2031 pub fn child_data(mut self, v: Vec<ArrayData>) -> Self {
2033 self.child_data = v;
2034 self
2035 }
2036
2037 pub fn add_child_data(mut self, r: ArrayData) -> Self {
2039 self.child_data.push(r);
2040 self
2041 }
2042
2043 pub unsafe fn build_unchecked(self) -> ArrayData {
2059 unsafe { self.skip_validation(true) }.build().unwrap()
2060 }
2061
2062 pub fn build(self) -> Result<ArrayData, ArrowError> {
2071 let Self {
2072 data_type,
2073 len,
2074 null_count,
2075 null_bit_buffer,
2076 nulls,
2077 offset,
2078 buffers,
2079 child_data,
2080 align_buffers,
2081 skip_validation,
2082 } = self;
2083
2084 let nulls = nulls
2085 .or_else(|| {
2086 let buffer = null_bit_buffer?;
2087 let buffer = BooleanBuffer::new(buffer, offset, len);
2088 Some(match null_count {
2089 Some(n) => {
2090 unsafe { NullBuffer::new_unchecked(buffer, n) }
2092 }
2093 None => NullBuffer::new(buffer),
2094 })
2095 })
2096 .filter(|b| b.null_count() != 0);
2097
2098 let mut data = ArrayData {
2099 data_type,
2100 len,
2101 offset,
2102 buffers,
2103 child_data,
2104 nulls,
2105 };
2106
2107 if align_buffers {
2108 data.align_buffers();
2109 }
2110
2111 if !skip_validation.get() || cfg!(feature = "force_validate") {
2113 data.validate_data()?;
2114 }
2115 Ok(data)
2116 }
2117
2118 #[deprecated(since = "54.1.0", note = "Use ArrayData::align_buffers instead")]
2120 pub fn build_aligned(self) -> Result<ArrayData, ArrowError> {
2121 self.align_buffers(true).build()
2122 }
2123
2124 pub fn align_buffers(mut self, align_buffers: bool) -> Self {
2140 self.align_buffers = align_buffers;
2141 self
2142 }
2143
2144 pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self {
2158 unsafe {
2159 self.skip_validation.set(skip_validation);
2160 }
2161 self
2162 }
2163}
2164
2165impl From<ArrayData> for ArrayDataBuilder {
2166 fn from(d: ArrayData) -> Self {
2167 Self {
2168 data_type: d.data_type,
2169 len: d.len,
2170 offset: d.offset,
2171 buffers: d.buffers,
2172 child_data: d.child_data,
2173 nulls: d.nulls,
2174 null_bit_buffer: None,
2175 null_count: None,
2176 align_buffers: false,
2177 skip_validation: UnsafeFlag::new(),
2178 }
2179 }
2180}
2181
// Unit tests for `ArrayData`, `ArrayDataBuilder` and the null-counting helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::{Field, Fields};

    /// Returns a buffer holding `n` copies of `42i32`.
    fn make_i32_buffer(n: usize) -> Buffer {
        Buffer::from_slice_ref(vec![42i32; n])
    }

    /// Returns a buffer holding `n` copies of `42f32`.
    fn make_f32_buffer(n: usize) -> Buffer {
        Buffer::from_slice_ref(vec![42f32; n])
    }

    /// The builder keeps length, offset and buffers, and derives the null
    /// count from the validity bitmap.
    #[test]
    fn test_builder() {
        let v = (0..25).collect::<Vec<i32>>();
        let b1 = Buffer::from_slice_ref(&v);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(20)
            .offset(5)
            .add_buffer(b1)
            .null_bit_buffer(Some(Buffer::from([
                0b01011111, 0b10110101, 0b01100011, 0b00011110,
            ])))
            .build()
            .unwrap();

        assert_eq!(20, arr_data.len());
        // Bits 5..25 of the bitmap contain 10 set bits, leaving 10 nulls.
        assert_eq!(10, arr_data.null_count());
        assert_eq!(5, arr_data.offset());
        assert_eq!(1, arr_data.buffers().len());
        assert_eq!(
            Buffer::from_slice_ref(&v).as_slice(),
            arr_data.buffers()[0].as_slice()
        );
    }

    /// A struct array built with one child exposes that child unchanged.
    #[test]
    fn test_builder_with_child_data() {
        let child_arr_data = ArrayData::try_new(
            DataType::Int32,
            5,
            None,
            0,
            vec![Buffer::from_slice_ref([1i32, 2, 3, 4, 5])],
            vec![],
        )
        .unwrap();

        let field = Arc::new(Field::new("x", DataType::Int32, true));
        let data_type = DataType::Struct(vec![field].into());

        let arr_data = ArrayData::builder(data_type)
            .len(5)
            .offset(0)
            .add_child_data(child_arr_data.clone())
            .build()
            .unwrap();

        assert_eq!(5, arr_data.len());
        assert_eq!(1, arr_data.child_data().len());
        assert_eq!(child_arr_data, arr_data.child_data()[0]);
    }

    /// The null count is computed from the bitmap, with and without an offset.
    #[test]
    fn test_null_count() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        // 3 of 16 bits are set.
        assert_eq!(13, arr_data.null_count());

        // Same bitmap, but only bits 2..14 are in range: 2 of 12 are set.
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(12)
            .offset(2)
            .add_buffer(make_i32_buffer(14))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert_eq!(10, arr_data.null_count());
    }

    /// The null buffer of the built array exposes the bitmap bytes it was
    /// given.
    #[test]
    fn test_null_buffer_ref() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert!(arr_data.nulls().is_some());
        assert_eq!(&bit_v, arr_data.nulls().unwrap().validity());
    }

    /// Slicing adjusts length, offset and null count; slicing a slice
    /// accumulates the offset.
    #[test]
    fn test_slice() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        // Dropping slot 0 (valid) leaves the null count unchanged.
        let new_data = data.slice(1, 15);
        assert_eq!(data.len() - 1, new_data.len());
        assert_eq!(1, new_data.offset());
        assert_eq!(data.null_count(), new_data.null_count());

        // Slicing again drops original slot 1, which is null.
        let new_data = new_data.slice(1, 14);
        assert_eq!(data.len() - 2, new_data.len());
        assert_eq!(2, new_data.offset());
        assert_eq!(data.null_count() - 1, new_data.null_count());
    }

    /// `slice` panics when offset + length overflows.
    #[test]
    #[should_panic(expected = "offset + length overflow")]
    fn test_slice_panics_on_offset_length_overflow() {
        let data = ArrayData::builder(DataType::Int32)
            .len(4)
            .add_buffer(make_i32_buffer(4))
            .build()
            .unwrap();
        let sliced = data.slice(1, 3);

        sliced.slice(1, usize::MAX);
    }

    /// `typed_offsets` reports an error when the length overflows `usize`.
    #[test]
    fn test_typed_offsets_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: usize::MAX,
            offset: 0,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };
        let err = data.typed_offsets::<i32>().unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 1 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    /// `typed_buffer` reports an error when length plus the array offset
    /// overflows `usize`.
    #[test]
    fn test_validate_typed_buffer_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: 0,
            offset: 2,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };
        let err = data.typed_buffer::<i32>(0, usize::MAX).unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 2 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    /// Shared fixture: tries to create a Binary array whose length plus
    /// offset overflows `usize`.
    fn try_new_binary_length_offset_overflow() -> Result<ArrayData, ArrowError> {
        ArrayData::try_new(
            DataType::Binary,
            usize::MAX,
            None,
            1,
            vec![
                Buffer::from_slice_ref([0_i32]),
                Buffer::from_iter(std::iter::empty::<u8>()),
            ],
            vec![],
        )
    }

    /// Without `force_validate`, the overflow is returned as an error.
    #[cfg(not(feature = "force_validate"))]
    #[test]
    fn test_try_new_length_offset_overflow() {
        let err = try_new_binary_length_offset_overflow().unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 1 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    /// With `force_validate`, the same overflow is expected to panic.
    #[cfg(feature = "force_validate")]
    #[test]
    #[should_panic(
        expected = "Length 18446744073709551615 with offset 1 overflows usize for Binary"
    )]
    fn test_try_new_length_offset_overflow_force_validate() {
        try_new_binary_length_offset_overflow().unwrap();
    }

    /// Exercises `==` and `ptr_eq` across different types, clones and slices.
    #[test]
    fn test_equality() {
        let int_data = ArrayData::builder(DataType::Int32)
            .len(1)
            .add_buffer(make_i32_buffer(1))
            .build()
            .unwrap();

        let float_data = ArrayData::builder(DataType::Float32)
            .len(1)
            .add_buffer(make_f32_buffer(1))
            .build()
            .unwrap();
        assert_ne!(int_data, float_data);
        assert!(!int_data.ptr_eq(&float_data));
        assert!(int_data.ptr_eq(&int_data));

        // A clone is expected to be pointer-equal to its source.
        #[allow(clippy::redundant_clone)]
        let int_data_clone = int_data.clone();
        assert_eq!(int_data, int_data_clone);
        assert!(int_data.ptr_eq(&int_data_clone));
        assert!(int_data_clone.ptr_eq(&int_data));

        // A slice differs in offset and length, so it is not pointer-equal.
        let int_data_slice = int_data_clone.slice(1, 0);
        assert!(int_data_slice.ptr_eq(&int_data_slice));
        assert!(!int_data.ptr_eq(&int_data_slice));
        assert!(!int_data_slice.ptr_eq(&int_data));

        let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes());
        let offsets_buffer = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
        let string_data = ArrayData::try_new(
            DataType::Utf8,
            3,
            Some(Buffer::from_iter(vec![true, false, true])),
            0,
            vec![offsets_buffer, data_buffer],
            vec![],
        )
        .unwrap();

        assert_ne!(float_data, string_data);
        assert!(!float_data.ptr_eq(&string_data));

        assert!(string_data.ptr_eq(&string_data));

        #[allow(clippy::redundant_clone)]
        let string_data_cloned = string_data.clone();
        assert!(string_data_cloned.ptr_eq(&string_data));
        assert!(string_data.ptr_eq(&string_data_cloned));

        let string_data_slice = string_data.slice(1, 2);
        assert!(string_data_slice.ptr_eq(&string_data_slice));
        assert!(!string_data_slice.ptr_eq(&string_data))
    }

    /// `get_slice_memory_size` shrinks when an array is sliced.
    #[test]
    fn test_slice_memory_size() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        // Two fewer i32 values: the assertion expects 8 fewer bytes.
        let new_data = data.slice(1, 14);
        assert_eq!(
            data.get_slice_memory_size().unwrap() - 8,
            new_data.get_slice_memory_size().unwrap()
        );
        let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes());
        let offsets_buffer = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
        let string_data = ArrayData::try_new(
            DataType::Utf8,
            3,
            Some(Buffer::from_iter(vec![true, false, true])),
            0,
            vec![offsets_buffer, data_buffer],
            vec![],
        )
        .unwrap();
        // Dropping the first string: the assertion expects 6 fewer bytes
        // (presumably one i32 offset plus the 2 bytes of the first string).
        let string_data_slice = string_data.slice(1, 2);
        assert_eq!(
            string_data.get_slice_memory_size().unwrap() - 6,
            string_data_slice.get_slice_memory_size().unwrap()
        );
    }

    /// `count_nulls` counts unset bits within the requested window.
    #[test]
    fn test_count_nulls() {
        let buffer = Buffer::from([0b00010110, 0b10011111]);
        let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 16));
        // 9 of the 16 bits are set.
        let count = count_nulls(Some(&buffer), 0, 16);
        assert_eq!(count, 7);

        // Bits 4..12 contain 5 set bits.
        let count = count_nulls(Some(&buffer), 4, 8);
        assert_eq!(count, 3);
    }

    /// `contains_nulls` reports nulls only inside the requested window and
    /// returns `false` for an empty window.
    #[test]
    fn test_contains_nulls() {
        let buffer: Buffer =
            MutableBuffer::from_iter([false, false, false, true, true, false]).into();
        let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 6));
        assert!(contains_nulls(Some(&buffer), 0, 6));
        assert!(contains_nulls(Some(&buffer), 0, 3));
        assert!(!contains_nulls(Some(&buffer), 3, 2));
        assert!(!contains_nulls(Some(&buffer), 0, 0));
    }

    /// A misaligned buffer fails validation until `align_buffers` is called.
    #[test]
    fn test_alignment() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        // Slicing by one byte breaks the 4-byte alignment of the i32 data.
        let sliced = buffer.slice(1);

        let mut data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };
        data.validate_full().unwrap();

        data.buffers[0] = sliced;
        let err = data.validate().unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Misaligned buffers[0] in array of type Int32, offset from expected alignment of 4 by 1"
        );

        data.align_buffers();
        data.validate_full().unwrap();
    }

    /// Same as `test_alignment`, but the misaligned buffer belongs to a
    /// child of a struct array.
    #[test]
    fn test_alignment_struct() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        let sliced = buffer.slice(1);

        let child_data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };

        let schema = DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)]));
        let mut data = ArrayData {
            data_type: schema,
            len: 0,
            offset: 0,
            buffers: vec![],
            child_data: vec![child_data],
            nulls: None,
        };
        data.validate_full().unwrap();

        data.child_data[0].buffers[0] = sliced;
        let err = data.validate().unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Misaligned buffers[0] in array of type Int32, offset from expected alignment of 4 by 1"
        );

        // Aligning the parent is expected to fix the child as well.
        data.align_buffers();
        data.validate_full().unwrap();
    }

    /// `new_null` yields all-null arrays for the view and list-view types.
    #[test]
    fn test_null_view_types() {
        let array_len = 32;
        let array = ArrayData::new_null(&DataType::BinaryView, array_len);
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(&DataType::Utf8View, array_len);
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(
            &DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
            array_len,
        );
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(
            &DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
            array_len,
        );
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }
    }
}