1use crate::bit_iterator::BitSliceIterator;
22use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
23use arrow_buffer::{
24 ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, bit_util, i256,
25};
26use arrow_schema::{ArrowError, DataType, UnionMode};
27use std::mem;
28use std::ops::Range;
29use std::sync::Arc;
30
31use crate::{equal, validate_binary_view, validate_string_view};
32
33#[inline]
34pub(crate) fn contains_nulls(
35 null_bit_buffer: Option<&NullBuffer>,
36 offset: usize,
37 len: usize,
38) -> bool {
39 match null_bit_buffer {
40 Some(buffer) => {
41 match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len).next() {
42 Some((start, end)) => start != 0 || end != len,
43 None => len != 0, }
45 }
46 None => false, }
48}
49
50#[inline]
51pub(crate) fn count_nulls(
52 null_bit_buffer: Option<&NullBuffer>,
53 offset: usize,
54 len: usize,
55) -> usize {
56 if let Some(buf) = null_bit_buffer {
57 let buffer = buf.buffer();
58 len - buffer.count_set_bits_offset(offset + buf.offset(), len)
59 } else {
60 0
61 }
62}
63
64#[inline]
66pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuffer; 2] {
67 let empty_buffer = MutableBuffer::new(0);
68 match data_type {
69 DataType::Null => [empty_buffer, MutableBuffer::new(0)],
70 DataType::Boolean => {
71 let bytes = bit_util::ceil(capacity, 8);
72 let buffer = MutableBuffer::new(bytes);
73 [buffer, empty_buffer]
74 }
75 DataType::UInt8
76 | DataType::UInt16
77 | DataType::UInt32
78 | DataType::UInt64
79 | DataType::Int8
80 | DataType::Int16
81 | DataType::Int32
82 | DataType::Int64
83 | DataType::Float16
84 | DataType::Float32
85 | DataType::Float64
86 | DataType::Decimal32(_, _)
87 | DataType::Decimal64(_, _)
88 | DataType::Decimal128(_, _)
89 | DataType::Decimal256(_, _)
90 | DataType::Date32
91 | DataType::Time32(_)
92 | DataType::Date64
93 | DataType::Time64(_)
94 | DataType::Duration(_)
95 | DataType::Timestamp(_, _)
96 | DataType::Interval(_) => [
97 MutableBuffer::new(capacity * data_type.primitive_width().unwrap()),
98 empty_buffer,
99 ],
100 DataType::Utf8 | DataType::Binary => {
101 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
102 buffer.push(0i32);
104 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
105 }
106 DataType::LargeUtf8 | DataType::LargeBinary => {
107 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
108 buffer.push(0i64);
110 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
111 }
112 DataType::BinaryView | DataType::Utf8View => [
113 MutableBuffer::new(capacity * mem::size_of::<u128>()),
114 empty_buffer,
115 ],
116 DataType::List(_) | DataType::Map(_, _) => {
117 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
119 buffer.push(0i32);
120 [buffer, empty_buffer]
121 }
122 DataType::ListView(_) => [
123 MutableBuffer::new(capacity * mem::size_of::<i32>()),
124 MutableBuffer::new(capacity * mem::size_of::<i32>()),
125 ],
126 DataType::LargeList(_) => {
127 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
129 buffer.push(0i64);
130 [buffer, empty_buffer]
131 }
132 DataType::LargeListView(_) => [
133 MutableBuffer::new(capacity * mem::size_of::<i64>()),
134 MutableBuffer::new(capacity * mem::size_of::<i64>()),
135 ],
136 DataType::FixedSizeBinary(size) => {
137 [MutableBuffer::new(capacity * *size as usize), empty_buffer]
138 }
139 DataType::Dictionary(k, _) => [
140 MutableBuffer::new(capacity * k.primitive_width().unwrap()),
141 empty_buffer,
142 ],
143 DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
144 [empty_buffer, MutableBuffer::new(0)]
145 }
146 DataType::Union(_, mode) => {
147 let type_ids = MutableBuffer::new(capacity * mem::size_of::<i8>());
148 match mode {
149 UnionMode::Sparse => [type_ids, empty_buffer],
150 UnionMode::Dense => {
151 let offsets = MutableBuffer::new(capacity * mem::size_of::<i32>());
152 [type_ids, offsets]
153 }
154 }
155 }
156 }
157}
158
/// Untyped description of an array: its logical type, its length/offset
/// window, the raw buffers backing it, any nested child arrays, and an
/// optional null buffer.
#[derive(Debug, Clone)]
pub struct ArrayData {
    /// The logical type of the array.
    data_type: DataType,

    /// Number of elements in the array.
    len: usize,

    /// Element offset into the buffers; `slice` adds to it instead of
    /// copying data.
    offset: usize,

    /// The buffers backing this array; how many are expected and how they
    /// are interpreted depends on `data_type` (checked in `validate`).
    buffers: Vec<Buffer>,

    /// Nested arrays (e.g. list values, struct fields, union children).
    child_data: Vec<ArrayData>,

    /// Validity information, if any; `None` is treated as "no nulls".
    nulls: Option<NullBuffer>,
}
252
/// A reference-counted, shareable [`ArrayData`].
pub type ArrayDataRef = Arc<ArrayData>;
255
256fn checked_len_plus_offset(
257 data_type: &DataType,
258 len: usize,
259 offset: usize,
260) -> Result<usize, ArrowError> {
261 len.checked_add(offset).ok_or_else(|| {
262 ArrowError::InvalidArgumentError(format!(
263 "Length {len} with offset {offset} overflows usize for {data_type}"
264 ))
265 })
266}
267
268impl ArrayData {
269 pub unsafe fn new_unchecked(
286 data_type: DataType,
287 len: usize,
288 null_count: Option<usize>,
289 null_bit_buffer: Option<Buffer>,
290 offset: usize,
291 buffers: Vec<Buffer>,
292 child_data: Vec<ArrayData>,
293 ) -> Self {
294 let mut skip_validation = UnsafeFlag::new();
295 unsafe { skip_validation.set(true) };
297
298 ArrayDataBuilder {
299 data_type,
300 len,
301 null_count,
302 null_bit_buffer,
303 nulls: None,
304 offset,
305 buffers,
306 child_data,
307 align_buffers: false,
308 skip_validation,
309 }
310 .build()
311 .unwrap()
312 }
313
314 pub fn try_new(
328 data_type: DataType,
329 len: usize,
330 null_bit_buffer: Option<Buffer>,
331 offset: usize,
332 buffers: Vec<Buffer>,
333 child_data: Vec<ArrayData>,
334 ) -> Result<Self, ArrowError> {
335 if let Some(null_bit_buffer) = null_bit_buffer.as_ref() {
339 let len_plus_offset = checked_len_plus_offset(&data_type, len, offset)?;
340 let needed_len = bit_util::ceil(len_plus_offset, 8);
341 if null_bit_buffer.len() < needed_len {
342 return Err(ArrowError::InvalidArgumentError(format!(
343 "null_bit_buffer size too small. got {} needed {}",
344 null_bit_buffer.len(),
345 needed_len
346 )));
347 }
348 }
349 let new_self = unsafe {
351 Self::new_unchecked(
352 data_type,
353 len,
354 None,
355 null_bit_buffer,
356 offset,
357 buffers,
358 child_data,
359 )
360 };
361
362 new_self.validate_data()?;
367 Ok(new_self)
368 }
369
370 pub fn into_parts(
376 self,
377 ) -> (
378 DataType,
379 usize,
380 Option<NullBuffer>,
381 usize,
382 Vec<Buffer>,
383 Vec<ArrayData>,
384 ) {
385 let Self {
386 data_type,
387 len,
388 nulls,
389 offset,
390 buffers,
391 child_data,
392 } = self;
393
394 (data_type, len, nulls, offset, buffers, child_data)
395 }
396
    /// Returns a new [`ArrayDataBuilder`] for an array of `data_type`.
    #[inline]
    pub const fn builder(data_type: DataType) -> ArrayDataBuilder {
        ArrayDataBuilder::new(data_type)
    }
402
    /// Returns a reference to the [`DataType`] of this array.
    #[inline]
    pub const fn data_type(&self) -> &DataType {
        &self.data_type
    }
408
409 pub fn buffers(&self) -> &[Buffer] {
411 &self.buffers
412 }
413
414 pub fn child_data(&self) -> &[ArrayData] {
417 &self.child_data[..]
418 }
419
420 #[inline]
422 pub fn is_null(&self, i: usize) -> bool {
423 match &self.nulls {
424 Some(v) => v.is_null(i),
425 None => false,
426 }
427 }
428
    /// Returns a reference to the null buffer of this array, if it has one.
    #[inline]
    pub fn nulls(&self) -> Option<&NullBuffer> {
        self.nulls.as_ref()
    }
436
    /// Returns whether the element at index `i` is not null
    /// (the negation of [`Self::is_null`]).
    #[inline]
    pub fn is_valid(&self, i: usize) -> bool {
        !self.is_null(i)
    }
442
    /// Returns the number of elements in this array.
    #[inline]
    pub const fn len(&self) -> usize {
        self.len
    }
448
449 #[inline]
451 pub const fn is_empty(&self) -> bool {
452 self.len == 0
453 }
454
    /// Returns the element offset of this array into its buffers.
    #[inline]
    pub const fn offset(&self) -> usize {
        self.offset
    }
460
461 #[inline]
463 pub fn null_count(&self) -> usize {
464 self.nulls
465 .as_ref()
466 .map(|x| x.null_count())
467 .unwrap_or_default()
468 }
469
470 pub fn get_buffer_memory_size(&self) -> usize {
482 let mut size = 0;
483 for buffer in &self.buffers {
484 size += buffer.capacity();
485 }
486 if let Some(bitmap) = &self.nulls {
487 size += bitmap.buffer().capacity()
488 }
489 for child in &self.child_data {
490 size += child.get_buffer_memory_size();
491 }
492 size
493 }
494
495 pub fn get_slice_memory_size(&self) -> Result<usize, ArrowError> {
508 let mut result: usize = 0;
509 let layout = layout(&self.data_type);
510
511 for spec in layout.buffers.iter() {
512 match spec {
513 BufferSpec::FixedWidth { byte_width, .. } => {
514 let buffer_size = self.len.checked_mul(*byte_width).ok_or_else(|| {
515 ArrowError::ComputeError(
516 "Integer overflow computing buffer size".to_string(),
517 )
518 })?;
519 result += buffer_size;
520 }
521 BufferSpec::VariableWidth => {
522 let buffer_len = match self.data_type {
523 DataType::Utf8 | DataType::Binary => {
524 let offsets = self.typed_offsets::<i32>()?;
525 (offsets[self.len] - offsets[0]) as usize
526 }
527 DataType::LargeUtf8 | DataType::LargeBinary => {
528 let offsets = self.typed_offsets::<i64>()?;
529 (offsets[self.len] - offsets[0]) as usize
530 }
531 _ => {
532 return Err(ArrowError::NotYetImplemented(format!(
533 "Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
534 self.data_type
535 )));
536 }
537 };
538 result += buffer_len;
539 }
540 BufferSpec::BitMap => {
541 let buffer_size = bit_util::ceil(self.len, 8);
542 result += buffer_size;
543 }
544 BufferSpec::AlwaysNull => {
545 }
547 }
548 }
549
550 if self.nulls().is_some() {
551 result += bit_util::ceil(self.len, 8);
552 }
553
554 for child in &self.child_data {
555 result += child.get_slice_memory_size()?;
556 }
557 Ok(result)
558 }
559
560 pub fn get_array_memory_size(&self) -> usize {
569 let mut size = mem::size_of_val(self);
570
571 for buffer in &self.buffers {
573 size += mem::size_of::<Buffer>();
574 size += buffer.capacity();
575 }
576 if let Some(nulls) = &self.nulls {
577 size += nulls.buffer().capacity();
578 }
579 for child in &self.child_data {
580 size += child.get_array_memory_size();
581 }
582
583 size
584 }
585
586 pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
594 assert!((offset + length) <= self.len());
595
596 if let DataType::Struct(_) = self.data_type() {
597 let new_offset = self.offset + offset;
599 ArrayData {
600 data_type: self.data_type().clone(),
601 len: length,
602 offset: new_offset,
603 buffers: self.buffers.clone(),
604 child_data: self
606 .child_data()
607 .iter()
608 .map(|data| data.slice(offset, length))
609 .collect(),
610 nulls: self.nulls.as_ref().map(|x| x.slice(offset, length)),
611 }
612 } else {
613 let mut new_data = self.clone();
614
615 new_data.len = length;
616 new_data.offset = offset + self.offset;
617 new_data.nulls = self.nulls.as_ref().map(|x| x.slice(offset, length));
618
619 new_data
620 }
621 }
622
623 pub fn buffer<T: ArrowNativeType>(&self, buffer: usize) -> &[T] {
630 &self.buffers()[buffer].typed_data()[self.offset..]
631 }
632
    /// Returns a new [`ArrayData`] of `data_type` with `len` elements, all of
    /// which are null (or, for types built without a null buffer here, the
    /// closest equivalent — see the `Union` and `RunEndEncoded` arms).
    ///
    /// # Panics
    ///
    /// Panics for a `Union` type without fields, when `len` does not fit the
    /// dense-union offset type (`i32`) or the run-end type, and for a
    /// dictionary key type without a primitive width.
    pub fn new_null(data_type: &DataType, len: usize) -> Self {
        // Bytes needed for a bitmap of `len` bits.
        let bit_len = bit_util::ceil(len, 8);
        // Helper producing a zero-filled immutable buffer of the given byte length.
        let zeroed = |len: usize| Buffer::from(MutableBuffer::from_len_zeroed(len));

        // `has_nulls` records whether an all-null validity buffer is attached below.
        let (buffers, child_data, has_nulls) = match data_type.primitive_width() {
            // Fixed-width primitives: one zeroed values buffer.
            Some(width) => (vec![zeroed(width * len)], vec![], true),
            None => match data_type {
                DataType::Null => (vec![], vec![], false),
                DataType::Boolean => (vec![zeroed(bit_len)], vec![], true),
                DataType::Binary | DataType::Utf8 => {
                    // `len + 1` zero offsets of 4 bytes each and an empty values buffer.
                    (vec![zeroed((len + 1) * 4), zeroed(0)], vec![], true)
                }
                DataType::BinaryView | DataType::Utf8View => (vec![zeroed(len * 16)], vec![], true),
                DataType::LargeBinary | DataType::LargeUtf8 => {
                    // Same as above with 8-byte offsets.
                    (vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true)
                }
                DataType::FixedSizeBinary(i) => (vec![zeroed(*i as usize * len)], vec![], true),
                DataType::List(f) | DataType::Map(f, _) => (
                    vec![zeroed((len + 1) * 4)],
                    vec![ArrayData::new_empty(f.data_type())],
                    true,
                ),
                DataType::LargeList(f) => (
                    vec![zeroed((len + 1) * 8)],
                    vec![ArrayData::new_empty(f.data_type())],
                    true,
                ),
                // List views carry two buffers (validated as offsets and sizes).
                DataType::ListView(f) => (
                    vec![zeroed(len * 4), zeroed(len * 4)],
                    vec![ArrayData::new_empty(f.data_type())],
                    true,
                ),
                DataType::LargeListView(f) => (
                    vec![zeroed(len * 8), zeroed(len * 8)],
                    vec![ArrayData::new_empty(f.data_type())],
                    true,
                ),
                // The child must hold `list_len` values for each of the `len` slots.
                DataType::FixedSizeList(f, list_len) => (
                    vec![],
                    vec![ArrayData::new_null(f.data_type(), *list_len as usize * len)],
                    true,
                ),
                DataType::Struct(fields) => (
                    vec![],
                    fields
                        .iter()
                        .map(|f| Self::new_null(f.data_type(), len))
                        .collect(),
                    true,
                ),
                DataType::Dictionary(k, v) => (
                    vec![zeroed(k.primitive_width().unwrap() * len)],
                    vec![ArrayData::new_empty(v.as_ref())],
                    true,
                ),
                DataType::Union(f, mode) => {
                    // Every slot uses the type id of the first field.
                    let (id, _) = f.iter().next().unwrap();
                    let ids = Buffer::from_iter(std::iter::repeat_n(id, len));
                    let buffers = match mode {
                        UnionMode::Sparse => vec![ids],
                        UnionMode::Dense => {
                            // Dense offsets simply count 0..len into the first child.
                            let end_offset = i32::from_usize(len).unwrap();
                            vec![ids, Buffer::from_iter(0_i32..end_offset)]
                        }
                    };

                    // The first child (every child for sparse unions) is an
                    // all-null array of `len`; remaining dense children are empty.
                    let children = f
                        .iter()
                        .enumerate()
                        .map(|(idx, (_, f))| {
                            if idx == 0 || *mode == UnionMode::Sparse {
                                Self::new_null(f.data_type(), len)
                            } else {
                                Self::new_empty(f.data_type())
                            }
                        })
                        .collect();

                    // No null buffer is attached to the union itself.
                    (buffers, children, false)
                }
                DataType::RunEndEncoded(r, v) => {
                    if len == 0 {
                        let runs = ArrayData::new_empty(r.data_type());
                        let values = ArrayData::new_empty(v.data_type());
                        (vec![], vec![runs, values], false)
                    } else {
                        // A single run ending at `len` ...
                        let runs = match r.data_type() {
                            DataType::Int16 => {
                                let i = i16::from_usize(len).expect("run overflow");
                                Buffer::from_slice_ref([i])
                            }
                            DataType::Int32 => {
                                let i = i32::from_usize(len).expect("run overflow");
                                Buffer::from_slice_ref([i])
                            }
                            DataType::Int64 => {
                                let i = i64::from_usize(len).expect("run overflow");
                                Buffer::from_slice_ref([i])
                            }
                            dt => unreachable!("Invalid run ends data type {dt}"),
                        };

                        let builder = ArrayData::builder(r.data_type().clone())
                            .len(1)
                            .buffers(vec![runs]);

                        // SAFETY (review note): the builder was given length 1 and a
                        // buffer holding exactly one value of the run-end type.
                        let runs = unsafe { builder.build_unchecked() };
                        // ... whose single value is null.
                        (
                            vec![],
                            vec![runs, ArrayData::new_null(v.data_type(), 1)],
                            false,
                        )
                    }
                }
                // These types report a primitive width and were handled by the
                // `Some(width)` arm above.
                DataType::Int8
                | DataType::Int16
                | DataType::Int32
                | DataType::Int64
                | DataType::UInt8
                | DataType::UInt16
                | DataType::UInt32
                | DataType::UInt64
                | DataType::Float16
                | DataType::Float32
                | DataType::Float64
                | DataType::Timestamp(_, _)
                | DataType::Date32
                | DataType::Date64
                | DataType::Time32(_)
                | DataType::Time64(_)
                | DataType::Duration(_)
                | DataType::Interval(_)
                | DataType::Decimal32(_, _)
                | DataType::Decimal64(_, _)
                | DataType::Decimal128(_, _)
                | DataType::Decimal256(_, _) => unreachable!("{data_type}"),
            },
        };

        let mut builder = ArrayDataBuilder::new(data_type.clone())
            .len(len)
            .buffers(buffers)
            .child_data(child_data);

        if has_nulls {
            builder = builder.nulls(Some(NullBuffer::new_null(len)))
        }

        // SAFETY (review note): buffers and children were sized from `len` for
        // `data_type` in the match above; validation is skipped on that basis.
        unsafe { builder.build_unchecked() }
    }
790
    /// Returns a new, zero-length [`ArrayData`] of `data_type`
    /// (shorthand for [`Self::new_null`] with a length of `0`).
    pub fn new_empty(data_type: &DataType) -> Self {
        Self::new_null(data_type, 0)
    }
795
796 pub fn align_buffers(&mut self) {
805 let layout = layout(&self.data_type);
806 for (buffer, spec) in self.buffers.iter_mut().zip(&layout.buffers) {
807 if let BufferSpec::FixedWidth { alignment, .. } = spec {
808 if buffer.as_ptr().align_offset(*alignment) != 0 {
809 *buffer = Buffer::from_slice_ref(buffer.as_ref());
810 }
811 }
812 }
813 for data in self.child_data.iter_mut() {
815 data.align_buffers()
816 }
817 }
818
    /// Performs the structural ("cheap") validation of this array without
    /// inspecting individual values:
    ///
    /// 1. `len + offset` does not overflow,
    /// 2. a null buffer is only present if the type's layout allows one,
    /// 3. the number of buffers matches the layout,
    /// 4. fixed-width and bitmap buffers are large enough for `len + offset`
    ///    elements, and fixed-width buffers are suitably aligned,
    /// 5. the null buffer is consistent with `len` and large enough,
    /// 6. child arrays match the type (see `validate_child_data`),
    /// 7. type-specific checks: first/last offsets for string/binary types,
    ///    dictionary key type, run-ends field type and nullability.
    ///
    /// # Errors
    ///
    /// Returns [`ArrowError::InvalidArgumentError`] describing the first
    /// violated condition.
    pub fn validate(&self) -> Result<(), ArrowError> {
        // Total number of elements the buffers must be able to address.
        let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;

        let layout = layout(&self.data_type);

        if !layout.can_contain_null_mask && self.nulls.is_some() {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Arrays of type {:?} cannot contain a null bitmask",
                self.data_type,
            )));
        }

        // Variadic layouts may have more buffers than the layout lists, never fewer.
        if self.buffers.len() < layout.buffers.len()
            || (!layout.variadic && self.buffers.len() != layout.buffers.len())
        {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Expected {} buffers in array of type {:?}, got {}",
                layout.buffers.len(),
                self.data_type,
                self.buffers.len(),
            )));
        }

        for (i, (buffer, spec)) in self.buffers.iter().zip(layout.buffers.iter()).enumerate() {
            match spec {
                BufferSpec::FixedWidth {
                    byte_width,
                    alignment,
                } => {
                    // Saturating: an overflowing requirement can never be met
                    // and is reported as "too small" below.
                    let min_buffer_size = len_plus_offset.saturating_mul(*byte_width);

                    if buffer.len() < min_buffer_size {
                        return Err(ArrowError::InvalidArgumentError(format!(
                            "Need at least {} bytes in buffers[{}] in array of type {:?}, but got {}",
                            min_buffer_size,
                            i,
                            self.data_type,
                            buffer.len()
                        )));
                    }

                    let align_offset = buffer.as_ptr().align_offset(*alignment);
                    if align_offset != 0 {
                        return Err(ArrowError::InvalidArgumentError(format!(
                            "Misaligned buffers[{i}] in array of type {:?}, offset from expected alignment of {alignment} by {}",
                            self.data_type,
                            align_offset.min(alignment - align_offset)
                        )));
                    }
                }
                BufferSpec::VariableWidth => {
                    // No size requirement is checked here; string/binary value
                    // buffers are checked against their offsets further below.
                }
                BufferSpec::BitMap => {
                    let min_buffer_size = bit_util::ceil(len_plus_offset, 8);
                    if buffer.len() < min_buffer_size {
                        return Err(ArrowError::InvalidArgumentError(format!(
                            "Need at least {} bytes for bitmap in buffers[{}] in array of type {:?}, but got {}",
                            min_buffer_size,
                            i,
                            self.data_type,
                            buffer.len()
                        )));
                    }
                }
                BufferSpec::AlwaysNull => {
                    // Nothing to validate for this buffer.
                }
            }
        }

        if let Some(nulls) = self.nulls() {
            if nulls.null_count() > self.len {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "null_count {} for an array exceeds length of {} elements",
                    nulls.null_count(),
                    self.len
                )));
            }

            let actual_len = nulls.validity().len();
            let needed_len = bit_util::ceil(len_plus_offset, 8);
            if actual_len < needed_len {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "null_bit_buffer size too small. got {actual_len} needed {needed_len}",
                )));
            }

            if nulls.len() != self.len {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "null buffer incorrect size. got {} expected {}",
                    nulls.len(),
                    self.len
                )));
            }
        }

        self.validate_child_data()?;

        // Additional type-specific checks.
        match &self.data_type {
            DataType::Utf8 | DataType::Binary => {
                self.validate_offsets::<i32>(self.buffers[1].len())?;
            }
            DataType::LargeUtf8 | DataType::LargeBinary => {
                self.validate_offsets::<i64>(self.buffers[1].len())?;
            }
            DataType::Dictionary(key_type, _value_type) => {
                if !DataType::is_dictionary_key_type(key_type) {
                    return Err(ArrowError::InvalidArgumentError(format!(
                        "Dictionary key type must be integer, but was {key_type}"
                    )));
                }
            }
            DataType::RunEndEncoded(run_ends_type, _) => {
                if run_ends_type.is_nullable() {
                    return Err(ArrowError::InvalidArgumentError(
                        "The nullable should be set to false for the field defining run_ends array.".to_string()
                    ));
                }
                if !DataType::is_run_ends_type(run_ends_type.data_type()) {
                    return Err(ArrowError::InvalidArgumentError(format!(
                        "RunArray run_ends types must be Int16, Int32 or Int64, but was {}",
                        run_ends_type.data_type()
                    )));
                }
            }
            _ => {}
        };

        Ok(())
    }
968
969 fn typed_offsets<T: ArrowNativeType + num_traits::Num>(&self) -> Result<&[T], ArrowError> {
976 if self.len == 0 && self.buffers[0].is_empty() {
978 return Ok(&[]);
979 }
980
981 let len = checked_len_plus_offset(&self.data_type, self.len, 1)?;
982
983 self.typed_buffer(0, len)
984 }
985
986 fn typed_buffer<T: ArrowNativeType + num_traits::Num>(
988 &self,
989 idx: usize,
990 len: usize,
991 ) -> Result<&[T], ArrowError> {
992 let buffer = &self.buffers[idx];
993
994 let required_elements = checked_len_plus_offset(&self.data_type, len, self.offset)?;
995 let byte_width = mem::size_of::<T>();
996 let required_len = required_elements.checked_mul(byte_width).ok_or_else(|| {
997 ArrowError::InvalidArgumentError(format!(
998 "Buffer {idx} of {} byte length overflow: {} elements of {} bytes exceeds usize",
999 self.data_type, required_elements, byte_width
1000 ))
1001 })?;
1002
1003 if buffer.len() < required_len {
1004 return Err(ArrowError::InvalidArgumentError(format!(
1005 "Buffer {} of {} isn't large enough. Expected {} bytes got {}",
1006 idx,
1007 self.data_type,
1008 required_len,
1009 buffer.len()
1010 )));
1011 }
1012
1013 Ok(&buffer.typed_data::<T>()[self.offset..required_elements])
1014 }
1015
1016 fn validate_offsets<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
1019 &self,
1020 values_length: usize,
1021 ) -> Result<(), ArrowError> {
1022 let offsets = self.typed_offsets::<T>()?;
1024 if offsets.is_empty() {
1025 return Ok(());
1026 }
1027
1028 let first_offset = offsets[0].to_usize().ok_or_else(|| {
1029 ArrowError::InvalidArgumentError(format!(
1030 "Error converting offset[0] ({}) to usize for {}",
1031 offsets[0], self.data_type
1032 ))
1033 })?;
1034
1035 let last_offset = offsets[self.len].to_usize().ok_or_else(|| {
1036 ArrowError::InvalidArgumentError(format!(
1037 "Error converting offset[{}] ({}) to usize for {}",
1038 self.len, offsets[self.len], self.data_type
1039 ))
1040 })?;
1041
1042 if first_offset > values_length {
1043 return Err(ArrowError::InvalidArgumentError(format!(
1044 "First offset {} of {} is larger than values length {}",
1045 first_offset, self.data_type, values_length,
1046 )));
1047 }
1048
1049 if last_offset > values_length {
1050 return Err(ArrowError::InvalidArgumentError(format!(
1051 "Last offset {} of {} is larger than values length {}",
1052 last_offset, self.data_type, values_length,
1053 )));
1054 }
1055
1056 if first_offset > last_offset {
1057 return Err(ArrowError::InvalidArgumentError(format!(
1058 "First offset {} in {} is smaller than last offset {}",
1059 first_offset, self.data_type, last_offset,
1060 )));
1061 }
1062
1063 Ok(())
1064 }
1065
1066 fn validate_offsets_and_sizes<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
1069 &self,
1070 values_length: usize,
1071 ) -> Result<(), ArrowError> {
1072 let offsets: &[T] = self.typed_buffer(0, self.len)?;
1073 let sizes: &[T] = self.typed_buffer(1, self.len)?;
1074 if offsets.len() != sizes.len() {
1075 return Err(ArrowError::ComputeError(format!(
1076 "ListView offsets len {} does not match sizes len {}",
1077 offsets.len(),
1078 sizes.len()
1079 )));
1080 }
1081
1082 for i in 0..sizes.len() {
1083 let size = sizes[i].to_usize().ok_or_else(|| {
1084 ArrowError::InvalidArgumentError(format!(
1085 "Error converting size[{}] ({}) to usize for {}",
1086 i, sizes[i], self.data_type
1087 ))
1088 })?;
1089 let offset = offsets[i].to_usize().ok_or_else(|| {
1090 ArrowError::InvalidArgumentError(format!(
1091 "Error converting offset[{}] ({}) to usize for {}",
1092 i, offsets[i], self.data_type
1093 ))
1094 })?;
1095 if size
1096 .checked_add(offset)
1097 .expect("Offset and size have exceeded the usize boundary")
1098 > values_length
1099 {
1100 return Err(ArrowError::InvalidArgumentError(format!(
1101 "Size {} at index {} is larger than the remaining values for {}",
1102 size, i, self.data_type
1103 )));
1104 }
1105 }
1106 Ok(())
1107 }
1108
1109 fn validate_child_data(&self) -> Result<(), ArrowError> {
1111 match &self.data_type {
1112 DataType::List(field) | DataType::Map(field, _) => {
1113 let values_data = self.get_single_valid_child_data(field.data_type())?;
1114 self.validate_offsets::<i32>(values_data.len)?;
1115 Ok(())
1116 }
1117 DataType::LargeList(field) => {
1118 let values_data = self.get_single_valid_child_data(field.data_type())?;
1119 self.validate_offsets::<i64>(values_data.len)?;
1120 Ok(())
1121 }
1122 DataType::ListView(field) => {
1123 let values_data = self.get_single_valid_child_data(field.data_type())?;
1124 self.validate_offsets_and_sizes::<i32>(values_data.len)?;
1125 Ok(())
1126 }
1127 DataType::LargeListView(field) => {
1128 let values_data = self.get_single_valid_child_data(field.data_type())?;
1129 self.validate_offsets_and_sizes::<i64>(values_data.len)?;
1130 Ok(())
1131 }
1132 DataType::FixedSizeList(field, list_size) => {
1133 let values_data = self.get_single_valid_child_data(field.data_type())?;
1134
1135 let list_size: usize = (*list_size).try_into().map_err(|_| {
1136 ArrowError::InvalidArgumentError(format!(
1137 "{} has a negative list_size {}",
1138 self.data_type, list_size
1139 ))
1140 })?;
1141
1142 let expected_values_len = self.len
1143 .checked_mul(list_size)
1144 .expect("integer overflow computing expected number of expected values in FixedListSize");
1145
1146 if values_data.len < expected_values_len {
1147 return Err(ArrowError::InvalidArgumentError(format!(
1148 "Values length {} is less than the length ({}) multiplied by the value size ({}) for {}",
1149 values_data.len, self.len, list_size, self.data_type
1150 )));
1151 }
1152
1153 Ok(())
1154 }
1155 DataType::Struct(fields) => {
1156 self.validate_num_child_data(fields.len())?;
1157 for (i, field) in fields.iter().enumerate() {
1158 let field_data = self.get_valid_child_data(i, field.data_type())?;
1159
1160 if field_data.len < self.len {
1162 return Err(ArrowError::InvalidArgumentError(format!(
1163 "{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
1164 self.data_type,
1165 i,
1166 field.name(),
1167 field_data.len,
1168 self.len
1169 )));
1170 }
1171 }
1172 Ok(())
1173 }
1174 DataType::RunEndEncoded(run_ends_field, values_field) => {
1175 self.validate_num_child_data(2)?;
1176 let run_ends_data = self.get_valid_child_data(0, run_ends_field.data_type())?;
1177 let values_data = self.get_valid_child_data(1, values_field.data_type())?;
1178 if run_ends_data.len != values_data.len {
1179 return Err(ArrowError::InvalidArgumentError(format!(
1180 "The run_ends array length should be the same as values array length. Run_ends array length is {}, values array length is {}",
1181 run_ends_data.len, values_data.len
1182 )));
1183 }
1184 if run_ends_data.nulls.is_some() {
1185 return Err(ArrowError::InvalidArgumentError(
1186 "Found null values in run_ends array. The run_ends array should not have null values.".to_string(),
1187 ));
1188 }
1189 Ok(())
1190 }
1191 DataType::Union(fields, mode) => {
1192 self.validate_num_child_data(fields.len())?;
1193
1194 for (i, (_, field)) in fields.iter().enumerate() {
1195 let field_data = self.get_valid_child_data(i, field.data_type())?;
1196
1197 if mode == &UnionMode::Sparse {
1198 let len_plus_offset =
1199 checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1200 if field_data.len < len_plus_offset {
1201 return Err(ArrowError::InvalidArgumentError(format!(
1202 "Sparse union child array #{} has length smaller than expected for union array ({} < {})",
1203 i, field_data.len, len_plus_offset
1204 )));
1205 }
1206 }
1207 }
1208 Ok(())
1209 }
1210 DataType::Dictionary(_key_type, value_type) => {
1211 self.get_single_valid_child_data(value_type)?;
1212 Ok(())
1213 }
1214 _ => {
1215 if !self.child_data.is_empty() {
1217 return Err(ArrowError::InvalidArgumentError(format!(
1218 "Expected no child arrays for type {} but got {}",
1219 self.data_type,
1220 self.child_data.len()
1221 )));
1222 }
1223 Ok(())
1224 }
1225 }
1226 }
1227
1228 fn get_single_valid_child_data(
1232 &self,
1233 expected_type: &DataType,
1234 ) -> Result<&ArrayData, ArrowError> {
1235 self.validate_num_child_data(1)?;
1236 self.get_valid_child_data(0, expected_type)
1237 }
1238
1239 fn validate_num_child_data(&self, expected_len: usize) -> Result<(), ArrowError> {
1241 if self.child_data.len() != expected_len {
1242 Err(ArrowError::InvalidArgumentError(format!(
1243 "Value data for {} should contain {} child data array(s), had {}",
1244 self.data_type,
1245 expected_len,
1246 self.child_data.len()
1247 )))
1248 } else {
1249 Ok(())
1250 }
1251 }
1252
1253 fn get_valid_child_data(
1256 &self,
1257 i: usize,
1258 expected_type: &DataType,
1259 ) -> Result<&ArrayData, ArrowError> {
1260 let values_data = self.child_data.get(i).ok_or_else(|| {
1261 ArrowError::InvalidArgumentError(format!(
1262 "{} did not have enough child arrays. Expected at least {} but had only {}",
1263 self.data_type,
1264 i + 1,
1265 self.child_data.len()
1266 ))
1267 })?;
1268
1269 if expected_type != &values_data.data_type {
1270 return Err(ArrowError::InvalidArgumentError(format!(
1271 "Child type mismatch for {}. Expected {} but child data had {}",
1272 self.data_type, expected_type, values_data.data_type
1273 )));
1274 }
1275
1276 values_data.validate()?;
1277 Ok(values_data)
1278 }
1279
1280 pub fn validate_data(&self) -> Result<(), ArrowError> {
1296 self.validate()?;
1297
1298 self.validate_nulls()?;
1299 self.validate_values()?;
1300 Ok(())
1301 }
1302
1303 pub fn validate_full(&self) -> Result<(), ArrowError> {
1308 self.validate_data()?;
1309 self.child_data
1311 .iter()
1312 .enumerate()
1313 .try_for_each(|(i, child_data)| {
1314 child_data.validate_full().map_err(|e| {
1315 ArrowError::InvalidArgumentError(format!(
1316 "{} child #{} invalid: {}",
1317 self.data_type, i, e
1318 ))
1319 })
1320 })?;
1321 Ok(())
1322 }
1323
1324 pub fn validate_nulls(&self) -> Result<(), ArrowError> {
1334 if let Some(nulls) = &self.nulls {
1335 let actual = nulls.len() - nulls.inner().count_set_bits();
1336 if actual != nulls.null_count() {
1337 return Err(ArrowError::InvalidArgumentError(format!(
1338 "null_count value ({}) doesn't match actual number of nulls in array ({})",
1339 nulls.null_count(),
1340 actual
1341 )));
1342 }
1343 }
1344
1345 match &self.data_type {
1350 DataType::List(f) | DataType::LargeList(f) | DataType::Map(f, _) => {
1351 if !f.is_nullable() {
1352 self.validate_non_nullable(None, &self.child_data[0])?
1353 }
1354 }
1355 DataType::FixedSizeList(field, len) => {
1356 let child = &self.child_data[0];
1357 if !field.is_nullable() {
1358 match &self.nulls {
1359 Some(nulls) => {
1360 let element_len = *len as usize;
1361 let expanded = nulls.expand(element_len);
1362 self.validate_non_nullable(Some(&expanded), child)?;
1363 }
1364 None => self.validate_non_nullable(None, child)?,
1365 }
1366 }
1367 }
1368 DataType::Struct(fields) => {
1369 for (field, child) in fields.iter().zip(&self.child_data) {
1370 if !field.is_nullable() {
1371 self.validate_non_nullable(self.nulls(), child)?
1372 }
1373 }
1374 }
1375 _ => {}
1376 }
1377
1378 Ok(())
1379 }
1380
1381 fn validate_non_nullable(
1383 &self,
1384 mask: Option<&NullBuffer>,
1385 child: &ArrayData,
1386 ) -> Result<(), ArrowError> {
1387 let mask = match mask {
1388 Some(mask) => mask,
1389 None => {
1390 return match child.null_count() {
1391 0 => Ok(()),
1392 _ => Err(ArrowError::InvalidArgumentError(format!(
1393 "non-nullable child of type {} contains nulls not present in parent {}",
1394 child.data_type, self.data_type
1395 ))),
1396 };
1397 }
1398 };
1399
1400 match child.nulls() {
1401 Some(nulls) if !mask.contains(nulls) => Err(ArrowError::InvalidArgumentError(format!(
1402 "non-nullable child of type {} contains nulls not present in parent",
1403 child.data_type
1404 ))),
1405 _ => Ok(()),
1406 }
1407 }
1408
1409 pub fn validate_values(&self) -> Result<(), ArrowError> {
1415 match &self.data_type {
1416 DataType::Utf8 => self.validate_utf8::<i32>(),
1417 DataType::LargeUtf8 => self.validate_utf8::<i64>(),
1418 DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
1419 DataType::LargeBinary => self.validate_offsets_full::<i64>(self.buffers[1].len()),
1420 DataType::BinaryView => {
1421 let views = self.typed_buffer::<u128>(0, self.len)?;
1422 validate_binary_view(views, &self.buffers[1..])
1423 }
1424 DataType::Utf8View => {
1425 let views = self.typed_buffer::<u128>(0, self.len)?;
1426 validate_string_view(views, &self.buffers[1..])
1427 }
1428 DataType::List(_) | DataType::Map(_, _) => {
1429 let child = &self.child_data[0];
1430 self.validate_offsets_full::<i32>(child.len)
1431 }
1432 DataType::LargeList(_) => {
1433 let child = &self.child_data[0];
1434 self.validate_offsets_full::<i64>(child.len)
1435 }
1436 DataType::Union(_, _) => {
1437 Ok(())
1443 }
1444 DataType::Dictionary(key_type, _value_type) => {
1445 let dictionary_length: i64 = self.child_data[0].len.try_into().unwrap();
1446 let max_value = dictionary_length - 1;
1447 match key_type.as_ref() {
1448 DataType::UInt8 => self.check_bounds::<u8>(max_value),
1449 DataType::UInt16 => self.check_bounds::<u16>(max_value),
1450 DataType::UInt32 => self.check_bounds::<u32>(max_value),
1451 DataType::UInt64 => self.check_bounds::<u64>(max_value),
1452 DataType::Int8 => self.check_bounds::<i8>(max_value),
1453 DataType::Int16 => self.check_bounds::<i16>(max_value),
1454 DataType::Int32 => self.check_bounds::<i32>(max_value),
1455 DataType::Int64 => self.check_bounds::<i64>(max_value),
1456 _ => unreachable!(),
1457 }
1458 }
1459 DataType::RunEndEncoded(run_ends, _values) => {
1460 let run_ends_data = self.child_data()[0].clone();
1461 match run_ends.data_type() {
1462 DataType::Int16 => run_ends_data.check_run_ends::<i16>(),
1463 DataType::Int32 => run_ends_data.check_run_ends::<i32>(),
1464 DataType::Int64 => run_ends_data.check_run_ends::<i64>(),
1465 _ => unreachable!(),
1466 }
1467 }
1468 _ => {
1469 Ok(())
1471 }
1472 }
1473 }
1474
1475 fn validate_each_offset<T, V>(&self, offset_limit: usize, validate: V) -> Result<(), ArrowError>
1486 where
1487 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1488 V: Fn(usize, Range<usize>) -> Result<(), ArrowError>,
1489 {
1490 self.typed_offsets::<T>()?
1491 .iter()
1492 .enumerate()
1493 .map(|(i, x)| {
1494 let r = x.to_usize().ok_or_else(|| {
1496 ArrowError::InvalidArgumentError(format!(
1497 "Offset invariant failure: Could not convert offset {x} to usize at position {i}"))}
1498 );
1499 match r {
1501 Ok(n) if n <= offset_limit => Ok((i, n)),
1502 Ok(_) => Err(ArrowError::InvalidArgumentError(format!(
1503 "Offset invariant failure: offset at position {i} out of bounds: {x} > {offset_limit}"))
1504 ),
1505 Err(e) => Err(e),
1506 }
1507 })
1508 .scan(0_usize, |start, end| {
1509 match end {
1511 Ok((i, end)) if *start <= end => {
1512 let range = Some(Ok((i, *start..end)));
1513 *start = end;
1514 range
1515 }
1516 Ok((i, end)) => Some(Err(ArrowError::InvalidArgumentError(format!(
1517 "Offset invariant failure: non-monotonic offset at slot {}: {} > {}",
1518 i - 1, start, end))
1519 )),
1520 Err(err) => Some(Err(err)),
1521 }
1522 })
1523 .skip(1) .try_for_each(|res: Result<(usize, Range<usize>), ArrowError>| {
1525 let (item_index, range) = res?;
1526 validate(item_index-1, range)
1527 })
1528 }
1529
1530 fn validate_utf8<T>(&self) -> Result<(), ArrowError>
1533 where
1534 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1535 {
1536 let values_buffer = &self.buffers[1].as_slice();
1537 if let Ok(values_str) = std::str::from_utf8(values_buffer) {
1538 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1540 if !values_str.is_char_boundary(range.start)
1541 || !values_str.is_char_boundary(range.end)
1542 {
1543 return Err(ArrowError::InvalidArgumentError(format!(
1544 "incomplete utf-8 byte sequence from index {string_index}"
1545 )));
1546 }
1547 Ok(())
1548 })
1549 } else {
1550 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1552 std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
1553 ArrowError::InvalidArgumentError(format!(
1554 "Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}"
1555 ))
1556 })?;
1557 Ok(())
1558 })
1559 }
1560 }
1561
1562 fn validate_offsets_full<T>(&self, offset_limit: usize) -> Result<(), ArrowError>
1565 where
1566 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1567 {
1568 self.validate_each_offset::<T, _>(offset_limit, |_string_index, _range| {
1569 Ok(())
1572 })
1573 }
1574
1575 fn check_bounds<T>(&self, max_value: i64) -> Result<(), ArrowError>
1578 where
1579 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1580 {
1581 let required_len = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1582 let buffer = &self.buffers[0];
1583
1584 assert!(buffer.len() / mem::size_of::<T>() >= required_len);
1587
1588 let indexes: &[T] = &buffer.typed_data::<T>()[self.offset..required_len];
1590
1591 indexes.iter().enumerate().try_for_each(|(i, &dict_index)| {
1592 if self.is_null(i) {
1594 return Ok(());
1595 }
1596 let dict_index: i64 = dict_index.try_into().map_err(|_| {
1597 ArrowError::InvalidArgumentError(format!(
1598 "Value at position {i} out of bounds: {dict_index} (can not convert to i64)"
1599 ))
1600 })?;
1601
1602 if dict_index < 0 || dict_index > max_value {
1603 return Err(ArrowError::InvalidArgumentError(format!(
1604 "Value at position {i} out of bounds: {dict_index} (should be in [0, {max_value}])"
1605 )));
1606 }
1607 Ok(())
1608 })
1609 }
1610
1611 fn check_run_ends<T>(&self) -> Result<(), ArrowError>
1613 where
1614 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1615 {
1616 let values = self.typed_buffer::<T>(0, self.len)?;
1617 let mut prev_value: i64 = 0_i64;
1618 values.iter().enumerate().try_for_each(|(ix, &inp_value)| {
1619 let value: i64 = inp_value.try_into().map_err(|_| {
1620 ArrowError::InvalidArgumentError(format!(
1621 "Value at position {ix} out of bounds: {inp_value} (can not convert to i64)"
1622 ))
1623 })?;
1624 if value <= 0_i64 {
1625 return Err(ArrowError::InvalidArgumentError(format!(
1626 "The values in run_ends array should be strictly positive. Found value {value} at index {ix} that does not match the criteria."
1627 )));
1628 }
1629 if ix > 0 && value <= prev_value {
1630 return Err(ArrowError::InvalidArgumentError(format!(
1631 "The values in run_ends array should be strictly increasing. Found value {value} at index {ix} with previous value {prev_value} that does not match the criteria."
1632 )));
1633 }
1634
1635 prev_value = value;
1636 Ok(())
1637 })?;
1638
1639 let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1640 if prev_value.as_usize() < len_plus_offset {
1641 return Err(ArrowError::InvalidArgumentError(format!(
1642 "The offset + length of array should be less or equal to last value in the run_ends array. The last value of run_ends array is {prev_value} and offset + length of array is {}.",
1643 len_plus_offset
1644 )));
1645 }
1646 Ok(())
1647 }
1648
1649 pub fn ptr_eq(&self, other: &Self) -> bool {
1653 if self.offset != other.offset
1654 || self.len != other.len
1655 || self.data_type != other.data_type
1656 || self.buffers.len() != other.buffers.len()
1657 || self.child_data.len() != other.child_data.len()
1658 {
1659 return false;
1660 }
1661
1662 match (&self.nulls, &other.nulls) {
1663 (Some(a), Some(b)) if !a.inner().ptr_eq(b.inner()) => return false,
1664 (Some(_), None) | (None, Some(_)) => return false,
1665 _ => {}
1666 };
1667
1668 if !self
1669 .buffers
1670 .iter()
1671 .zip(other.buffers.iter())
1672 .all(|(a, b)| a.as_ptr() == b.as_ptr())
1673 {
1674 return false;
1675 }
1676
1677 self.child_data
1678 .iter()
1679 .zip(other.child_data.iter())
1680 .all(|(a, b)| a.ptr_eq(b))
1681 }
1682
1683 pub fn into_builder(self) -> ArrayDataBuilder {
1685 self.into()
1686 }
1687
1688 #[cfg(feature = "pool")]
1695 pub fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
1696 for buffer in &self.buffers {
1698 buffer.claim(pool);
1699 }
1700
1701 if let Some(nulls) = &self.nulls {
1703 nulls.claim(pool);
1704 }
1705
1706 for child in &self.child_data {
1708 child.claim(pool);
1709 }
1710 }
1711}
1712
1713pub fn layout(data_type: &DataType) -> DataTypeLayout {
1716 use arrow_schema::IntervalUnit::*;
1719
1720 match data_type {
1721 DataType::Null => DataTypeLayout {
1722 buffers: vec![],
1723 can_contain_null_mask: false,
1724 variadic: false,
1725 },
1726 DataType::Boolean => DataTypeLayout {
1727 buffers: vec![BufferSpec::BitMap],
1728 can_contain_null_mask: true,
1729 variadic: false,
1730 },
1731 DataType::Int8 => DataTypeLayout::new_fixed_width::<i8>(),
1732 DataType::Int16 => DataTypeLayout::new_fixed_width::<i16>(),
1733 DataType::Int32 => DataTypeLayout::new_fixed_width::<i32>(),
1734 DataType::Int64 => DataTypeLayout::new_fixed_width::<i64>(),
1735 DataType::UInt8 => DataTypeLayout::new_fixed_width::<u8>(),
1736 DataType::UInt16 => DataTypeLayout::new_fixed_width::<u16>(),
1737 DataType::UInt32 => DataTypeLayout::new_fixed_width::<u32>(),
1738 DataType::UInt64 => DataTypeLayout::new_fixed_width::<u64>(),
1739 DataType::Float16 => DataTypeLayout::new_fixed_width::<half::f16>(),
1740 DataType::Float32 => DataTypeLayout::new_fixed_width::<f32>(),
1741 DataType::Float64 => DataTypeLayout::new_fixed_width::<f64>(),
1742 DataType::Timestamp(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1743 DataType::Date32 => DataTypeLayout::new_fixed_width::<i32>(),
1744 DataType::Date64 => DataTypeLayout::new_fixed_width::<i64>(),
1745 DataType::Time32(_) => DataTypeLayout::new_fixed_width::<i32>(),
1746 DataType::Time64(_) => DataTypeLayout::new_fixed_width::<i64>(),
1747 DataType::Interval(YearMonth) => DataTypeLayout::new_fixed_width::<i32>(),
1748 DataType::Interval(DayTime) => DataTypeLayout::new_fixed_width::<IntervalDayTime>(),
1749 DataType::Interval(MonthDayNano) => {
1750 DataTypeLayout::new_fixed_width::<IntervalMonthDayNano>()
1751 }
1752 DataType::Duration(_) => DataTypeLayout::new_fixed_width::<i64>(),
1753 DataType::Decimal32(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1754 DataType::Decimal64(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1755 DataType::Decimal128(_, _) => DataTypeLayout::new_fixed_width::<i128>(),
1756 DataType::Decimal256(_, _) => DataTypeLayout::new_fixed_width::<i256>(),
1757 DataType::FixedSizeBinary(size) => {
1758 let spec = BufferSpec::FixedWidth {
1759 byte_width: (*size).try_into().unwrap(),
1760 alignment: mem::align_of::<u8>(),
1761 };
1762 DataTypeLayout {
1763 buffers: vec![spec],
1764 can_contain_null_mask: true,
1765 variadic: false,
1766 }
1767 }
1768 DataType::Binary => DataTypeLayout::new_binary::<i32>(),
1769 DataType::LargeBinary => DataTypeLayout::new_binary::<i64>(),
1770 DataType::Utf8 => DataTypeLayout::new_binary::<i32>(),
1771 DataType::LargeUtf8 => DataTypeLayout::new_binary::<i64>(),
1772 DataType::BinaryView | DataType::Utf8View => DataTypeLayout::new_view(),
1773 DataType::FixedSizeList(_, _) => DataTypeLayout::new_nullable_empty(), DataType::List(_) => DataTypeLayout::new_fixed_width::<i32>(),
1775 DataType::ListView(_) => DataTypeLayout::new_list_view::<i32>(),
1776 DataType::LargeListView(_) => DataTypeLayout::new_list_view::<i64>(),
1777 DataType::LargeList(_) => DataTypeLayout::new_fixed_width::<i64>(),
1778 DataType::Map(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1779 DataType::Struct(_) => DataTypeLayout::new_nullable_empty(), DataType::RunEndEncoded(_, _) => DataTypeLayout::new_empty(), DataType::Union(_, mode) => {
1782 let type_ids = BufferSpec::FixedWidth {
1783 byte_width: mem::size_of::<i8>(),
1784 alignment: mem::align_of::<i8>(),
1785 };
1786
1787 DataTypeLayout {
1788 buffers: match mode {
1789 UnionMode::Sparse => {
1790 vec![type_ids]
1791 }
1792 UnionMode::Dense => {
1793 vec![
1794 type_ids,
1795 BufferSpec::FixedWidth {
1796 byte_width: mem::size_of::<i32>(),
1797 alignment: mem::align_of::<i32>(),
1798 },
1799 ]
1800 }
1801 },
1802 can_contain_null_mask: false,
1803 variadic: false,
1804 }
1805 }
1806 DataType::Dictionary(key_type, _value_type) => layout(key_type),
1807 }
1808}
1809
/// Describes the buffers expected for arrays of a particular data type, as
/// returned by [`layout`].
#[derive(Debug, PartialEq, Eq)]
pub struct DataTypeLayout {
    /// One [`BufferSpec`] per expected buffer, in buffer order.
    pub buffers: Vec<BufferSpec>,

    /// Whether arrays with this layout may carry a null mask.
    pub can_contain_null_mask: bool,

    /// `true` only for layouts built by [`DataTypeLayout::new_view`].
    /// NOTE(review): presumably means buffers beyond those listed in
    /// `buffers` are permitted — confirm against the validation code.
    pub variadic: bool,
}
1825
1826impl DataTypeLayout {
1827 pub fn new_fixed_width<T>() -> Self {
1829 Self {
1830 buffers: vec![BufferSpec::FixedWidth {
1831 byte_width: mem::size_of::<T>(),
1832 alignment: mem::align_of::<T>(),
1833 }],
1834 can_contain_null_mask: true,
1835 variadic: false,
1836 }
1837 }
1838
1839 pub fn new_nullable_empty() -> Self {
1842 Self {
1843 buffers: vec![],
1844 can_contain_null_mask: true,
1845 variadic: false,
1846 }
1847 }
1848
1849 pub fn new_empty() -> Self {
1852 Self {
1853 buffers: vec![],
1854 can_contain_null_mask: false,
1855 variadic: false,
1856 }
1857 }
1858
1859 pub fn new_binary<T>() -> Self {
1863 Self {
1864 buffers: vec![
1865 BufferSpec::FixedWidth {
1867 byte_width: mem::size_of::<T>(),
1868 alignment: mem::align_of::<T>(),
1869 },
1870 BufferSpec::VariableWidth,
1872 ],
1873 can_contain_null_mask: true,
1874 variadic: false,
1875 }
1876 }
1877
1878 pub fn new_view() -> Self {
1880 Self {
1881 buffers: vec![BufferSpec::FixedWidth {
1882 byte_width: mem::size_of::<u128>(),
1883 alignment: mem::align_of::<u128>(),
1884 }],
1885 can_contain_null_mask: true,
1886 variadic: true,
1887 }
1888 }
1889
1890 pub fn new_list_view<T>() -> Self {
1892 Self {
1893 buffers: vec![
1894 BufferSpec::FixedWidth {
1895 byte_width: mem::size_of::<T>(),
1896 alignment: mem::align_of::<T>(),
1897 },
1898 BufferSpec::FixedWidth {
1899 byte_width: mem::size_of::<T>(),
1900 alignment: mem::align_of::<T>(),
1901 },
1902 ],
1903 can_contain_null_mask: true,
1904 variadic: false,
1905 }
1906 }
1907}
1908
/// Describes the contents of a single buffer within a [`DataTypeLayout`].
#[derive(Debug, PartialEq, Eq)]
pub enum BufferSpec {
    /// A buffer of fixed-width elements.
    FixedWidth {
        /// Width of one element, in bytes.
        byte_width: usize,
        /// Required alignment of the buffer, in bytes.
        alignment: usize,
    },
    /// A buffer of variable-width data, e.g. the second buffer of the
    /// layouts built by `DataTypeLayout::new_binary`.
    VariableWidth,
    /// A bitmap buffer; `layout` uses it for `DataType::Boolean`.
    BitMap,
    /// Not constructed anywhere in the visible code, hence the
    /// `dead_code` allowance.
    #[allow(dead_code)]
    AlwaysNull,
}
1941
/// Equality for [`ArrayData`] is delegated to `equal::equal`; contrast with
/// `ArrayData::ptr_eq`, which compares buffer pointers.
impl PartialEq for ArrayData {
    fn eq(&self, other: &Self) -> bool {
        equal::equal(self, other)
    }
}
1947
/// A boolean flag that starts out `false` and can only be changed through the
/// `unsafe` method `UnsafeFlag::set`.
///
/// `ArrayDataBuilder` uses it to record whether validation should be skipped.
#[derive(Debug, Clone)]
#[doc(hidden)]
pub struct UnsafeFlag(bool);
1969
impl UnsafeFlag {
    /// Creates a new flag with the value `false`.
    #[inline]
    pub const fn new() -> Self {
        Self(false)
    }

    /// Sets the flag to `val`.
    ///
    /// # Safety
    ///
    /// This method touches no memory unsafely itself; it is marked `unsafe`
    /// so that changing the flag requires an `unsafe` block. Callers take on
    /// whatever obligations the code reading the flag attaches to it (in
    /// `ArrayDataBuilder::build`, a `true` flag skips data validation).
    #[inline]
    pub unsafe fn set(&mut self, val: bool) {
        self.0 = val;
    }

    /// Returns the current value of the flag.
    #[inline]
    pub fn get(&self) -> bool {
        self.0
    }
}
1999
impl Default for UnsafeFlag {
    /// Same as [`UnsafeFlag::new`]: the flag starts out `false`.
    fn default() -> Self {
        Self::new()
    }
}
2006
/// Builder for [`ArrayData`]; see `ArrayDataBuilder::build` for how the
/// fields are combined.
#[derive(Debug)]
pub struct ArrayDataBuilder {
    /// Data type of the array being built.
    data_type: DataType,
    /// Number of elements.
    len: usize,
    /// Caller-supplied null count; only consulted together with
    /// `null_bit_buffer`.
    null_count: Option<usize>,
    /// Raw validity bitmap; ignored when `nulls` is set.
    null_bit_buffer: Option<Buffer>,
    /// Ready-made null buffer; takes precedence over `null_bit_buffer`.
    nulls: Option<NullBuffer>,
    /// Offset of the first element.
    offset: usize,
    /// Value buffers.
    buffers: Vec<Buffer>,
    /// Child arrays.
    child_data: Vec<ArrayData>,
    /// When `true`, `build` calls `ArrayData::align_buffers` before validating.
    align_buffers: bool,
    /// When set, `build` skips validation unless the `force_validate` feature
    /// is enabled.
    skip_validation: UnsafeFlag,
}
2033
2034impl ArrayDataBuilder {
2035 #[inline]
2036 pub const fn new(data_type: DataType) -> Self {
2038 Self {
2039 data_type,
2040 len: 0,
2041 null_count: None,
2042 null_bit_buffer: None,
2043 nulls: None,
2044 offset: 0,
2045 buffers: vec![],
2046 child_data: vec![],
2047 align_buffers: false,
2048 skip_validation: UnsafeFlag::new(),
2049 }
2050 }
2051
2052 pub fn data_type(self, data_type: DataType) -> Self {
2054 Self { data_type, ..self }
2055 }
2056
2057 #[inline]
2058 #[allow(clippy::len_without_is_empty)]
2059 pub const fn len(mut self, n: usize) -> Self {
2061 self.len = n;
2062 self
2063 }
2064
2065 pub fn nulls(mut self, nulls: Option<NullBuffer>) -> Self {
2067 self.nulls = nulls;
2068 self.null_count = None;
2069 self.null_bit_buffer = None;
2070 self
2071 }
2072
2073 pub fn null_count(mut self, null_count: usize) -> Self {
2075 self.null_count = Some(null_count);
2076 self
2077 }
2078
2079 pub fn null_bit_buffer(mut self, buf: Option<Buffer>) -> Self {
2081 self.nulls = None;
2082 self.null_bit_buffer = buf;
2083 self
2084 }
2085
2086 #[inline]
2088 pub const fn offset(mut self, n: usize) -> Self {
2089 self.offset = n;
2090 self
2091 }
2092
2093 pub fn buffers(mut self, v: Vec<Buffer>) -> Self {
2095 self.buffers = v;
2096 self
2097 }
2098
2099 pub fn add_buffer(mut self, b: Buffer) -> Self {
2101 self.buffers.push(b);
2102 self
2103 }
2104
2105 pub fn add_buffers<I: IntoIterator<Item = Buffer>>(mut self, bs: I) -> Self {
2107 self.buffers.extend(bs);
2108 self
2109 }
2110
2111 pub fn child_data(mut self, v: Vec<ArrayData>) -> Self {
2113 self.child_data = v;
2114 self
2115 }
2116
2117 pub fn add_child_data(mut self, r: ArrayData) -> Self {
2119 self.child_data.push(r);
2120 self
2121 }
2122
2123 pub unsafe fn build_unchecked(self) -> ArrayData {
2139 unsafe { self.skip_validation(true) }.build().unwrap()
2140 }
2141
2142 pub fn build(self) -> Result<ArrayData, ArrowError> {
2151 let Self {
2152 data_type,
2153 len,
2154 null_count,
2155 null_bit_buffer,
2156 nulls,
2157 offset,
2158 buffers,
2159 child_data,
2160 align_buffers,
2161 skip_validation,
2162 } = self;
2163
2164 let nulls = nulls
2165 .or_else(|| {
2166 let buffer = null_bit_buffer?;
2167 let buffer = BooleanBuffer::new(buffer, offset, len);
2168 Some(match null_count {
2169 Some(n) => {
2170 unsafe { NullBuffer::new_unchecked(buffer, n) }
2172 }
2173 None => NullBuffer::new(buffer),
2174 })
2175 })
2176 .filter(|b| b.null_count() != 0);
2177
2178 let mut data = ArrayData {
2179 data_type,
2180 len,
2181 offset,
2182 buffers,
2183 child_data,
2184 nulls,
2185 };
2186
2187 if align_buffers {
2188 data.align_buffers();
2189 }
2190
2191 if !skip_validation.get() || cfg!(feature = "force_validate") {
2193 data.validate_data()?;
2194 }
2195 Ok(data)
2196 }
2197
2198 #[deprecated(since = "54.1.0", note = "Use ArrayData::align_buffers instead")]
2200 pub fn build_aligned(self) -> Result<ArrayData, ArrowError> {
2201 self.align_buffers(true).build()
2202 }
2203
2204 pub fn align_buffers(mut self, align_buffers: bool) -> Self {
2220 self.align_buffers = align_buffers;
2221 self
2222 }
2223
2224 pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self {
2238 unsafe {
2239 self.skip_validation.set(skip_validation);
2240 }
2241 self
2242 }
2243}
2244
2245impl From<ArrayData> for ArrayDataBuilder {
2246 fn from(d: ArrayData) -> Self {
2247 Self {
2248 data_type: d.data_type,
2249 len: d.len,
2250 offset: d.offset,
2251 buffers: d.buffers,
2252 child_data: d.child_data,
2253 nulls: d.nulls,
2254 null_bit_buffer: None,
2255 null_count: None,
2256 align_buffers: false,
2257 skip_validation: UnsafeFlag::new(),
2258 }
2259 }
2260}
2261
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::{Field, Fields};

    /// Returns a buffer holding `n` copies of `42i32`.
    fn make_i32_buffer(n: usize) -> Buffer {
        Buffer::from_slice_ref(vec![42i32; n])
    }

    /// Returns a buffer holding `n` copies of `42f32`.
    fn make_f32_buffer(n: usize) -> Buffer {
        Buffer::from_slice_ref(vec![42f32; n])
    }

    /// The builder carries length, offset, buffers and null bitmap through.
    #[test]
    fn test_builder() {
        // 25 values: enough for offset 5 + len 20.
        let v = (0..25).collect::<Vec<i32>>();
        let b1 = Buffer::from_slice_ref(&v);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(20)
            .offset(5)
            .add_buffer(b1)
            .null_bit_buffer(Some(Buffer::from([
                0b01011111, 0b10110101, 0b01100011, 0b00011110,
            ])))
            .build()
            .unwrap();

        assert_eq!(20, arr_data.len());
        assert_eq!(10, arr_data.null_count());
        assert_eq!(5, arr_data.offset());
        assert_eq!(1, arr_data.buffers().len());
        assert_eq!(
            Buffer::from_slice_ref(&v).as_slice(),
            arr_data.buffers()[0].as_slice()
        );
    }

    /// A struct array built with one child exposes that child unchanged.
    #[test]
    fn test_builder_with_child_data() {
        let child_arr_data = ArrayData::try_new(
            DataType::Int32,
            5,
            None,
            0,
            vec![Buffer::from_slice_ref([1i32, 2, 3, 4, 5])],
            vec![],
        )
        .unwrap();

        let field = Arc::new(Field::new("x", DataType::Int32, true));
        let data_type = DataType::Struct(vec![field].into());

        let arr_data = ArrayData::builder(data_type)
            .len(5)
            .offset(0)
            .add_child_data(child_arr_data.clone())
            .build()
            .unwrap();

        assert_eq!(5, arr_data.len());
        assert_eq!(1, arr_data.child_data().len());
        assert_eq!(child_arr_data, arr_data.child_data()[0]);
    }

    /// The null count is derived from the bitmap, with and without an offset.
    #[test]
    fn test_null_count() {
        // Three of sixteen bits set -> 13 nulls.
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert_eq!(13, arr_data.null_count());

        // Same bitmap viewed through offset 2, len 12: bits 3 and 10 remain
        // in the window, so 10 of 12 slots are null.
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(12)
            .offset(2)
            // offset 2 + len 12 needs 14 values
            .add_buffer(make_i32_buffer(14))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert_eq!(10, arr_data.null_count());
    }

    /// The validity bytes exposed by `nulls()` equal the bitmap passed in.
    #[test]
    fn test_null_buffer_ref() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert!(arr_data.nulls().is_some());
        assert_eq!(&bit_v, arr_data.nulls().unwrap().validity());
    }

    /// Slicing updates length, offset and null count; slices compose.
    #[test]
    fn test_slice() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        // Dropping slot 0 (valid) leaves the null count unchanged.
        let new_data = data.slice(1, 15);
        assert_eq!(data.len() - 1, new_data.len());
        assert_eq!(1, new_data.offset());
        assert_eq!(data.null_count(), new_data.null_count());

        // Slice of a slice: offsets add up, and dropping slot 1 (null)
        // removes one null.
        let new_data = new_data.slice(1, 14);
        assert_eq!(data.len() - 2, new_data.len());
        assert_eq!(2, new_data.offset());
        assert_eq!(data.null_count() - 1, new_data.null_count());
    }

    /// `typed_offsets` reports an error instead of overflowing when
    /// `len + 1` does not fit in `usize`.
    #[test]
    fn test_typed_offsets_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: usize::MAX,
            offset: 0,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };
        let err = data.typed_offsets::<i32>().unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 1 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    /// `typed_buffer` reports an error instead of overflowing when
    /// `len + offset` does not fit in `usize`.
    #[test]
    fn test_validate_typed_buffer_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: 0,
            offset: 2,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };
        let err = data.typed_buffer::<i32>(0, usize::MAX).unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 2 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    /// Shared fixture: a `Binary` array whose `len + offset` overflows `usize`.
    fn try_new_binary_length_offset_overflow() -> Result<ArrayData, ArrowError> {
        ArrayData::try_new(
            DataType::Binary,
            usize::MAX,
            None,
            1,
            vec![
                Buffer::from_slice_ref([0_i32]),
                Buffer::from_iter(std::iter::empty::<u8>()),
            ],
            vec![],
        )
    }

    /// Without `force_validate`, the overflow surfaces as an `Err`.
    #[cfg(not(feature = "force_validate"))]
    #[test]
    fn test_try_new_length_offset_overflow() {
        let err = try_new_binary_length_offset_overflow().unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 1 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    /// With `force_validate`, the same input is expected to panic with the
    /// overflow message (expected text assumes a 64-bit `usize`).
    #[cfg(feature = "force_validate")]
    #[test]
    #[should_panic(
        expected = "Length 18446744073709551615 with offset 1 overflows usize for Binary"
    )]
    fn test_try_new_length_offset_overflow_force_validate() {
        try_new_binary_length_offset_overflow().unwrap();
    }

    /// Value equality (`==`) versus pointer equality (`ptr_eq`) across types,
    /// clones and slices.
    #[test]
    fn test_equality() {
        let int_data = ArrayData::builder(DataType::Int32)
            .len(1)
            .add_buffer(make_i32_buffer(1))
            .build()
            .unwrap();

        let float_data = ArrayData::builder(DataType::Float32)
            .len(1)
            .add_buffer(make_f32_buffer(1))
            .build()
            .unwrap();
        assert_ne!(int_data, float_data);
        assert!(!int_data.ptr_eq(&float_data));
        assert!(int_data.ptr_eq(&int_data));

        // A clone is pointer-equal to the original.
        #[allow(clippy::redundant_clone)]
        let int_data_clone = int_data.clone();
        assert_eq!(int_data, int_data_clone);
        assert!(int_data.ptr_eq(&int_data_clone));
        assert!(int_data_clone.ptr_eq(&int_data));

        // A slice differs in offset/len, so it is not pointer-equal.
        let int_data_slice = int_data_clone.slice(1, 0);
        assert!(int_data_slice.ptr_eq(&int_data_slice));
        assert!(!int_data.ptr_eq(&int_data_slice));
        assert!(!int_data_slice.ptr_eq(&int_data));

        let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes());
        let offsets_buffer = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
        let string_data = ArrayData::try_new(
            DataType::Utf8,
            3,
            Some(Buffer::from_iter(vec![true, false, true])),
            0,
            vec![offsets_buffer, data_buffer],
            vec![],
        )
        .unwrap();

        assert_ne!(float_data, string_data);
        assert!(!float_data.ptr_eq(&string_data));

        assert!(string_data.ptr_eq(&string_data));

        #[allow(clippy::redundant_clone)]
        let string_data_cloned = string_data.clone();
        assert!(string_data_cloned.ptr_eq(&string_data));
        assert!(string_data.ptr_eq(&string_data_cloned));

        let string_data_slice = string_data.slice(1, 2);
        assert!(string_data_slice.ptr_eq(&string_data_slice));
        assert!(!string_data_slice.ptr_eq(&string_data))
    }

    /// `get_slice_memory_size` shrinks by the bytes a slice leaves out.
    #[test]
    fn test_slice_memory_size() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        // Two of sixteen i32 values dropped -> 8 bytes less.
        let new_data = data.slice(1, 14);
        assert_eq!(
            data.get_slice_memory_size().unwrap() - 8,
            new_data.get_slice_memory_size().unwrap()
        );
        let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes());
        let offsets_buffer = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
        let string_data = ArrayData::try_new(
            DataType::Utf8,
            3,
            Some(Buffer::from_iter(vec![true, false, true])),
            0,
            vec![offsets_buffer, data_buffer],
            vec![],
        )
        .unwrap();
        let string_data_slice = string_data.slice(1, 2);
        // NOTE(review): presumably one i32 offset (4 bytes) plus the 2 bytes
        // of the first string are no longer counted — confirm.
        assert_eq!(
            string_data.get_slice_memory_size().unwrap() - 6,
            string_data_slice.get_slice_memory_size().unwrap()
        );
    }

    /// `count_nulls` counts unset bits within the requested window.
    #[test]
    fn test_count_nulls() {
        // 9 of 16 bits are set -> 7 nulls overall.
        let buffer = Buffer::from([0b00010110, 0b10011111]);
        let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 16));
        let count = count_nulls(Some(&buffer), 0, 16);
        assert_eq!(count, 7);

        let count = count_nulls(Some(&buffer), 4, 8);
        assert_eq!(count, 3);
    }

    /// `contains_nulls` looks only at the requested window; an empty window
    /// contains no nulls.
    #[test]
    fn test_contains_nulls() {
        let buffer: Buffer =
            MutableBuffer::from_iter([false, false, false, true, true, false]).into();
        let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 6));
        assert!(contains_nulls(Some(&buffer), 0, 6));
        assert!(contains_nulls(Some(&buffer), 0, 3));
        assert!(!contains_nulls(Some(&buffer), 3, 2));
        assert!(!contains_nulls(Some(&buffer), 0, 0));
    }

    /// A misaligned buffer fails validation until `align_buffers` is called.
    #[test]
    fn test_alignment() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        // Offsetting by one byte breaks the 4-byte alignment of i32.
        let sliced = buffer.slice(1);

        let mut data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };
        data.validate_full().unwrap();

        // Swap in the misaligned buffer.
        data.buffers[0] = sliced;
        let err = data.validate().unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Misaligned buffers[0] in array of type Int32, offset from expected alignment of 4 by 1"
        );

        data.align_buffers();
        data.validate_full().unwrap();
    }

    /// Same as `test_alignment`, with the misaligned buffer inside a struct
    /// child: validation and `align_buffers` both reach the child.
    #[test]
    fn test_alignment_struct() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        let sliced = buffer.slice(1);

        let child_data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };

        let schema = DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)]));
        let mut data = ArrayData {
            data_type: schema,
            len: 0,
            offset: 0,
            buffers: vec![],
            child_data: vec![child_data],
            nulls: None,
        };
        data.validate_full().unwrap();

        // Swap in the misaligned buffer on the child.
        data.child_data[0].buffers[0] = sliced;
        let err = data.validate().unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Misaligned buffers[0] in array of type Int32, offset from expected alignment of 4 by 1"
        );

        data.align_buffers();
        data.validate_full().unwrap();
    }

    /// `new_null` produces all-null arrays of the requested length for the
    /// view and list-view types.
    #[test]
    fn test_null_view_types() {
        let array_len = 32;
        let array = ArrayData::new_null(&DataType::BinaryView, array_len);
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(&DataType::Utf8View, array_len);
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(
            &DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
            array_len,
        );
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(
            &DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
            array_len,
        );
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }
    }
}