1use crate::bit_iterator::BitSliceIterator;
22use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
23use arrow_buffer::{
24 ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, bit_util, i256,
25};
26use arrow_schema::{ArrowError, DataType, UnionMode};
27use std::mem;
28use std::ops::Range;
29use std::sync::Arc;
30
31use crate::{equal, validate_binary_view, validate_string_view};
32
33#[inline]
34pub(crate) fn contains_nulls(
35 null_bit_buffer: Option<&NullBuffer>,
36 offset: usize,
37 len: usize,
38) -> bool {
39 match null_bit_buffer {
40 Some(buffer) => {
41 match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len).next() {
42 Some((start, end)) => start != 0 || end != len,
43 None => len != 0, }
45 }
46 None => false, }
48}
49
50#[inline]
51pub(crate) fn count_nulls(
52 null_bit_buffer: Option<&NullBuffer>,
53 offset: usize,
54 len: usize,
55) -> usize {
56 if let Some(buf) = null_bit_buffer {
57 let buffer = buf.buffer();
58 len - buffer.count_set_bits_offset(offset + buf.offset(), len)
59 } else {
60 0
61 }
62}
63
64#[inline]
66pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuffer; 2] {
67 let empty_buffer = MutableBuffer::new(0);
68 match data_type {
69 DataType::Null => [empty_buffer, MutableBuffer::new(0)],
70 DataType::Boolean => {
71 let bytes = bit_util::ceil(capacity, 8);
72 let buffer = MutableBuffer::new(bytes);
73 [buffer, empty_buffer]
74 }
75 DataType::UInt8
76 | DataType::UInt16
77 | DataType::UInt32
78 | DataType::UInt64
79 | DataType::Int8
80 | DataType::Int16
81 | DataType::Int32
82 | DataType::Int64
83 | DataType::Float16
84 | DataType::Float32
85 | DataType::Float64
86 | DataType::Decimal32(_, _)
87 | DataType::Decimal64(_, _)
88 | DataType::Decimal128(_, _)
89 | DataType::Decimal256(_, _)
90 | DataType::Date32
91 | DataType::Time32(_)
92 | DataType::Date64
93 | DataType::Time64(_)
94 | DataType::Duration(_)
95 | DataType::Timestamp(_, _)
96 | DataType::Interval(_) => [
97 MutableBuffer::new(capacity * data_type.primitive_width().unwrap()),
98 empty_buffer,
99 ],
100 DataType::Utf8 | DataType::Binary => {
101 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
102 buffer.push(0i32);
104 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
105 }
106 DataType::LargeUtf8 | DataType::LargeBinary => {
107 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
108 buffer.push(0i64);
110 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
111 }
112 DataType::BinaryView | DataType::Utf8View => [
113 MutableBuffer::new(capacity * mem::size_of::<u128>()),
114 empty_buffer,
115 ],
116 DataType::List(_) | DataType::Map(_, _) => {
117 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
119 buffer.push(0i32);
120 [buffer, empty_buffer]
121 }
122 DataType::ListView(_) => [
123 MutableBuffer::new(capacity * mem::size_of::<i32>()),
124 MutableBuffer::new(capacity * mem::size_of::<i32>()),
125 ],
126 DataType::LargeList(_) => {
127 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
129 buffer.push(0i64);
130 [buffer, empty_buffer]
131 }
132 DataType::LargeListView(_) => [
133 MutableBuffer::new(capacity * mem::size_of::<i64>()),
134 MutableBuffer::new(capacity * mem::size_of::<i64>()),
135 ],
136 DataType::FixedSizeBinary(size) => {
137 [MutableBuffer::new(capacity * *size as usize), empty_buffer]
138 }
139 DataType::Dictionary(k, _) => [
140 MutableBuffer::new(capacity * k.primitive_width().unwrap()),
141 empty_buffer,
142 ],
143 DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
144 [empty_buffer, MutableBuffer::new(0)]
145 }
146 DataType::Union(_, mode) => {
147 let type_ids = MutableBuffer::new(capacity * mem::size_of::<i8>());
148 match mode {
149 UnionMode::Sparse => [type_ids, empty_buffer],
150 UnionMode::Dense => {
151 let offsets = MutableBuffer::new(capacity * mem::size_of::<i32>());
152 [type_ids, offsets]
153 }
154 }
155 }
156 }
157}
158
159#[derive(Debug, Clone)]
205pub struct ArrayData {
206 data_type: DataType,
208
209 len: usize,
211
212 offset: usize,
217
218 buffers: Vec<Buffer>,
231
232 child_data: Vec<ArrayData>,
242
243 nulls: Option<NullBuffer>,
251}
252
253pub type ArrayDataRef = Arc<ArrayData>;
255
256fn checked_len_plus_offset(
257 data_type: &DataType,
258 len: usize,
259 offset: usize,
260) -> Result<usize, ArrowError> {
261 len.checked_add(offset).ok_or_else(|| {
262 ArrowError::InvalidArgumentError(format!(
263 "Length {len} with offset {offset} overflows usize for {data_type}"
264 ))
265 })
266}
267
268impl ArrayData {
269 pub unsafe fn new_unchecked(
286 data_type: DataType,
287 len: usize,
288 null_count: Option<usize>,
289 null_bit_buffer: Option<Buffer>,
290 offset: usize,
291 buffers: Vec<Buffer>,
292 child_data: Vec<ArrayData>,
293 ) -> Self {
294 let mut skip_validation = UnsafeFlag::new();
295 unsafe { skip_validation.set(true) };
297
298 ArrayDataBuilder {
299 data_type,
300 len,
301 null_count,
302 null_bit_buffer,
303 nulls: None,
304 offset,
305 buffers,
306 child_data,
307 align_buffers: false,
308 skip_validation,
309 }
310 .build()
311 .unwrap()
312 }
313
314 pub fn try_new(
328 data_type: DataType,
329 len: usize,
330 null_bit_buffer: Option<Buffer>,
331 offset: usize,
332 buffers: Vec<Buffer>,
333 child_data: Vec<ArrayData>,
334 ) -> Result<Self, ArrowError> {
335 if let Some(null_bit_buffer) = null_bit_buffer.as_ref() {
339 let len_plus_offset = checked_len_plus_offset(&data_type, len, offset)?;
340 let needed_len = bit_util::ceil(len_plus_offset, 8);
341 if null_bit_buffer.len() < needed_len {
342 return Err(ArrowError::InvalidArgumentError(format!(
343 "null_bit_buffer size too small. got {} needed {}",
344 null_bit_buffer.len(),
345 needed_len
346 )));
347 }
348 }
349 let new_self = unsafe {
351 Self::new_unchecked(
352 data_type,
353 len,
354 None,
355 null_bit_buffer,
356 offset,
357 buffers,
358 child_data,
359 )
360 };
361
362 new_self.validate_data()?;
367 Ok(new_self)
368 }
369
370 pub fn into_parts(
376 self,
377 ) -> (
378 DataType,
379 usize,
380 Option<NullBuffer>,
381 usize,
382 Vec<Buffer>,
383 Vec<ArrayData>,
384 ) {
385 let Self {
386 data_type,
387 len,
388 nulls,
389 offset,
390 buffers,
391 child_data,
392 } = self;
393
394 (data_type, len, nulls, offset, buffers, child_data)
395 }
396
397 #[inline]
399 pub const fn builder(data_type: DataType) -> ArrayDataBuilder {
400 ArrayDataBuilder::new(data_type)
401 }
402
403 #[inline]
405 pub const fn data_type(&self) -> &DataType {
406 &self.data_type
407 }
408
409 pub fn buffers(&self) -> &[Buffer] {
411 &self.buffers
412 }
413
414 pub fn child_data(&self) -> &[ArrayData] {
417 &self.child_data[..]
418 }
419
420 #[inline]
422 pub fn is_null(&self, i: usize) -> bool {
423 match &self.nulls {
424 Some(v) => v.is_null(i),
425 None => false,
426 }
427 }
428
429 #[inline]
433 pub fn nulls(&self) -> Option<&NullBuffer> {
434 self.nulls.as_ref()
435 }
436
437 #[inline]
439 pub fn is_valid(&self, i: usize) -> bool {
440 !self.is_null(i)
441 }
442
443 #[inline]
445 pub const fn len(&self) -> usize {
446 self.len
447 }
448
449 #[inline]
451 pub const fn is_empty(&self) -> bool {
452 self.len == 0
453 }
454
455 #[inline]
457 pub const fn offset(&self) -> usize {
458 self.offset
459 }
460
461 #[inline]
463 pub fn null_count(&self) -> usize {
464 self.nulls
465 .as_ref()
466 .map(|x| x.null_count())
467 .unwrap_or_default()
468 }
469
470 pub fn get_buffer_memory_size(&self) -> usize {
482 let mut size = 0;
483 for buffer in &self.buffers {
484 size += buffer.capacity();
485 }
486 if let Some(bitmap) = &self.nulls {
487 size += bitmap.buffer().capacity()
488 }
489 for child in &self.child_data {
490 size += child.get_buffer_memory_size();
491 }
492 size
493 }
494
495 pub fn get_slice_memory_size(&self) -> Result<usize, ArrowError> {
508 let mut result: usize = 0;
509 let layout = layout(&self.data_type);
510
511 for spec in layout.buffers.iter() {
512 match spec {
513 BufferSpec::FixedWidth { byte_width, .. } => {
514 let buffer_size = self.len.checked_mul(*byte_width).ok_or_else(|| {
515 ArrowError::ComputeError(
516 "Integer overflow computing buffer size".to_string(),
517 )
518 })?;
519 result += buffer_size;
520 }
521 BufferSpec::VariableWidth => {
522 let buffer_len = match self.data_type {
523 DataType::Utf8 | DataType::Binary => {
524 let offsets = self.typed_offsets::<i32>()?;
525 (offsets[self.len] - offsets[0]) as usize
526 }
527 DataType::LargeUtf8 | DataType::LargeBinary => {
528 let offsets = self.typed_offsets::<i64>()?;
529 (offsets[self.len] - offsets[0]) as usize
530 }
531 _ => {
532 return Err(ArrowError::NotYetImplemented(format!(
533 "Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
534 self.data_type
535 )));
536 }
537 };
538 result += buffer_len;
539 }
540 BufferSpec::BitMap => {
541 let buffer_size = bit_util::ceil(self.len, 8);
542 result += buffer_size;
543 }
544 BufferSpec::AlwaysNull => {
545 }
547 }
548 }
549
550 if self.nulls().is_some() {
551 result += bit_util::ceil(self.len, 8);
552 }
553
554 for child in &self.child_data {
555 result += child.get_slice_memory_size()?;
556 }
557 Ok(result)
558 }
559
560 pub fn get_array_memory_size(&self) -> usize {
569 let mut size = mem::size_of_val(self);
570
571 for buffer in &self.buffers {
573 size += mem::size_of::<Buffer>();
574 size += buffer.capacity();
575 }
576 if let Some(nulls) = &self.nulls {
577 size += nulls.buffer().capacity();
578 }
579 for child in &self.child_data {
580 size += child.get_array_memory_size();
581 }
582
583 size
584 }
585
586 pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
594 let end = offset
595 .checked_add(length)
596 .expect("offset + length overflow");
597 assert!(end <= self.len());
598
599 if let DataType::Struct(_) = self.data_type() {
600 let new_offset = self.offset + offset;
602 ArrayData {
603 data_type: self.data_type().clone(),
604 len: length,
605 offset: new_offset,
606 buffers: self.buffers.clone(),
607 child_data: self
609 .child_data()
610 .iter()
611 .map(|data| data.slice(offset, length))
612 .collect(),
613 nulls: self.nulls.as_ref().map(|x| x.slice(offset, length)),
614 }
615 } else {
616 let mut new_data = self.clone();
617
618 new_data.len = length;
619 new_data.offset = offset + self.offset;
620 new_data.nulls = self.nulls.as_ref().map(|x| x.slice(offset, length));
621
622 new_data
623 }
624 }
625
626 pub fn buffer<T: ArrowNativeType>(&self, buffer: usize) -> &[T] {
633 &self.buffers()[buffer].typed_data()[self.offset..]
634 }
635
636 pub fn new_null(data_type: &DataType, len: usize) -> Self {
638 let bit_len = bit_util::ceil(len, 8);
639 let zeroed = |len: usize| Buffer::from(MutableBuffer::from_len_zeroed(len));
640
641 let (buffers, child_data, has_nulls) = match data_type.primitive_width() {
642 Some(width) => (vec![zeroed(width * len)], vec![], true),
643 None => match data_type {
644 DataType::Null => (vec![], vec![], false),
645 DataType::Boolean => (vec![zeroed(bit_len)], vec![], true),
646 DataType::Binary | DataType::Utf8 => {
647 (vec![zeroed((len + 1) * 4), zeroed(0)], vec![], true)
648 }
649 DataType::BinaryView | DataType::Utf8View => (vec![zeroed(len * 16)], vec![], true),
650 DataType::LargeBinary | DataType::LargeUtf8 => {
651 (vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true)
652 }
653 DataType::FixedSizeBinary(i) => (vec![zeroed(*i as usize * len)], vec![], true),
654 DataType::List(f) | DataType::Map(f, _) => (
655 vec![zeroed((len + 1) * 4)],
656 vec![ArrayData::new_empty(f.data_type())],
657 true,
658 ),
659 DataType::LargeList(f) => (
660 vec![zeroed((len + 1) * 8)],
661 vec![ArrayData::new_empty(f.data_type())],
662 true,
663 ),
664 DataType::ListView(f) => (
665 vec![zeroed(len * 4), zeroed(len * 4)],
666 vec![ArrayData::new_empty(f.data_type())],
667 true,
668 ),
669 DataType::LargeListView(f) => (
670 vec![zeroed(len * 8), zeroed(len * 8)],
671 vec![ArrayData::new_empty(f.data_type())],
672 true,
673 ),
674 DataType::FixedSizeList(f, list_len) => (
675 vec![],
676 vec![ArrayData::new_null(f.data_type(), *list_len as usize * len)],
677 true,
678 ),
679 DataType::Struct(fields) => (
680 vec![],
681 fields
682 .iter()
683 .map(|f| Self::new_null(f.data_type(), len))
684 .collect(),
685 true,
686 ),
687 DataType::Dictionary(k, v) => (
688 vec![zeroed(k.primitive_width().unwrap() * len)],
689 vec![ArrayData::new_empty(v.as_ref())],
690 true,
691 ),
692 DataType::Union(f, mode) => {
693 let (id, _) = f.iter().next().unwrap();
694 let ids = Buffer::from_iter(std::iter::repeat_n(id, len));
695 let buffers = match mode {
696 UnionMode::Sparse => vec![ids],
697 UnionMode::Dense => {
698 let end_offset = i32::from_usize(len).unwrap();
699 vec![ids, Buffer::from_iter(0_i32..end_offset)]
700 }
701 };
702
703 let children = f
704 .iter()
705 .enumerate()
706 .map(|(idx, (_, f))| {
707 if idx == 0 || *mode == UnionMode::Sparse {
708 Self::new_null(f.data_type(), len)
709 } else {
710 Self::new_empty(f.data_type())
711 }
712 })
713 .collect();
714
715 (buffers, children, false)
716 }
717 DataType::RunEndEncoded(r, v) => {
718 if len == 0 {
719 let runs = ArrayData::new_empty(r.data_type());
721 let values = ArrayData::new_empty(v.data_type());
722 (vec![], vec![runs, values], false)
723 } else {
724 let runs = match r.data_type() {
725 DataType::Int16 => {
726 let i = i16::from_usize(len).expect("run overflow");
727 Buffer::from_slice_ref([i])
728 }
729 DataType::Int32 => {
730 let i = i32::from_usize(len).expect("run overflow");
731 Buffer::from_slice_ref([i])
732 }
733 DataType::Int64 => {
734 let i = i64::from_usize(len).expect("run overflow");
735 Buffer::from_slice_ref([i])
736 }
737 dt => unreachable!("Invalid run ends data type {dt}"),
738 };
739
740 let builder = ArrayData::builder(r.data_type().clone())
741 .len(1)
742 .buffers(vec![runs]);
743
744 let runs = unsafe { builder.build_unchecked() };
747 (
748 vec![],
749 vec![runs, ArrayData::new_null(v.data_type(), 1)],
750 false,
751 )
752 }
753 }
754 DataType::Int8
756 | DataType::Int16
757 | DataType::Int32
758 | DataType::Int64
759 | DataType::UInt8
760 | DataType::UInt16
761 | DataType::UInt32
762 | DataType::UInt64
763 | DataType::Float16
764 | DataType::Float32
765 | DataType::Float64
766 | DataType::Timestamp(_, _)
767 | DataType::Date32
768 | DataType::Date64
769 | DataType::Time32(_)
770 | DataType::Time64(_)
771 | DataType::Duration(_)
772 | DataType::Interval(_)
773 | DataType::Decimal32(_, _)
774 | DataType::Decimal64(_, _)
775 | DataType::Decimal128(_, _)
776 | DataType::Decimal256(_, _) => unreachable!("{data_type}"),
777 },
778 };
779
780 let mut builder = ArrayDataBuilder::new(data_type.clone())
781 .len(len)
782 .buffers(buffers)
783 .child_data(child_data);
784
785 if has_nulls {
786 builder = builder.nulls(Some(NullBuffer::new_null(len)))
787 }
788
789 unsafe { builder.build_unchecked() }
792 }
793
794 pub fn new_empty(data_type: &DataType) -> Self {
796 Self::new_null(data_type, 0)
797 }
798
799 pub fn align_buffers(&mut self) {
808 let layout = layout(&self.data_type);
809 for (buffer, spec) in self.buffers.iter_mut().zip(&layout.buffers) {
810 if let BufferSpec::FixedWidth { alignment, .. } = spec {
811 if buffer.as_ptr().align_offset(*alignment) != 0 {
812 *buffer = Buffer::from_slice_ref(buffer.as_ref());
813 }
814 }
815 }
816 for data in self.child_data.iter_mut() {
818 data.align_buffers()
819 }
820 }
821
822 pub fn validate(&self) -> Result<(), ArrowError> {
833 let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
835
836 let layout = layout(&self.data_type);
838
839 if !layout.can_contain_null_mask && self.nulls.is_some() {
840 return Err(ArrowError::InvalidArgumentError(format!(
841 "Arrays of type {:?} cannot contain a null bitmask",
842 self.data_type,
843 )));
844 }
845
846 if self.buffers.len() < layout.buffers.len()
848 || (!layout.variadic && self.buffers.len() != layout.buffers.len())
849 {
850 return Err(ArrowError::InvalidArgumentError(format!(
851 "Expected {} buffers in array of type {:?}, got {}",
852 layout.buffers.len(),
853 self.data_type,
854 self.buffers.len(),
855 )));
856 }
857
858 for (i, (buffer, spec)) in self.buffers.iter().zip(layout.buffers.iter()).enumerate() {
859 match spec {
860 BufferSpec::FixedWidth {
861 byte_width,
862 alignment,
863 } => {
864 let min_buffer_size = len_plus_offset.saturating_mul(*byte_width);
865
866 if buffer.len() < min_buffer_size {
867 return Err(ArrowError::InvalidArgumentError(format!(
868 "Need at least {} bytes in buffers[{}] in array of type {:?}, but got {}",
869 min_buffer_size,
870 i,
871 self.data_type,
872 buffer.len()
873 )));
874 }
875
876 let align_offset = buffer.as_ptr().align_offset(*alignment);
877 if align_offset != 0 {
878 return Err(ArrowError::InvalidArgumentError(format!(
879 "Misaligned buffers[{i}] in array of type {:?}, offset from expected alignment of {alignment} by {}",
880 self.data_type,
881 align_offset.min(alignment - align_offset)
882 )));
883 }
884 }
885 BufferSpec::VariableWidth => {
886 }
890 BufferSpec::BitMap => {
891 let min_buffer_size = bit_util::ceil(len_plus_offset, 8);
892 if buffer.len() < min_buffer_size {
893 return Err(ArrowError::InvalidArgumentError(format!(
894 "Need at least {} bytes for bitmap in buffers[{}] in array of type {:?}, but got {}",
895 min_buffer_size,
896 i,
897 self.data_type,
898 buffer.len()
899 )));
900 }
901 }
902 BufferSpec::AlwaysNull => {
903 }
905 }
906 }
907
908 if let Some(nulls) = self.nulls() {
910 if nulls.null_count() > self.len {
911 return Err(ArrowError::InvalidArgumentError(format!(
912 "null_count {} for an array exceeds length of {} elements",
913 nulls.null_count(),
914 self.len
915 )));
916 }
917
918 let actual_len = nulls.validity().len();
919 let needed_len = bit_util::ceil(len_plus_offset, 8);
920 if actual_len < needed_len {
921 return Err(ArrowError::InvalidArgumentError(format!(
922 "null_bit_buffer size too small. got {actual_len} needed {needed_len}",
923 )));
924 }
925
926 if nulls.len() != self.len {
927 return Err(ArrowError::InvalidArgumentError(format!(
928 "null buffer incorrect size. got {} expected {}",
929 nulls.len(),
930 self.len
931 )));
932 }
933 }
934
935 self.validate_child_data()?;
936
937 match &self.data_type {
939 DataType::Utf8 | DataType::Binary => {
940 self.validate_offsets::<i32>(self.buffers[1].len())?;
941 }
942 DataType::LargeUtf8 | DataType::LargeBinary => {
943 self.validate_offsets::<i64>(self.buffers[1].len())?;
944 }
945 DataType::Dictionary(key_type, _value_type) => {
946 if !DataType::is_dictionary_key_type(key_type) {
948 return Err(ArrowError::InvalidArgumentError(format!(
949 "Dictionary key type must be integer, but was {key_type}"
950 )));
951 }
952 }
953 DataType::RunEndEncoded(run_ends_type, _) => {
954 if run_ends_type.is_nullable() {
955 return Err(ArrowError::InvalidArgumentError(
956 "The nullable should be set to false for the field defining run_ends array.".to_string()
957 ));
958 }
959 if !DataType::is_run_ends_type(run_ends_type.data_type()) {
960 return Err(ArrowError::InvalidArgumentError(format!(
961 "RunArray run_ends types must be Int16, Int32 or Int64, but was {}",
962 run_ends_type.data_type()
963 )));
964 }
965 }
966 _ => {}
967 };
968
969 Ok(())
970 }
971
972 fn typed_offsets<T: ArrowNativeType + num_traits::Num>(&self) -> Result<&[T], ArrowError> {
979 if self.len == 0 && self.buffers[0].is_empty() {
981 return Ok(&[]);
982 }
983
984 let len = checked_len_plus_offset(&self.data_type, self.len, 1)?;
985
986 self.typed_buffer(0, len)
987 }
988
989 fn typed_buffer<T: ArrowNativeType + num_traits::Num>(
991 &self,
992 idx: usize,
993 len: usize,
994 ) -> Result<&[T], ArrowError> {
995 let buffer = &self.buffers[idx];
996
997 let required_elements = checked_len_plus_offset(&self.data_type, len, self.offset)?;
998 let byte_width = mem::size_of::<T>();
999 let required_len = required_elements.checked_mul(byte_width).ok_or_else(|| {
1000 ArrowError::InvalidArgumentError(format!(
1001 "Buffer {idx} of {} byte length overflow: {} elements of {} bytes exceeds usize",
1002 self.data_type, required_elements, byte_width
1003 ))
1004 })?;
1005
1006 if buffer.len() < required_len {
1007 return Err(ArrowError::InvalidArgumentError(format!(
1008 "Buffer {} of {} isn't large enough. Expected {} bytes got {}",
1009 idx,
1010 self.data_type,
1011 required_len,
1012 buffer.len()
1013 )));
1014 }
1015
1016 Ok(&buffer.typed_data::<T>()[self.offset..required_elements])
1017 }
1018
1019 fn validate_offsets<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
1022 &self,
1023 values_length: usize,
1024 ) -> Result<(), ArrowError> {
1025 let offsets = self.typed_offsets::<T>()?;
1027 if offsets.is_empty() {
1028 return Ok(());
1029 }
1030
1031 let first_offset = offsets[0].to_usize().ok_or_else(|| {
1032 ArrowError::InvalidArgumentError(format!(
1033 "Error converting offset[0] ({}) to usize for {}",
1034 offsets[0], self.data_type
1035 ))
1036 })?;
1037
1038 let last_offset = offsets[self.len].to_usize().ok_or_else(|| {
1039 ArrowError::InvalidArgumentError(format!(
1040 "Error converting offset[{}] ({}) to usize for {}",
1041 self.len, offsets[self.len], self.data_type
1042 ))
1043 })?;
1044
1045 if first_offset > values_length {
1046 return Err(ArrowError::InvalidArgumentError(format!(
1047 "First offset {} of {} is larger than values length {}",
1048 first_offset, self.data_type, values_length,
1049 )));
1050 }
1051
1052 if last_offset > values_length {
1053 return Err(ArrowError::InvalidArgumentError(format!(
1054 "Last offset {} of {} is larger than values length {}",
1055 last_offset, self.data_type, values_length,
1056 )));
1057 }
1058
1059 if first_offset > last_offset {
1060 return Err(ArrowError::InvalidArgumentError(format!(
1061 "First offset {} in {} is smaller than last offset {}",
1062 first_offset, self.data_type, last_offset,
1063 )));
1064 }
1065
1066 Ok(())
1067 }
1068
1069 fn validate_offsets_and_sizes<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
1072 &self,
1073 values_length: usize,
1074 ) -> Result<(), ArrowError> {
1075 let offsets: &[T] = self.typed_buffer(0, self.len)?;
1076 let sizes: &[T] = self.typed_buffer(1, self.len)?;
1077 if offsets.len() != sizes.len() {
1078 return Err(ArrowError::ComputeError(format!(
1079 "ListView offsets len {} does not match sizes len {}",
1080 offsets.len(),
1081 sizes.len()
1082 )));
1083 }
1084
1085 for i in 0..sizes.len() {
1086 let size = sizes[i].to_usize().ok_or_else(|| {
1087 ArrowError::InvalidArgumentError(format!(
1088 "Error converting size[{}] ({}) to usize for {}",
1089 i, sizes[i], self.data_type
1090 ))
1091 })?;
1092 let offset = offsets[i].to_usize().ok_or_else(|| {
1093 ArrowError::InvalidArgumentError(format!(
1094 "Error converting offset[{}] ({}) to usize for {}",
1095 i, offsets[i], self.data_type
1096 ))
1097 })?;
1098 if size
1099 .checked_add(offset)
1100 .expect("Offset and size have exceeded the usize boundary")
1101 > values_length
1102 {
1103 return Err(ArrowError::InvalidArgumentError(format!(
1104 "Size {} at index {} is larger than the remaining values for {}",
1105 size, i, self.data_type
1106 )));
1107 }
1108 }
1109 Ok(())
1110 }
1111
1112 fn validate_child_data(&self) -> Result<(), ArrowError> {
1114 match &self.data_type {
1115 DataType::List(field) | DataType::Map(field, _) => {
1116 let values_data = self.get_single_valid_child_data(field.data_type())?;
1117 self.validate_offsets::<i32>(values_data.len)?;
1118 Ok(())
1119 }
1120 DataType::LargeList(field) => {
1121 let values_data = self.get_single_valid_child_data(field.data_type())?;
1122 self.validate_offsets::<i64>(values_data.len)?;
1123 Ok(())
1124 }
1125 DataType::ListView(field) => {
1126 let values_data = self.get_single_valid_child_data(field.data_type())?;
1127 self.validate_offsets_and_sizes::<i32>(values_data.len)?;
1128 Ok(())
1129 }
1130 DataType::LargeListView(field) => {
1131 let values_data = self.get_single_valid_child_data(field.data_type())?;
1132 self.validate_offsets_and_sizes::<i64>(values_data.len)?;
1133 Ok(())
1134 }
1135 DataType::FixedSizeList(field, list_size) => {
1136 let values_data = self.get_single_valid_child_data(field.data_type())?;
1137
1138 let list_size: usize = (*list_size).try_into().map_err(|_| {
1139 ArrowError::InvalidArgumentError(format!(
1140 "{} has a negative list_size {}",
1141 self.data_type, list_size
1142 ))
1143 })?;
1144
1145 let expected_values_len = self.len
1146 .checked_mul(list_size)
1147 .expect("integer overflow computing expected number of expected values in FixedListSize");
1148
1149 if values_data.len < expected_values_len {
1150 return Err(ArrowError::InvalidArgumentError(format!(
1151 "Values length {} is less than the length ({}) multiplied by the value size ({}) for {}",
1152 values_data.len, self.len, list_size, self.data_type
1153 )));
1154 }
1155
1156 Ok(())
1157 }
1158 DataType::Struct(fields) => {
1159 self.validate_num_child_data(fields.len())?;
1160 for (i, field) in fields.iter().enumerate() {
1161 let field_data = self.get_valid_child_data(i, field.data_type())?;
1162
1163 if field_data.len < self.len {
1165 return Err(ArrowError::InvalidArgumentError(format!(
1166 "{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
1167 self.data_type,
1168 i,
1169 field.name(),
1170 field_data.len,
1171 self.len
1172 )));
1173 }
1174 }
1175 Ok(())
1176 }
1177 DataType::RunEndEncoded(run_ends_field, values_field) => {
1178 self.validate_num_child_data(2)?;
1179 let run_ends_data = self.get_valid_child_data(0, run_ends_field.data_type())?;
1180 let values_data = self.get_valid_child_data(1, values_field.data_type())?;
1181 if run_ends_data.len != values_data.len {
1182 return Err(ArrowError::InvalidArgumentError(format!(
1183 "The run_ends array length should be the same as values array length. Run_ends array length is {}, values array length is {}",
1184 run_ends_data.len, values_data.len
1185 )));
1186 }
1187 if run_ends_data.nulls.is_some() {
1188 return Err(ArrowError::InvalidArgumentError(
1189 "Found null values in run_ends array. The run_ends array should not have null values.".to_string(),
1190 ));
1191 }
1192 Ok(())
1193 }
1194 DataType::Union(fields, mode) => {
1195 self.validate_num_child_data(fields.len())?;
1196
1197 for (i, (_, field)) in fields.iter().enumerate() {
1198 let field_data = self.get_valid_child_data(i, field.data_type())?;
1199
1200 if mode == &UnionMode::Sparse {
1201 let len_plus_offset =
1202 checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1203 if field_data.len < len_plus_offset {
1204 return Err(ArrowError::InvalidArgumentError(format!(
1205 "Sparse union child array #{} has length smaller than expected for union array ({} < {})",
1206 i, field_data.len, len_plus_offset
1207 )));
1208 }
1209 }
1210 }
1211 Ok(())
1212 }
1213 DataType::Dictionary(_key_type, value_type) => {
1214 self.get_single_valid_child_data(value_type)?;
1215 Ok(())
1216 }
1217 _ => {
1218 if !self.child_data.is_empty() {
1220 return Err(ArrowError::InvalidArgumentError(format!(
1221 "Expected no child arrays for type {} but got {}",
1222 self.data_type,
1223 self.child_data.len()
1224 )));
1225 }
1226 Ok(())
1227 }
1228 }
1229 }
1230
1231 fn get_single_valid_child_data(
1235 &self,
1236 expected_type: &DataType,
1237 ) -> Result<&ArrayData, ArrowError> {
1238 self.validate_num_child_data(1)?;
1239 self.get_valid_child_data(0, expected_type)
1240 }
1241
1242 fn validate_num_child_data(&self, expected_len: usize) -> Result<(), ArrowError> {
1244 if self.child_data.len() != expected_len {
1245 Err(ArrowError::InvalidArgumentError(format!(
1246 "Value data for {} should contain {} child data array(s), had {}",
1247 self.data_type,
1248 expected_len,
1249 self.child_data.len()
1250 )))
1251 } else {
1252 Ok(())
1253 }
1254 }
1255
1256 fn get_valid_child_data(
1259 &self,
1260 i: usize,
1261 expected_type: &DataType,
1262 ) -> Result<&ArrayData, ArrowError> {
1263 let values_data = self.child_data.get(i).ok_or_else(|| {
1264 ArrowError::InvalidArgumentError(format!(
1265 "{} did not have enough child arrays. Expected at least {} but had only {}",
1266 self.data_type,
1267 i + 1,
1268 self.child_data.len()
1269 ))
1270 })?;
1271
1272 if expected_type != &values_data.data_type {
1273 return Err(ArrowError::InvalidArgumentError(format!(
1274 "Child type mismatch for {}. Expected {} but child data had {}",
1275 self.data_type, expected_type, values_data.data_type
1276 )));
1277 }
1278
1279 values_data.validate()?;
1280 Ok(values_data)
1281 }
1282
1283 pub fn validate_data(&self) -> Result<(), ArrowError> {
1299 self.validate()?;
1300
1301 self.validate_nulls()?;
1302 self.validate_values()?;
1303 Ok(())
1304 }
1305
1306 pub fn validate_full(&self) -> Result<(), ArrowError> {
1311 self.validate_data()?;
1312 self.child_data
1314 .iter()
1315 .enumerate()
1316 .try_for_each(|(i, child_data)| {
1317 child_data.validate_full().map_err(|e| {
1318 ArrowError::InvalidArgumentError(format!(
1319 "{} child #{} invalid: {}",
1320 self.data_type, i, e
1321 ))
1322 })
1323 })?;
1324 Ok(())
1325 }
1326
1327 pub fn validate_nulls(&self) -> Result<(), ArrowError> {
1337 if let Some(nulls) = &self.nulls {
1338 let actual = nulls.len() - nulls.inner().count_set_bits();
1339 if actual != nulls.null_count() {
1340 return Err(ArrowError::InvalidArgumentError(format!(
1341 "null_count value ({}) doesn't match actual number of nulls in array ({})",
1342 nulls.null_count(),
1343 actual
1344 )));
1345 }
1346 }
1347
1348 match &self.data_type {
1353 DataType::List(f) | DataType::LargeList(f) | DataType::Map(f, _) => {
1354 if !f.is_nullable() {
1355 self.validate_non_nullable(None, &self.child_data[0])?
1356 }
1357 }
1358 DataType::FixedSizeList(field, len) => {
1359 let child = &self.child_data[0];
1360 if !field.is_nullable() {
1361 match &self.nulls {
1362 Some(nulls) => {
1363 let element_len = *len as usize;
1364 let expanded = nulls.expand(element_len);
1365 self.validate_non_nullable(Some(&expanded), child)?;
1366 }
1367 None => self.validate_non_nullable(None, child)?,
1368 }
1369 }
1370 }
1371 DataType::Struct(fields) => {
1372 for (field, child) in fields.iter().zip(&self.child_data) {
1373 if !field.is_nullable() {
1374 self.validate_non_nullable(self.nulls(), child)?
1375 }
1376 }
1377 }
1378 _ => {}
1379 }
1380
1381 Ok(())
1382 }
1383
1384 fn validate_non_nullable(
1386 &self,
1387 mask: Option<&NullBuffer>,
1388 child: &ArrayData,
1389 ) -> Result<(), ArrowError> {
1390 let mask = match mask {
1391 Some(mask) => mask,
1392 None => {
1393 return match child.null_count() {
1394 0 => Ok(()),
1395 _ => Err(ArrowError::InvalidArgumentError(format!(
1396 "non-nullable child of type {} contains nulls not present in parent {}",
1397 child.data_type, self.data_type
1398 ))),
1399 };
1400 }
1401 };
1402
1403 match child.nulls() {
1404 Some(nulls) if !mask.contains(nulls) => Err(ArrowError::InvalidArgumentError(format!(
1405 "non-nullable child of type {} contains nulls not present in parent",
1406 child.data_type
1407 ))),
1408 _ => Ok(()),
1409 }
1410 }
1411
1412 pub fn validate_values(&self) -> Result<(), ArrowError> {
1418 match &self.data_type {
1419 DataType::Utf8 => self.validate_utf8::<i32>(),
1420 DataType::LargeUtf8 => self.validate_utf8::<i64>(),
1421 DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
1422 DataType::LargeBinary => self.validate_offsets_full::<i64>(self.buffers[1].len()),
1423 DataType::BinaryView => {
1424 let views = self.typed_buffer::<u128>(0, self.len)?;
1425 validate_binary_view(views, &self.buffers[1..])
1426 }
1427 DataType::Utf8View => {
1428 let views = self.typed_buffer::<u128>(0, self.len)?;
1429 validate_string_view(views, &self.buffers[1..])
1430 }
1431 DataType::List(_) | DataType::Map(_, _) => {
1432 let child = &self.child_data[0];
1433 self.validate_offsets_full::<i32>(child.len)
1434 }
1435 DataType::LargeList(_) => {
1436 let child = &self.child_data[0];
1437 self.validate_offsets_full::<i64>(child.len)
1438 }
1439 DataType::Union(_, _) => {
1440 Ok(())
1446 }
1447 DataType::Dictionary(key_type, _value_type) => {
1448 let dictionary_length: i64 = self.child_data[0].len.try_into().unwrap();
1449 let max_value = dictionary_length - 1;
1450 match key_type.as_ref() {
1451 DataType::UInt8 => self.check_bounds::<u8>(max_value),
1452 DataType::UInt16 => self.check_bounds::<u16>(max_value),
1453 DataType::UInt32 => self.check_bounds::<u32>(max_value),
1454 DataType::UInt64 => self.check_bounds::<u64>(max_value),
1455 DataType::Int8 => self.check_bounds::<i8>(max_value),
1456 DataType::Int16 => self.check_bounds::<i16>(max_value),
1457 DataType::Int32 => self.check_bounds::<i32>(max_value),
1458 DataType::Int64 => self.check_bounds::<i64>(max_value),
1459 _ => unreachable!(),
1460 }
1461 }
1462 DataType::RunEndEncoded(run_ends, _values) => {
1463 let run_ends_data = self.child_data()[0].clone();
1464 match run_ends.data_type() {
1465 DataType::Int16 => run_ends_data.check_run_ends::<i16>(),
1466 DataType::Int32 => run_ends_data.check_run_ends::<i32>(),
1467 DataType::Int64 => run_ends_data.check_run_ends::<i64>(),
1468 _ => unreachable!(),
1469 }
1470 }
1471 _ => {
1472 Ok(())
1474 }
1475 }
1476 }
1477
    /// Walks the offsets of this array (typed as `T`) and calls `validate`
    /// once per pair of consecutive offsets.
    ///
    /// * `offset_limit` - the largest value an offset may take.
    /// * `validate` - called as `validate(slot, start..end)`, where `slot` is
    ///   the zero-based index of the pair and `start..end` is the `usize`
    ///   range formed by the previous and the current offset.
    ///
    /// # Errors
    ///
    /// Returns [`ArrowError::InvalidArgumentError`] when an offset cannot be
    /// converted to `usize`, exceeds `offset_limit`, or is smaller than the
    /// previous accepted offset; also propagates any error from `validate` or
    /// from `typed_offsets`.
    ///
    /// NOTE(review): the item produced for the very first offset is dropped by
    /// `skip(1)` whether it is `Ok` or `Err`, so a bad first offset is not
    /// reported by this function; presumably it is checked elsewhere - confirm.
    fn validate_each_offset<T, V>(&self, offset_limit: usize, validate: V) -> Result<(), ArrowError>
    where
        T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
        V: Fn(usize, Range<usize>) -> Result<(), ArrowError>,
    {
        self.typed_offsets::<T>()?
            .iter()
            .enumerate()
            .map(|(i, x)| {
                // Stage 1: convert each offset to `usize`
                let r = x.to_usize().ok_or_else(|| {
                    ArrowError::InvalidArgumentError(format!(
                        "Offset invariant failure: Could not convert offset {x} to usize at position {i}"))}
                    );
                // Stage 2: bounds-check the converted offset against `offset_limit`
                match r {
                    Ok(n) if n <= offset_limit => Ok((i, n)),
                    Ok(_) => Err(ArrowError::InvalidArgumentError(format!(
                        "Offset invariant failure: offset at position {i} out of bounds: {x} > {offset_limit}"))
                    ),
                    Err(e) => Err(e),
                }
            })
            // `start` is the scan state: the last offset accepted so far (initially 0).
            .scan(0_usize, |start, end| {
                match end {
                    // Monotonic step: emit the range and advance the state.
                    Ok((i, end)) if *start <= end => {
                        let range = Some(Ok((i, *start..end)));
                        *start = end;
                        range
                    }
                    // The offset went backwards; the state is left unchanged.
                    Ok((i, end)) => Some(Err(ArrowError::InvalidArgumentError(format!(
                        "Offset invariant failure: non-monotonic offset at slot {}: {} > {}",
                        i - 1, start, end))
                    )),
                    // Errors from the earlier stage pass through untouched.
                    Err(err) => Some(Err(err)),
                }
            })
            // The first item only pairs offset 0 with the initial state, so it is dropped.
            .skip(1)
            .try_for_each(|res: Result<(usize, Range<usize>), ArrowError>| {
                let (item_index, range) = res?;
                // `item_index` is the position of the end offset; the slot is one less.
                validate(item_index-1, range)
            })
    }
1532
1533 fn validate_utf8<T>(&self) -> Result<(), ArrowError>
1536 where
1537 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1538 {
1539 let values_buffer = &self.buffers[1].as_slice();
1540 if let Ok(values_str) = std::str::from_utf8(values_buffer) {
1541 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1543 if !values_str.is_char_boundary(range.start)
1544 || !values_str.is_char_boundary(range.end)
1545 {
1546 return Err(ArrowError::InvalidArgumentError(format!(
1547 "incomplete utf-8 byte sequence from index {string_index}"
1548 )));
1549 }
1550 Ok(())
1551 })
1552 } else {
1553 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1555 std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
1556 ArrowError::InvalidArgumentError(format!(
1557 "Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}"
1558 ))
1559 })?;
1560 Ok(())
1561 })
1562 }
1563 }
1564
1565 fn validate_offsets_full<T>(&self, offset_limit: usize) -> Result<(), ArrowError>
1568 where
1569 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1570 {
1571 self.validate_each_offset::<T, _>(offset_limit, |_string_index, _range| {
1572 Ok(())
1575 })
1576 }
1577
1578 fn check_bounds<T>(&self, max_value: i64) -> Result<(), ArrowError>
1581 where
1582 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1583 {
1584 let required_len = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1585 let buffer = &self.buffers[0];
1586
1587 assert!(buffer.len() / mem::size_of::<T>() >= required_len);
1590
1591 let indexes: &[T] = &buffer.typed_data::<T>()[self.offset..required_len];
1593
1594 indexes.iter().enumerate().try_for_each(|(i, &dict_index)| {
1595 if self.is_null(i) {
1597 return Ok(());
1598 }
1599 let dict_index: i64 = dict_index.try_into().map_err(|_| {
1600 ArrowError::InvalidArgumentError(format!(
1601 "Value at position {i} out of bounds: {dict_index} (can not convert to i64)"
1602 ))
1603 })?;
1604
1605 if dict_index < 0 || dict_index > max_value {
1606 return Err(ArrowError::InvalidArgumentError(format!(
1607 "Value at position {i} out of bounds: {dict_index} (should be in [0, {max_value}])"
1608 )));
1609 }
1610 Ok(())
1611 })
1612 }
1613
1614 fn check_run_ends<T>(&self) -> Result<(), ArrowError>
1616 where
1617 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1618 {
1619 let values = self.typed_buffer::<T>(0, self.len)?;
1620 let mut prev_value: i64 = 0_i64;
1621 values.iter().enumerate().try_for_each(|(ix, &inp_value)| {
1622 let value: i64 = inp_value.try_into().map_err(|_| {
1623 ArrowError::InvalidArgumentError(format!(
1624 "Value at position {ix} out of bounds: {inp_value} (can not convert to i64)"
1625 ))
1626 })?;
1627 if value <= 0_i64 {
1628 return Err(ArrowError::InvalidArgumentError(format!(
1629 "The values in run_ends array should be strictly positive. Found value {value} at index {ix} that does not match the criteria."
1630 )));
1631 }
1632 if ix > 0 && value <= prev_value {
1633 return Err(ArrowError::InvalidArgumentError(format!(
1634 "The values in run_ends array should be strictly increasing. Found value {value} at index {ix} with previous value {prev_value} that does not match the criteria."
1635 )));
1636 }
1637
1638 prev_value = value;
1639 Ok(())
1640 })?;
1641
1642 let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1643 if prev_value.as_usize() < len_plus_offset {
1644 return Err(ArrowError::InvalidArgumentError(format!(
1645 "The offset + length of array should be less or equal to last value in the run_ends array. The last value of run_ends array is {prev_value} and offset + length of array is {}.",
1646 len_plus_offset
1647 )));
1648 }
1649 Ok(())
1650 }
1651
1652 pub fn ptr_eq(&self, other: &Self) -> bool {
1656 if self.offset != other.offset
1657 || self.len != other.len
1658 || self.data_type != other.data_type
1659 || self.buffers.len() != other.buffers.len()
1660 || self.child_data.len() != other.child_data.len()
1661 {
1662 return false;
1663 }
1664
1665 match (&self.nulls, &other.nulls) {
1666 (Some(a), Some(b)) if !a.inner().ptr_eq(b.inner()) => return false,
1667 (Some(_), None) | (None, Some(_)) => return false,
1668 _ => {}
1669 };
1670
1671 if !self
1672 .buffers
1673 .iter()
1674 .zip(other.buffers.iter())
1675 .all(|(a, b)| a.as_ptr() == b.as_ptr())
1676 {
1677 return false;
1678 }
1679
1680 self.child_data
1681 .iter()
1682 .zip(other.child_data.iter())
1683 .all(|(a, b)| a.ptr_eq(b))
1684 }
1685
1686 pub fn into_builder(self) -> ArrayDataBuilder {
1688 self.into()
1689 }
1690
1691 #[cfg(feature = "pool")]
1698 pub fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
1699 for buffer in &self.buffers {
1701 buffer.claim(pool);
1702 }
1703
1704 if let Some(nulls) = &self.nulls {
1706 nulls.claim(pool);
1707 }
1708
1709 for child in &self.child_data {
1711 child.claim(pool);
1712 }
1713 }
1714}
1715
1716pub fn layout(data_type: &DataType) -> DataTypeLayout {
1719 use arrow_schema::IntervalUnit::*;
1722
1723 match data_type {
1724 DataType::Null => DataTypeLayout {
1725 buffers: vec![],
1726 can_contain_null_mask: false,
1727 variadic: false,
1728 },
1729 DataType::Boolean => DataTypeLayout {
1730 buffers: vec![BufferSpec::BitMap],
1731 can_contain_null_mask: true,
1732 variadic: false,
1733 },
1734 DataType::Int8 => DataTypeLayout::new_fixed_width::<i8>(),
1735 DataType::Int16 => DataTypeLayout::new_fixed_width::<i16>(),
1736 DataType::Int32 => DataTypeLayout::new_fixed_width::<i32>(),
1737 DataType::Int64 => DataTypeLayout::new_fixed_width::<i64>(),
1738 DataType::UInt8 => DataTypeLayout::new_fixed_width::<u8>(),
1739 DataType::UInt16 => DataTypeLayout::new_fixed_width::<u16>(),
1740 DataType::UInt32 => DataTypeLayout::new_fixed_width::<u32>(),
1741 DataType::UInt64 => DataTypeLayout::new_fixed_width::<u64>(),
1742 DataType::Float16 => DataTypeLayout::new_fixed_width::<half::f16>(),
1743 DataType::Float32 => DataTypeLayout::new_fixed_width::<f32>(),
1744 DataType::Float64 => DataTypeLayout::new_fixed_width::<f64>(),
1745 DataType::Timestamp(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1746 DataType::Date32 => DataTypeLayout::new_fixed_width::<i32>(),
1747 DataType::Date64 => DataTypeLayout::new_fixed_width::<i64>(),
1748 DataType::Time32(_) => DataTypeLayout::new_fixed_width::<i32>(),
1749 DataType::Time64(_) => DataTypeLayout::new_fixed_width::<i64>(),
1750 DataType::Interval(YearMonth) => DataTypeLayout::new_fixed_width::<i32>(),
1751 DataType::Interval(DayTime) => DataTypeLayout::new_fixed_width::<IntervalDayTime>(),
1752 DataType::Interval(MonthDayNano) => {
1753 DataTypeLayout::new_fixed_width::<IntervalMonthDayNano>()
1754 }
1755 DataType::Duration(_) => DataTypeLayout::new_fixed_width::<i64>(),
1756 DataType::Decimal32(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1757 DataType::Decimal64(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1758 DataType::Decimal128(_, _) => DataTypeLayout::new_fixed_width::<i128>(),
1759 DataType::Decimal256(_, _) => DataTypeLayout::new_fixed_width::<i256>(),
1760 DataType::FixedSizeBinary(size) => {
1761 let spec = BufferSpec::FixedWidth {
1762 byte_width: (*size).try_into().unwrap(),
1763 alignment: mem::align_of::<u8>(),
1764 };
1765 DataTypeLayout {
1766 buffers: vec![spec],
1767 can_contain_null_mask: true,
1768 variadic: false,
1769 }
1770 }
1771 DataType::Binary => DataTypeLayout::new_binary::<i32>(),
1772 DataType::LargeBinary => DataTypeLayout::new_binary::<i64>(),
1773 DataType::Utf8 => DataTypeLayout::new_binary::<i32>(),
1774 DataType::LargeUtf8 => DataTypeLayout::new_binary::<i64>(),
1775 DataType::BinaryView | DataType::Utf8View => DataTypeLayout::new_view(),
1776 DataType::FixedSizeList(_, _) => DataTypeLayout::new_nullable_empty(), DataType::List(_) => DataTypeLayout::new_fixed_width::<i32>(),
1778 DataType::ListView(_) => DataTypeLayout::new_list_view::<i32>(),
1779 DataType::LargeListView(_) => DataTypeLayout::new_list_view::<i64>(),
1780 DataType::LargeList(_) => DataTypeLayout::new_fixed_width::<i64>(),
1781 DataType::Map(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1782 DataType::Struct(_) => DataTypeLayout::new_nullable_empty(), DataType::RunEndEncoded(_, _) => DataTypeLayout::new_empty(), DataType::Union(_, mode) => {
1785 let type_ids = BufferSpec::FixedWidth {
1786 byte_width: mem::size_of::<i8>(),
1787 alignment: mem::align_of::<i8>(),
1788 };
1789
1790 DataTypeLayout {
1791 buffers: match mode {
1792 UnionMode::Sparse => {
1793 vec![type_ids]
1794 }
1795 UnionMode::Dense => {
1796 vec![
1797 type_ids,
1798 BufferSpec::FixedWidth {
1799 byte_width: mem::size_of::<i32>(),
1800 alignment: mem::align_of::<i32>(),
1801 },
1802 ]
1803 }
1804 },
1805 can_contain_null_mask: false,
1806 variadic: false,
1807 }
1808 }
1809 DataType::Dictionary(key_type, _value_type) => layout(key_type),
1810 }
1811}
1812
/// Describes the buffers that make up the physical layout of a data type.
#[derive(Debug, PartialEq, Eq)]
pub struct DataTypeLayout {
    /// Specification of each buffer, in buffer order.
    pub buffers: Vec<BufferSpec>,

    /// Whether arrays with this layout may carry a null mask.
    pub can_contain_null_mask: bool,

    /// Set only by [`DataTypeLayout::new_view`]; every other constructor in
    /// this impl leaves it `false`.
    pub variadic: bool,
}

impl DataTypeLayout {
    /// Builds the fixed-width spec matching the size and alignment of `T`.
    fn fixed_width_spec<T>() -> BufferSpec {
        BufferSpec::FixedWidth {
            byte_width: mem::size_of::<T>(),
            alignment: mem::align_of::<T>(),
        }
    }

    /// Layout with a single fixed-width buffer of `T`, nullable, not variadic.
    pub fn new_fixed_width<T>() -> Self {
        let buffers = vec![Self::fixed_width_spec::<T>()];
        Self {
            buffers,
            can_contain_null_mask: true,
            variadic: false,
        }
    }

    /// Layout with no buffers that may still carry a null mask.
    pub fn new_nullable_empty() -> Self {
        Self {
            buffers: Vec::new(),
            can_contain_null_mask: true,
            variadic: false,
        }
    }

    /// Layout with no buffers and no null mask.
    pub fn new_empty() -> Self {
        Self {
            buffers: Vec::new(),
            can_contain_null_mask: false,
            variadic: false,
        }
    }

    /// Layout with a fixed-width buffer of `T` followed by a variable-width
    /// buffer, nullable, not variadic.
    pub fn new_binary<T>() -> Self {
        let offsets = Self::fixed_width_spec::<T>();
        let values = BufferSpec::VariableWidth;
        Self {
            buffers: vec![offsets, values],
            can_contain_null_mask: true,
            variadic: false,
        }
    }

    /// Layout with a single fixed-width buffer of `u128`, nullable, and
    /// flagged as variadic.
    pub fn new_view() -> Self {
        let views = Self::fixed_width_spec::<u128>();
        Self {
            buffers: vec![views],
            can_contain_null_mask: true,
            variadic: true,
        }
    }

    /// Layout with two fixed-width buffers of `T`, nullable, not variadic.
    pub fn new_list_view<T>() -> Self {
        let first = Self::fixed_width_spec::<T>();
        let second = Self::fixed_width_spec::<T>();
        Self {
            buffers: vec![first, second],
            can_contain_null_mask: true,
            variadic: false,
        }
    }
}

/// Specification of a single buffer within a [`DataTypeLayout`].
#[derive(Debug, PartialEq, Eq)]
pub enum BufferSpec {
    /// A buffer of fixed-width elements.
    FixedWidth {
        /// Width of one element in bytes.
        byte_width: usize,
        /// Alignment of one element in bytes.
        alignment: usize,
    },
    /// A buffer of variable-width data.
    VariableWidth,
    /// A bitmap buffer.
    BitMap,
    /// Not constructed anywhere in the code visible here.
    #[allow(dead_code)]
    AlwaysNull,
}
1944
1945impl PartialEq for ArrayData {
1946 fn eq(&self, other: &Self) -> bool {
1947 equal::equal(self, other)
1948 }
1949}
1950
/// A boolean flag that can only be changed through an `unsafe` call.
///
/// It starts out as `false`. [`ArrayDataBuilder`] stores one to decide whether
/// validation may be skipped when building.
#[derive(Debug, Clone)]
#[doc(hidden)]
pub struct UnsafeFlag(bool);

impl UnsafeFlag {
    /// Creates a flag set to `false`.
    #[inline]
    pub const fn new() -> Self {
        UnsafeFlag(false)
    }

    /// Sets the flag to `val`.
    ///
    /// # Safety
    ///
    /// Changing the flag is not unsafe in itself, but code holding the flag
    /// may use it to bypass checks (for example validation in
    /// `ArrayDataBuilder::build`), so the caller takes responsibility for
    /// whatever those checks would have guaranteed.
    #[inline]
    pub unsafe fn set(&mut self, val: bool) {
        let UnsafeFlag(flag) = self;
        *flag = val;
    }

    /// Returns the current value of the flag.
    #[inline]
    pub fn get(&self) -> bool {
        let UnsafeFlag(flag) = self;
        *flag
    }
}

impl Default for UnsafeFlag {
    /// Same as [`UnsafeFlag::new`]: the flag is `false`.
    fn default() -> Self {
        UnsafeFlag::new()
    }
}
2009
/// Builder for [`ArrayData`]: collects the individual parts of an array and
/// assembles (and, unless told otherwise, validates) them in `build`.
#[derive(Debug)]
pub struct ArrayDataBuilder {
    // Data type of the array being built.
    data_type: DataType,
    // Number of elements.
    len: usize,
    // Caller-supplied null count; only consulted together with `null_bit_buffer`.
    null_count: Option<usize>,
    // Raw validity bitmap; used by `build` only when `nulls` is `None`.
    null_bit_buffer: Option<Buffer>,
    // Ready-made null buffer; takes precedence over `null_bit_buffer` in `build`.
    nulls: Option<NullBuffer>,
    // Offset of the array; also passed as the bit offset into `null_bit_buffer`.
    offset: usize,
    // Value buffers.
    buffers: Vec<Buffer>,
    // Child arrays.
    child_data: Vec<ArrayData>,
    // When true, `build` calls `ArrayData::align_buffers` before validating.
    align_buffers: bool,
    // When set (and the `force_validate` feature is off), `build` skips validation.
    skip_validation: UnsafeFlag,
}
2036
2037impl ArrayDataBuilder {
2038 #[inline]
2039 pub const fn new(data_type: DataType) -> Self {
2041 Self {
2042 data_type,
2043 len: 0,
2044 null_count: None,
2045 null_bit_buffer: None,
2046 nulls: None,
2047 offset: 0,
2048 buffers: vec![],
2049 child_data: vec![],
2050 align_buffers: false,
2051 skip_validation: UnsafeFlag::new(),
2052 }
2053 }
2054
2055 pub fn data_type(self, data_type: DataType) -> Self {
2057 Self { data_type, ..self }
2058 }
2059
2060 #[inline]
2061 #[allow(clippy::len_without_is_empty)]
2062 pub const fn len(mut self, n: usize) -> Self {
2064 self.len = n;
2065 self
2066 }
2067
2068 pub fn nulls(mut self, nulls: Option<NullBuffer>) -> Self {
2070 self.nulls = nulls;
2071 self.null_count = None;
2072 self.null_bit_buffer = None;
2073 self
2074 }
2075
2076 pub fn null_count(mut self, null_count: usize) -> Self {
2078 self.null_count = Some(null_count);
2079 self
2080 }
2081
2082 pub fn null_bit_buffer(mut self, buf: Option<Buffer>) -> Self {
2084 self.nulls = None;
2085 self.null_bit_buffer = buf;
2086 self
2087 }
2088
2089 #[inline]
2091 pub const fn offset(mut self, n: usize) -> Self {
2092 self.offset = n;
2093 self
2094 }
2095
2096 pub fn buffers(mut self, v: Vec<Buffer>) -> Self {
2098 self.buffers = v;
2099 self
2100 }
2101
2102 pub fn add_buffer(mut self, b: Buffer) -> Self {
2104 self.buffers.push(b);
2105 self
2106 }
2107
2108 pub fn add_buffers<I: IntoIterator<Item = Buffer>>(mut self, bs: I) -> Self {
2110 self.buffers.extend(bs);
2111 self
2112 }
2113
2114 pub fn child_data(mut self, v: Vec<ArrayData>) -> Self {
2116 self.child_data = v;
2117 self
2118 }
2119
2120 pub fn add_child_data(mut self, r: ArrayData) -> Self {
2122 self.child_data.push(r);
2123 self
2124 }
2125
2126 pub unsafe fn build_unchecked(self) -> ArrayData {
2142 unsafe { self.skip_validation(true) }.build().unwrap()
2143 }
2144
2145 pub fn build(self) -> Result<ArrayData, ArrowError> {
2154 let Self {
2155 data_type,
2156 len,
2157 null_count,
2158 null_bit_buffer,
2159 nulls,
2160 offset,
2161 buffers,
2162 child_data,
2163 align_buffers,
2164 skip_validation,
2165 } = self;
2166
2167 let nulls = nulls
2168 .or_else(|| {
2169 let buffer = null_bit_buffer?;
2170 let buffer = BooleanBuffer::new(buffer, offset, len);
2171 Some(match null_count {
2172 Some(n) => {
2173 unsafe { NullBuffer::new_unchecked(buffer, n) }
2175 }
2176 None => NullBuffer::new(buffer),
2177 })
2178 })
2179 .filter(|b| b.null_count() != 0);
2180
2181 let mut data = ArrayData {
2182 data_type,
2183 len,
2184 offset,
2185 buffers,
2186 child_data,
2187 nulls,
2188 };
2189
2190 if align_buffers {
2191 data.align_buffers();
2192 }
2193
2194 if !skip_validation.get() || cfg!(feature = "force_validate") {
2196 data.validate_data()?;
2197 }
2198 Ok(data)
2199 }
2200
2201 #[deprecated(since = "54.1.0", note = "Use ArrayData::align_buffers instead")]
2203 pub fn build_aligned(self) -> Result<ArrayData, ArrowError> {
2204 self.align_buffers(true).build()
2205 }
2206
2207 pub fn align_buffers(mut self, align_buffers: bool) -> Self {
2223 self.align_buffers = align_buffers;
2224 self
2225 }
2226
2227 pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self {
2241 unsafe {
2242 self.skip_validation.set(skip_validation);
2243 }
2244 self
2245 }
2246}
2247
2248impl From<ArrayData> for ArrayDataBuilder {
2249 fn from(d: ArrayData) -> Self {
2250 Self {
2251 data_type: d.data_type,
2252 len: d.len,
2253 offset: d.offset,
2254 buffers: d.buffers,
2255 child_data: d.child_data,
2256 nulls: d.nulls,
2257 null_bit_buffer: None,
2258 null_count: None,
2259 align_buffers: false,
2260 skip_validation: UnsafeFlag::new(),
2261 }
2262 }
2263}
2264
// Unit tests for `ArrayData`, `ArrayDataBuilder` and the null-counting helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::{Field, Fields};

    // Creates a buffer holding `n` copies of `42i32`.
    fn make_i32_buffer(n: usize) -> Buffer {
        Buffer::from_slice_ref(vec![42i32; n])
    }

    // Creates a buffer holding `n` copies of `42f32`.
    fn make_f32_buffer(n: usize) -> Buffer {
        Buffer::from_slice_ref(vec![42f32; n])
    }

    // Builds an Int32 array with an offset and a null bitmap and checks the
    // length, null count, offset and buffer contents of the result.
    #[test]
    fn test_builder() {
        // 25 values so that offset 5 + len 20 fits exactly.
        let v = (0..25).collect::<Vec<i32>>();
        let b1 = Buffer::from_slice_ref(&v);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(20)
            .offset(5)
            .add_buffer(b1)
            .null_bit_buffer(Some(Buffer::from([
                0b01011111, 0b10110101, 0b01100011, 0b00011110,
            ])))
            .build()
            .unwrap();

        assert_eq!(20, arr_data.len());
        // Bits 5..25 of the bitmap contain 10 set bits, leaving 10 nulls.
        assert_eq!(10, arr_data.null_count());
        assert_eq!(5, arr_data.offset());
        assert_eq!(1, arr_data.buffers().len());
        assert_eq!(
            Buffer::from_slice_ref(&v).as_slice(),
            arr_data.buffers()[0].as_slice()
        );
    }

    // Builds a struct array around one Int32 child and checks the child is kept.
    #[test]
    fn test_builder_with_child_data() {
        let child_arr_data = ArrayData::try_new(
            DataType::Int32,
            5,
            None,
            0,
            vec![Buffer::from_slice_ref([1i32, 2, 3, 4, 5])],
            vec![],
        )
        .unwrap();

        let field = Arc::new(Field::new("x", DataType::Int32, true));
        let data_type = DataType::Struct(vec![field].into());

        let arr_data = ArrayData::builder(data_type)
            .len(5)
            .offset(0)
            .add_child_data(child_arr_data.clone())
            .build()
            .unwrap();

        assert_eq!(5, arr_data.len());
        assert_eq!(1, arr_data.child_data().len());
        assert_eq!(child_arr_data, arr_data.child_data()[0]);
    }

    // Checks the null count derived from a bitmap, with and without an offset.
    #[test]
    fn test_null_count() {
        // Bits 0, 3 and 10 are set: 3 valid slots out of 16.
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert_eq!(13, arr_data.null_count());

        // Same bitmap viewed through offset 2, len 12: bits 3 and 10 remain.
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(12)
            .offset(2)
            // 14 values cover offset 2 + len 12.
            .add_buffer(make_i32_buffer(14))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert_eq!(10, arr_data.null_count());
    }

    // Checks that the built array exposes the bitmap it was given.
    #[test]
    fn test_null_buffer_ref() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert!(arr_data.nulls().is_some());
        assert_eq!(&bit_v, arr_data.nulls().unwrap().validity());
    }

    // Checks length, offset and null count after slicing, including a slice
    // of a slice.
    #[test]
    fn test_slice() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        // Dropping slot 0 (valid) removes one element but no null.
        let new_data = data.slice(1, 15);
        assert_eq!(data.len() - 1, new_data.len());
        assert_eq!(1, new_data.offset());
        assert_eq!(data.null_count(), new_data.null_count());

        // Slicing again is relative to the first slice; slot 1 was a null.
        let new_data = new_data.slice(1, 14);
        assert_eq!(data.len() - 2, new_data.len());
        assert_eq!(2, new_data.offset());
        assert_eq!(data.null_count() - 1, new_data.null_count());
    }

    // Slicing with a length that overflows when added to the offset must panic.
    #[test]
    #[should_panic(expected = "offset + length overflow")]
    fn test_slice_panics_on_offset_length_overflow() {
        let data = ArrayData::builder(DataType::Int32)
            .len(4)
            .add_buffer(make_i32_buffer(4))
            .build()
            .unwrap();
        let sliced = data.slice(1, 3);

        sliced.slice(1, usize::MAX);
    }

    // `typed_offsets` must report an error instead of overflowing when the
    // length is `usize::MAX`.
    #[test]
    fn test_typed_offsets_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: usize::MAX,
            offset: 0,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };
        let err = data.typed_offsets::<i32>().unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 1 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    // `typed_buffer` must report an error instead of overflowing when the
    // requested length plus the array offset exceeds `usize`.
    #[test]
    fn test_validate_typed_buffer_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: 0,
            offset: 2,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };
        let err = data.typed_buffer::<i32>(0, usize::MAX).unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 2 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    // Shared helper: tries to create a Binary array whose length plus offset
    // overflows `usize`.
    fn try_new_binary_length_offset_overflow() -> Result<ArrayData, ArrowError> {
        ArrayData::try_new(
            DataType::Binary,
            usize::MAX,
            None,
            1,
            vec![
                Buffer::from_slice_ref([0_i32]),
                Buffer::from_iter(std::iter::empty::<u8>()),
            ],
            vec![],
        )
    }

    // Without `force_validate`, the overflow is returned as an error.
    #[cfg(not(feature = "force_validate"))]
    #[test]
    fn test_try_new_length_offset_overflow() {
        let err = try_new_binary_length_offset_overflow().unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 1 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    // With `force_validate`, the same input is expected to panic with the
    // overflow message.
    #[cfg(feature = "force_validate")]
    #[test]
    #[should_panic(
        expected = "Length 18446744073709551615 with offset 1 overflows usize for Binary"
    )]
    fn test_try_new_length_offset_overflow_force_validate() {
        try_new_binary_length_offset_overflow().unwrap();
    }

    // Exercises value equality (`==`) and identity equality (`ptr_eq`) across
    // different types, clones and slices.
    #[test]
    fn test_equality() {
        let int_data = ArrayData::builder(DataType::Int32)
            .len(1)
            .add_buffer(make_i32_buffer(1))
            .build()
            .unwrap();

        let float_data = ArrayData::builder(DataType::Float32)
            .len(1)
            .add_buffer(make_f32_buffer(1))
            .build()
            .unwrap();
        assert_ne!(int_data, float_data);
        assert!(!int_data.ptr_eq(&float_data));
        assert!(int_data.ptr_eq(&int_data));

        // A clone is expected to be both equal and pointer-equal.
        #[allow(clippy::redundant_clone)]
        let int_data_clone = int_data.clone();
        assert_eq!(int_data, int_data_clone);
        assert!(int_data.ptr_eq(&int_data_clone));
        assert!(int_data_clone.ptr_eq(&int_data));

        // A slice has a different offset/length, so it is not pointer-equal
        // to its source.
        let int_data_slice = int_data_clone.slice(1, 0);
        assert!(int_data_slice.ptr_eq(&int_data_slice));
        assert!(!int_data.ptr_eq(&int_data_slice));
        assert!(!int_data_slice.ptr_eq(&int_data));

        let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes());
        let offsets_buffer = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
        let string_data = ArrayData::try_new(
            DataType::Utf8,
            3,
            Some(Buffer::from_iter(vec![true, false, true])),
            0,
            vec![offsets_buffer, data_buffer],
            vec![],
        )
        .unwrap();

        assert_ne!(float_data, string_data);
        assert!(!float_data.ptr_eq(&string_data));

        assert!(string_data.ptr_eq(&string_data));

        #[allow(clippy::redundant_clone)]
        let string_data_cloned = string_data.clone();
        assert!(string_data_cloned.ptr_eq(&string_data));
        assert!(string_data.ptr_eq(&string_data_cloned));

        let string_data_slice = string_data.slice(1, 2);
        assert!(string_data_slice.ptr_eq(&string_data_slice));
        assert!(!string_data_slice.ptr_eq(&string_data))
    }

    // Checks that `get_slice_memory_size` shrinks by the expected number of
    // bytes after slicing.
    #[test]
    fn test_slice_memory_size() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        let new_data = data.slice(1, 14);
        // Two fewer elements; presumably the 8 bytes are two `i32` values.
        assert_eq!(
            data.get_slice_memory_size().unwrap() - 8,
            new_data.get_slice_memory_size().unwrap()
        );
        let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes());
        let offsets_buffer = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
        let string_data = ArrayData::try_new(
            DataType::Utf8,
            3,
            Some(Buffer::from_iter(vec![true, false, true])),
            0,
            vec![offsets_buffer, data_buffer],
            vec![],
        )
        .unwrap();
        let string_data_slice = string_data.slice(1, 2);
        // NOTE(review): presumably one i32 offset (4 bytes) plus the two
        // bytes of the first string - confirm against the implementation.
        assert_eq!(
            string_data.get_slice_memory_size().unwrap() - 6,
            string_data_slice.get_slice_memory_size().unwrap()
        );
    }

    // Checks `count_nulls` over a whole bitmap and over a sub-range.
    #[test]
    fn test_count_nulls() {
        // 9 of the 16 bits are set, so 7 slots are null.
        let buffer = Buffer::from([0b00010110, 0b10011111]);
        let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 16));
        let count = count_nulls(Some(&buffer), 0, 16);
        assert_eq!(count, 7);

        // Bits 4..12 contain 5 set bits, so 3 of those 8 slots are null.
        let count = count_nulls(Some(&buffer), 4, 8);
        assert_eq!(count, 3);
    }

    // Checks `contains_nulls` on ranges with nulls, without nulls, and empty.
    #[test]
    fn test_contains_nulls() {
        let buffer: Buffer =
            MutableBuffer::from_iter([false, false, false, true, true, false]).into();
        let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 6));
        assert!(contains_nulls(Some(&buffer), 0, 6));
        assert!(contains_nulls(Some(&buffer), 0, 3));
        // Slots 3 and 4 are both valid.
        assert!(!contains_nulls(Some(&buffer), 3, 2));
        // An empty range never contains nulls.
        assert!(!contains_nulls(Some(&buffer), 0, 0));
    }

    // A misaligned buffer must fail validation until `align_buffers` is called.
    #[test]
    fn test_alignment() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        // Slicing by one byte leaves the data off the `i32` alignment.
        let sliced = buffer.slice(1);

        let mut data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };
        data.validate_full().unwrap();

        // Swap in the misaligned buffer; validation must now fail.
        data.buffers[0] = sliced;
        let err = data.validate().unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Misaligned buffers[0] in array of type Int32, offset from expected alignment of 4 by 1"
        );

        data.align_buffers();
        data.validate_full().unwrap();
    }

    // Same as `test_alignment`, but the misaligned buffer belongs to a struct
    // child, so validation and alignment must reach into children.
    #[test]
    fn test_alignment_struct() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        let sliced = buffer.slice(1);

        let child_data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };

        let schema = DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)]));
        let mut data = ArrayData {
            data_type: schema,
            len: 0,
            offset: 0,
            buffers: vec![],
            child_data: vec![child_data],
            nulls: None,
        };
        data.validate_full().unwrap();

        // Swap in the misaligned child buffer; validation must now fail.
        data.child_data[0].buffers[0] = sliced;
        let err = data.validate().unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Misaligned buffers[0] in array of type Int32, offset from expected alignment of 4 by 1"
        );

        data.align_buffers();
        data.validate_full().unwrap();
    }

    // `new_null` must produce all-null arrays of the requested length for the
    // view-based types.
    #[test]
    fn test_null_view_types() {
        let array_len = 32;
        let array = ArrayData::new_null(&DataType::BinaryView, array_len);
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(&DataType::Utf8View, array_len);
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(
            &DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
            array_len,
        );
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(
            &DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
            array_len,
        );
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }
    }
}