1#![allow(clippy::enum_clike_unportable_variant)]
18
19use crate::{make_array, Array, ArrayRef};
20use arrow_buffer::bit_chunk_iterator::{BitChunkIterator, BitChunks};
21use arrow_buffer::buffer::NullBuffer;
22use arrow_buffer::{BooleanBuffer, MutableBuffer, ScalarBuffer};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::{ArrowError, DataType, UnionFields, UnionMode};
25use std::any::Any;
28use std::collections::HashSet;
29use std::sync::Arc;
30
/// An array whose slots each take their value from one of several child arrays.
///
/// Every slot carries a type id selecting the child that holds its value. When
/// `offsets` is present (dense mode) the slot's value lives at that offset in the
/// selected child; otherwise (sparse mode) it lives at the slot's own index.
#[derive(Clone)]
pub struct UnionArray {
    /// The `DataType::Union` describing the fields and the union mode.
    data_type: DataType,
    /// One type id per slot, selecting the child that holds the slot's value.
    type_ids: ScalarBuffer<i8>,
    /// Per-slot index into the selected child; `Some` for dense unions, `None` for sparse.
    offsets: Option<ScalarBuffer<i32>>,
    /// Child arrays indexed by type id; entries without a matching field are `None`.
    fields: Vec<Option<ArrayRef>>,
}
129
130impl UnionArray {
131 pub unsafe fn new_unchecked(
150 fields: UnionFields,
151 type_ids: ScalarBuffer<i8>,
152 offsets: Option<ScalarBuffer<i32>>,
153 children: Vec<ArrayRef>,
154 ) -> Self {
155 let mode = if offsets.is_some() {
156 UnionMode::Dense
157 } else {
158 UnionMode::Sparse
159 };
160
161 let len = type_ids.len();
162 let builder = ArrayData::builder(DataType::Union(fields, mode))
163 .add_buffer(type_ids.into_inner())
164 .child_data(children.into_iter().map(Array::into_data).collect())
165 .len(len);
166
167 let data = match offsets {
168 Some(offsets) => builder.add_buffer(offsets.into_inner()).build_unchecked(),
169 None => builder.build_unchecked(),
170 };
171 Self::from(data)
172 }
173
174 pub fn try_new(
178 fields: UnionFields,
179 type_ids: ScalarBuffer<i8>,
180 offsets: Option<ScalarBuffer<i32>>,
181 children: Vec<ArrayRef>,
182 ) -> Result<Self, ArrowError> {
183 if fields.len() != children.len() {
185 return Err(ArrowError::InvalidArgumentError(
186 "Union fields length must match child arrays length".to_string(),
187 ));
188 }
189
190 if let Some(offsets) = &offsets {
191 if offsets.len() != type_ids.len() {
193 return Err(ArrowError::InvalidArgumentError(
194 "Type Ids and Offsets lengths must match".to_string(),
195 ));
196 }
197 } else {
198 for child in &children {
200 if child.len() != type_ids.len() {
201 return Err(ArrowError::InvalidArgumentError(
202 "Sparse union child arrays must be equal in length to the length of the union".to_string(),
203 ));
204 }
205 }
206 }
207
208 let max_id = fields.iter().map(|(i, _)| i).max().unwrap_or_default() as usize;
210 let mut array_lens = vec![i32::MIN; max_id + 1];
211 for (cd, (field_id, _)) in children.iter().zip(fields.iter()) {
212 array_lens[field_id as usize] = cd.len() as i32;
213 }
214
215 for id in &type_ids {
217 match array_lens.get(*id as usize) {
218 Some(x) if *x != i32::MIN => {}
219 _ => {
220 return Err(ArrowError::InvalidArgumentError(
221 "Type Ids values must match one of the field type ids".to_owned(),
222 ))
223 }
224 }
225 }
226
227 if let Some(offsets) = &offsets {
229 let mut iter = type_ids.iter().zip(offsets.iter());
230 if iter.any(|(type_id, &offset)| offset < 0 || offset >= array_lens[*type_id as usize])
231 {
232 return Err(ArrowError::InvalidArgumentError(
233 "Offsets must be positive and within the length of the Array".to_owned(),
234 ));
235 }
236 }
237
238 let union_array = unsafe { Self::new_unchecked(fields, type_ids, offsets, children) };
241 Ok(union_array)
242 }
243
244 pub fn child(&self, type_id: i8) -> &ArrayRef {
251 assert!((type_id as usize) < self.fields.len());
252 let boxed = &self.fields[type_id as usize];
253 boxed.as_ref().expect("invalid type id")
254 }
255
256 pub fn type_id(&self, index: usize) -> i8 {
262 assert!(index < self.type_ids.len());
263 self.type_ids[index]
264 }
265
    /// Returns the buffer holding one type id per slot.
    pub fn type_ids(&self) -> &ScalarBuffer<i8> {
        &self.type_ids
    }
270
    /// Returns the offsets buffer, or `None` when the array has none (sparse union).
    pub fn offsets(&self) -> Option<&ScalarBuffer<i32>> {
        self.offsets.as_ref()
    }
275
276 pub fn value_offset(&self, index: usize) -> usize {
282 assert!(index < self.len());
283 match &self.offsets {
284 Some(offsets) => offsets[index] as usize,
285 None => self.offset() + index,
286 }
287 }
288
289 pub fn value(&self, i: usize) -> ArrayRef {
293 let type_id = self.type_id(i);
294 let value_offset = self.value_offset(i);
295 let child = self.child(type_id);
296 child.slice(value_offset, 1)
297 }
298
299 pub fn type_names(&self) -> Vec<&str> {
301 match self.data_type() {
302 DataType::Union(fields, _) => fields
303 .iter()
304 .map(|(_, f)| f.name().as_str())
305 .collect::<Vec<&str>>(),
306 _ => unreachable!("Union array's data type is not a union!"),
307 }
308 }
309
310 fn is_dense(&self) -> bool {
312 match self.data_type() {
313 DataType::Union(_, mode) => mode == &UnionMode::Dense,
314 _ => unreachable!("Union array's data type is not a union!"),
315 }
316 }
317
318 pub fn slice(&self, offset: usize, length: usize) -> Self {
320 let (offsets, fields) = match self.offsets.as_ref() {
321 Some(offsets) => (Some(offsets.slice(offset, length)), self.fields.clone()),
323 None => {
325 let fields = self
326 .fields
327 .iter()
328 .map(|x| x.as_ref().map(|x| x.slice(offset, length)))
329 .collect();
330 (None, fields)
331 }
332 };
333
334 Self {
335 data_type: self.data_type.clone(),
336 type_ids: self.type_ids.slice(offset, length),
337 offsets,
338 fields,
339 }
340 }
341
342 #[allow(clippy::type_complexity)]
370 pub fn into_parts(
371 self,
372 ) -> (
373 UnionFields,
374 ScalarBuffer<i8>,
375 Option<ScalarBuffer<i32>>,
376 Vec<ArrayRef>,
377 ) {
378 let Self {
379 data_type,
380 type_ids,
381 offsets,
382 mut fields,
383 } = self;
384 match data_type {
385 DataType::Union(union_fields, _) => {
386 let children = union_fields
387 .iter()
388 .map(|(type_id, _)| fields[type_id as usize].take().unwrap())
389 .collect();
390 (union_fields, type_ids, offsets, children)
391 }
392 _ => unreachable!(),
393 }
394 }
395
    /// Computes the validity mask of the union 64 slots at a time, visiting only
    /// the fields listed in `nulls`.
    ///
    /// For each listed field the bits where that field is selected are combined
    /// with the field's validity bits. Slots that select none of the listed
    /// fields get a set bit.
    fn mask_sparse_skip_without_nulls(&self, nulls: Vec<(i8, NullBuffer)>) -> BooleanBuffer {
        // Accumulates (slots selecting any listed field, validity of those slots).
        let fold = |(with_nulls_selected, union_nulls), (is_field, field_nulls)| {
            (
                with_nulls_selected | is_field,
                union_nulls | (is_field & field_nulls),
            )
        };

        self.mask_sparse_helper(
            nulls,
            // Full 64-slot chunk: each field iterator yields its next 64 validity bits.
            |type_ids_chunk_array, nulls_masks_iters| {
                let (with_nulls_selected, union_nulls) = nulls_masks_iters
                    .iter_mut()
                    .map(|(field_type_id, field_nulls)| {
                        let field_nulls = field_nulls.next().unwrap();
                        let is_field = selection_mask(type_ids_chunk_array, *field_type_id);

                        (is_field, field_nulls)
                    })
                    .fold((0, 0), fold);

                // Slots that selected none of the listed fields.
                let without_nulls_selected = !with_nulls_selected;

                // Those slots get a set bit; the others keep the bits computed above.
                without_nulls_selected | union_nulls
            },
            // Trailing slots: same computation on the remainder bits.
            |type_ids_remainder, bit_chunks| {
                let (with_nulls_selected, union_nulls) = bit_chunks
                    .iter()
                    .map(|(field_type_id, field_bit_chunks)| {
                        let field_nulls = field_bit_chunks.remainder_bits();
                        let is_field = selection_mask(type_ids_remainder, *field_type_id);

                        (is_field, field_nulls)
                    })
                    .fold((0, 0), fold);

                // The negation also sets bits past the remainder.
                // NOTE(review): presumably harmless because the helper sizes the
                // resulting buffer to the number of type ids — confirm.
                let without_nulls_selected = !with_nulls_selected;

                without_nulls_selected | union_nulls
            },
        )
    }
447
    /// Computes the validity mask of the union 64 slots at a time, skipping the
    /// fields whose null buffer is entirely null.
    ///
    /// Fully null fields are removed from `nulls` and therefore never contribute
    /// a set bit. Fields that do not appear in `nulls` at all always contribute a
    /// set bit where they are selected.
    fn mask_sparse_skip_fully_null(&self, mut nulls: Vec<(i8, NullBuffer)>) -> BooleanBuffer {
        let fields = match self.data_type() {
            DataType::Union(fields, _) => fields,
            _ => unreachable!("Union array's data type is not a union!"),
        };

        let type_ids = fields.iter().map(|(id, _)| id).collect::<HashSet<_>>();
        let with_nulls = nulls.iter().map(|(id, _)| *id).collect::<HashSet<_>>();

        // Ids of the fields absent from `nulls`; computed before the `retain`
        // below so fully null fields are not counted among them.
        let without_nulls_ids = type_ids
            .difference(&with_nulls)
            .copied()
            .collect::<Vec<_>>();

        // Keep only the fields that have at least one non-null value.
        nulls.retain(|(_, nulls)| nulls.null_count() < nulls.len());

        self.mask_sparse_helper(
            nulls,
            // Full 64-slot chunk.
            |type_ids_chunk_array, nulls_masks_iters| {
                let union_nulls = nulls_masks_iters.iter_mut().fold(
                    0,
                    |union_nulls, (field_type_id, nulls_iter)| {
                        let field_nulls = nulls_iter.next().unwrap();

                        // No validity bit set in this chunk: the field cannot
                        // contribute, so its selection mask is not computed.
                        if field_nulls == 0 {
                            union_nulls
                        } else {
                            let is_field = selection_mask(type_ids_chunk_array, *field_type_id);

                            union_nulls | (is_field & field_nulls)
                        }
                    },
                );

                // Slots selecting a field that is absent from `nulls`.
                let without_nulls_selected =
                    without_nulls_selected(type_ids_chunk_array, &without_nulls_ids);

                union_nulls | without_nulls_selected
            },
            // Trailing slots: same computation on the remainder bits.
            |type_ids_remainder, bit_chunks| {
                let union_nulls =
                    bit_chunks
                        .iter()
                        .fold(0, |union_nulls, (field_type_id, field_bit_chunks)| {
                            let is_field = selection_mask(type_ids_remainder, *field_type_id);
                            let field_nulls = field_bit_chunks.remainder_bits();

                            union_nulls | is_field & field_nulls
                        });

                union_nulls | without_nulls_selected(type_ids_remainder, &without_nulls_ids)
            },
        )
    }
510
    /// Computes the validity mask of the union 64 slots at a time, deriving the
    /// selection mask of the first entry of `nulls` from the others.
    ///
    /// In full chunks the first entry is treated as selected wherever none of the
    /// remaining entries is, which saves one selection-mask computation per chunk.
    /// NOTE(review): this presumably requires `nulls` to list every field of the
    /// union; the visible caller only picks this strategy in that case — confirm.
    ///
    /// # Panics
    ///
    /// Panics on the first full chunk if `nulls` is empty.
    fn mask_sparse_all_with_nulls_skip_one(&self, nulls: Vec<(i8, NullBuffer)>) -> BooleanBuffer {
        self.mask_sparse_helper(
            nulls,
            // Full 64-slot chunk.
            |type_ids_chunk_array, nulls_masks_iters| {
                // Every entry but the first: explicit selection mask.
                let (is_not_first, union_nulls) = nulls_masks_iters[1..]
                    .iter_mut()
                    .fold(
                        (0, 0),
                        |(is_not_first, union_nulls), (field_type_id, nulls_iter)| {
                            let field_nulls = nulls_iter.next().unwrap();
                            let is_field = selection_mask(type_ids_chunk_array, *field_type_id);

                            (
                                is_not_first | is_field,
                                union_nulls | (is_field & field_nulls),
                            )
                        },
                    );

                // The first entry is selected wherever no other entry is.
                let is_first = !is_not_first;
                let first_nulls = nulls_masks_iters[0].1.next().unwrap();

                (is_first & first_nulls) | union_nulls
            },
            // Trailing slots: every entry, including the first, uses an explicit
            // selection mask.
            |type_ids_remainder, bit_chunks| {
                bit_chunks
                    .iter()
                    .fold(0, |union_nulls, (field_type_id, field_bit_chunks)| {
                        let field_nulls = field_bit_chunks.remainder_bits();
                        let is_field = selection_mask(type_ids_remainder, *field_type_id);

                        union_nulls | (is_field & field_nulls)
                    })
            },
        )
    }
556
557 fn mask_sparse_helper(
560 &self,
561 nulls: Vec<(i8, NullBuffer)>,
562 mut mask_chunk: impl FnMut(&[i8; 64], &mut [(i8, BitChunkIterator)]) -> u64,
563 mask_remainder: impl FnOnce(&[i8], &[(i8, BitChunks)]) -> u64,
564 ) -> BooleanBuffer {
565 let bit_chunks = nulls
566 .iter()
567 .map(|(type_id, nulls)| (*type_id, nulls.inner().bit_chunks()))
568 .collect::<Vec<_>>();
569
570 let mut nulls_masks_iter = bit_chunks
571 .iter()
572 .map(|(type_id, bit_chunks)| (*type_id, bit_chunks.iter()))
573 .collect::<Vec<_>>();
574
575 let chunks_exact = self.type_ids.chunks_exact(64);
576 let remainder = chunks_exact.remainder();
577
578 let chunks = chunks_exact.map(|type_ids_chunk| {
579 let type_ids_chunk_array = <&[i8; 64]>::try_from(type_ids_chunk).unwrap();
580
581 mask_chunk(type_ids_chunk_array, &mut nulls_masks_iter)
582 });
583
584 let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
587
588 if !remainder.is_empty() {
589 buffer.push(mask_remainder(remainder, &bit_chunks));
590 }
591
592 BooleanBuffer::new(buffer.into(), 0, self.type_ids.len())
593 }
594
    /// Computes the validity of every slot by looking up, slot by slot, the
    /// validity bit of the child selected by the slot's type id.
    ///
    /// `nulls` lists the null buffers of the children to consult; a type id
    /// without an entry is treated as always valid.
    fn gather_nulls(&self, nulls: Vec<(i8, NullBuffer)>) -> BooleanBuffer {
        let one_null = NullBuffer::new_null(1);
        let one_valid = NullBuffer::new_valid(1);

        // Lookup table indexed by the type id reinterpreted as `u8`. Each entry
        // pairs a null buffer with a mask applied to the lookup position:
        // `Mask::Zero` forces position 0 (used with the one-element buffers),
        // `Mask::Max` keeps the position unchanged.
        let mut logical_nulls_array = [(&one_valid, Mask::Zero); 256];

        for (type_id, nulls) in &nulls {
            if nulls.null_count() == nulls.len() {
                // Fully null child: every lookup reads the single null bit.
                logical_nulls_array[*type_id as u8 as usize] = (&one_null, Mask::Zero);
            } else {
                logical_nulls_array[*type_id as u8 as usize] = (nulls, Mask::Max);
            }
        }

        match &self.offsets {
            Some(offsets) => {
                assert_eq!(self.type_ids.len(), offsets.len());

                // SAFETY: NOTE(review): assumes `collect_bool` only passes
                // indices below `type_ids.len()` (equal to `offsets.len()` per
                // the assertion above) and that every offset lies within the
                // null buffer of its child — confirm.
                BooleanBuffer::collect_bool(self.type_ids.len(), |i| unsafe {
                    let type_id = *self.type_ids.get_unchecked(i);
                    let offset = *offsets.get_unchecked(i);

                    let (nulls, offset_mask) = &logical_nulls_array[type_id as u8 as usize];

                    nulls
                        .inner()
                        .value_unchecked(offset as usize & *offset_mask as usize)
                })
            }
            None => {
                // SAFETY: NOTE(review): assumes `collect_bool` only passes
                // indices below `type_ids.len()` and that each consulted null
                // buffer is at least that long — confirm.
                BooleanBuffer::collect_bool(self.type_ids.len(), |index| unsafe {
                    let type_id = *self.type_ids.get_unchecked(index);

                    let (nulls, index_mask) = &logical_nulls_array[type_id as u8 as usize];

                    nulls.inner().value_unchecked(index & *index_mask as usize)
                })
            }
        }
    }
656}
657
658impl From<ArrayData> for UnionArray {
659 fn from(data: ArrayData) -> Self {
660 let (fields, mode) = match data.data_type() {
661 DataType::Union(fields, mode) => (fields, *mode),
662 d => panic!("UnionArray expected ArrayData with type Union got {d}"),
663 };
664 let (type_ids, offsets) = match mode {
665 UnionMode::Sparse => (
666 ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()),
667 None,
668 ),
669 UnionMode::Dense => (
670 ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()),
671 Some(ScalarBuffer::new(
672 data.buffers()[1].clone(),
673 data.offset(),
674 data.len(),
675 )),
676 ),
677 };
678
679 let max_id = fields.iter().map(|(i, _)| i).max().unwrap_or_default() as usize;
680 let mut boxed_fields = vec![None; max_id + 1];
681 for (cd, (field_id, _)) in data.child_data().iter().zip(fields.iter()) {
682 boxed_fields[field_id as usize] = Some(make_array(cd.clone()));
683 }
684 Self {
685 data_type: data.data_type().clone(),
686 type_ids,
687 offsets,
688 fields: boxed_fields,
689 }
690 }
691}
692
693impl From<UnionArray> for ArrayData {
694 fn from(array: UnionArray) -> Self {
695 let len = array.len();
696 let f = match &array.data_type {
697 DataType::Union(f, _) => f,
698 _ => unreachable!(),
699 };
700 let buffers = match array.offsets {
701 Some(o) => vec![array.type_ids.into_inner(), o.into_inner()],
702 None => vec![array.type_ids.into_inner()],
703 };
704
705 let child = f
706 .iter()
707 .map(|(i, _)| array.fields[i as usize].as_ref().unwrap().to_data())
708 .collect();
709
710 let builder = ArrayDataBuilder::new(array.data_type)
711 .len(len)
712 .buffers(buffers)
713 .child_data(child);
714 unsafe { builder.build_unchecked() }
715 }
716}
717
718impl Array for UnionArray {
    /// Returns the array as [`Any`] so that it can be downcast to [`UnionArray`].
    fn as_any(&self) -> &dyn Any {
        self
    }
722
723 fn to_data(&self) -> ArrayData {
724 self.clone().into()
725 }
726
727 fn into_data(self) -> ArrayData {
728 self.into()
729 }
730
    /// Returns the data type stored with this array.
    fn data_type(&self) -> &DataType {
        &self.data_type
    }
734
735 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
736 Arc::new(self.slice(offset, length))
737 }
738
    /// Returns the number of slots, i.e. the number of type ids.
    fn len(&self) -> usize {
        self.type_ids.len()
    }
742
    /// Returns `true` when the array has no slots.
    fn is_empty(&self) -> bool {
        self.type_ids.is_empty()
    }
746
747 fn shrink_to_fit(&mut self) {
748 self.type_ids.shrink_to_fit();
749 if let Some(offsets) = &mut self.offsets {
750 offsets.shrink_to_fit();
751 }
752 for array in self.fields.iter_mut().flatten() {
753 array.shrink_to_fit();
754 }
755 self.fields.shrink_to_fit();
756 }
757
    /// Always returns 0: the array keeps no separate offset.
    fn offset(&self) -> usize {
        0
    }
761
    /// Always returns `None`: the array stores no null buffer of its own.
    /// Validity derived from the children is computed by `logical_nulls`.
    fn nulls(&self) -> Option<&NullBuffer> {
        None
    }
765
766 fn logical_nulls(&self) -> Option<NullBuffer> {
767 let fields = match self.data_type() {
768 DataType::Union(fields, _) => fields,
769 _ => unreachable!(),
770 };
771
772 if fields.len() <= 1 {
773 return self
774 .fields
775 .iter()
776 .flatten()
777 .map(Array::logical_nulls)
778 .next()
779 .flatten();
780 }
781
782 let logical_nulls = fields
783 .iter()
784 .filter_map(|(type_id, _)| Some((type_id, self.child(type_id).logical_nulls()?)))
785 .filter(|(_, nulls)| nulls.null_count() > 0)
786 .collect::<Vec<_>>();
787
788 if logical_nulls.is_empty() {
789 return None;
790 }
791
792 let fully_null_count = logical_nulls
793 .iter()
794 .filter(|(_, nulls)| nulls.null_count() == nulls.len())
795 .count();
796
797 if fully_null_count == fields.len() {
798 if let Some((_, exactly_sized)) = logical_nulls
799 .iter()
800 .find(|(_, nulls)| nulls.len() == self.len())
801 {
802 return Some(exactly_sized.clone());
803 }
804
805 if let Some((_, bigger)) = logical_nulls
806 .iter()
807 .find(|(_, nulls)| nulls.len() > self.len())
808 {
809 return Some(bigger.slice(0, self.len()));
810 }
811
812 return Some(NullBuffer::new_null(self.len()));
813 }
814
815 let boolean_buffer = match &self.offsets {
816 Some(_) => self.gather_nulls(logical_nulls),
817 None => {
818 let gather_relative_cost = if cfg!(target_feature = "avx2") {
826 10
827 } else if cfg!(target_feature = "sse4.1") {
828 3
829 } else if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") {
830 2
832 } else {
833 0
837 };
838
839 let strategies = [
840 (SparseStrategy::Gather, gather_relative_cost, true),
841 (
842 SparseStrategy::MaskAllFieldsWithNullsSkipOne,
843 fields.len() - 1,
844 fields.len() == logical_nulls.len(),
845 ),
846 (
847 SparseStrategy::MaskSkipWithoutNulls,
848 logical_nulls.len(),
849 true,
850 ),
851 (
852 SparseStrategy::MaskSkipFullyNull,
853 fields.len() - fully_null_count,
854 true,
855 ),
856 ];
857
858 let (strategy, _, _) = strategies
859 .iter()
860 .filter(|(_, _, applicable)| *applicable)
861 .min_by_key(|(_, cost, _)| cost)
862 .unwrap();
863
864 match strategy {
865 SparseStrategy::Gather => self.gather_nulls(logical_nulls),
866 SparseStrategy::MaskAllFieldsWithNullsSkipOne => {
867 self.mask_sparse_all_with_nulls_skip_one(logical_nulls)
868 }
869 SparseStrategy::MaskSkipWithoutNulls => {
870 self.mask_sparse_skip_without_nulls(logical_nulls)
871 }
872 SparseStrategy::MaskSkipFullyNull => {
873 self.mask_sparse_skip_fully_null(logical_nulls)
874 }
875 }
876 }
877 };
878
879 let null_buffer = NullBuffer::from(boolean_buffer);
880
881 if null_buffer.null_count() > 0 {
882 Some(null_buffer)
883 } else {
884 None
885 }
886 }
887
888 fn is_nullable(&self) -> bool {
889 self.fields
890 .iter()
891 .flatten()
892 .any(|field| field.is_nullable())
893 }
894
895 fn get_buffer_memory_size(&self) -> usize {
896 let mut sum = self.type_ids.inner().capacity();
897 if let Some(o) = self.offsets.as_ref() {
898 sum += o.inner().capacity()
899 }
900 self.fields
901 .iter()
902 .flat_map(|x| x.as_ref().map(|x| x.get_buffer_memory_size()))
903 .sum::<usize>()
904 + sum
905 }
906
907 fn get_array_memory_size(&self) -> usize {
908 let mut sum = self.type_ids.inner().capacity();
909 if let Some(o) = self.offsets.as_ref() {
910 sum += o.inner().capacity()
911 }
912 std::mem::size_of::<Self>()
913 + self
914 .fields
915 .iter()
916 .flat_map(|x| x.as_ref().map(|x| x.get_array_memory_size()))
917 .sum::<usize>()
918 + sum
919 }
920}
921
922impl std::fmt::Debug for UnionArray {
923 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
924 let header = if self.is_dense() {
925 "UnionArray(Dense)\n["
926 } else {
927 "UnionArray(Sparse)\n["
928 };
929 writeln!(f, "{header}")?;
930
931 writeln!(f, "-- type id buffer:")?;
932 writeln!(f, "{:?}", self.type_ids)?;
933
934 if let Some(offsets) = &self.offsets {
935 writeln!(f, "-- offsets buffer:")?;
936 writeln!(f, "{:?}", offsets)?;
937 }
938
939 let fields = match self.data_type() {
940 DataType::Union(fields, _) => fields,
941 _ => unreachable!(),
942 };
943
944 for (type_id, field) in fields.iter() {
945 let child = self.child(type_id);
946 writeln!(
947 f,
948 "-- child {}: \"{}\" ({:?})",
949 type_id,
950 field.name(),
951 field.data_type()
952 )?;
953 std::fmt::Debug::fmt(child, f)?;
954 writeln!(f)?;
955 }
956 writeln!(f, "]")
957 }
958}
959
/// The ways in which the logical nulls of a sparse union can be computed.
enum SparseStrategy {
    /// Look the validity of every slot up individually (`gather_nulls`).
    Gather,
    /// Chunked masks over all fields, deriving the selection of the first one
    /// from the others (`mask_sparse_all_with_nulls_skip_one`).
    MaskAllFieldsWithNullsSkipOne,
    /// Chunked masks visiting only the fields that contain nulls
    /// (`mask_sparse_skip_without_nulls`).
    MaskSkipWithoutNulls,
    /// Chunked masks skipping the fields that are entirely null
    /// (`mask_sparse_skip_fully_null`).
    MaskSkipFullyNull,
}
974
/// A bit mask that, once cast to `usize` and and-ed with an index, either
/// forces the index to zero or leaves it untouched.
#[derive(Clone, Copy)]
#[repr(usize)]
enum Mask {
    /// No bit set: `index & Zero` is always 0.
    Zero = 0,
    /// Every bit set: `index & Max` is `index`.
    // The discriminant depends on the pointer width, which is intended here.
    #[allow(clippy::enum_clike_unportable_variant)]
    Max = usize::MAX,
}
983
/// Packs into a `u64` one bit per entry of `type_ids_chunk`: bit `i` is set
/// when entry `i` equals `type_id`.
///
/// The chunk is expected to hold at most 64 entries.
fn selection_mask(type_ids_chunk: &[i8], type_id: i8) -> u64 {
    let mut packed = 0_u64;
    for (bit_idx, &candidate) in type_ids_chunk.iter().enumerate() {
        packed |= u64::from(candidate == type_id) << bit_idx;
    }
    packed
}
993
994fn without_nulls_selected(type_ids_chunk: &[i8], without_nulls_ids: &[i8]) -> u64 {
996 without_nulls_ids
997 .iter()
998 .fold(0, |fully_valid_selected, field_type_id| {
999 fully_valid_selected | selection_mask(type_ids_chunk, *field_type_id)
1000 })
1001}
1002
1003#[cfg(test)]
1004mod tests {
1005 use super::*;
1006 use std::collections::HashSet;
1007
1008 use crate::array::Int8Type;
1009 use crate::builder::UnionBuilder;
1010 use crate::cast::AsArray;
1011 use crate::types::{Float32Type, Float64Type, Int32Type, Int64Type};
1012 use crate::{Float64Array, Int32Array, Int64Array, StringArray};
1013 use crate::{Int8Array, RecordBatch};
1014 use arrow_buffer::Buffer;
1015 use arrow_schema::{Field, Schema};
1016
1017 #[test]
1018 fn test_dense_i32() {
1019 let mut builder = UnionBuilder::new_dense();
1020 builder.append::<Int32Type>("a", 1).unwrap();
1021 builder.append::<Int32Type>("b", 2).unwrap();
1022 builder.append::<Int32Type>("c", 3).unwrap();
1023 builder.append::<Int32Type>("a", 4).unwrap();
1024 builder.append::<Int32Type>("c", 5).unwrap();
1025 builder.append::<Int32Type>("a", 6).unwrap();
1026 builder.append::<Int32Type>("b", 7).unwrap();
1027 let union = builder.build().unwrap();
1028
1029 let expected_type_ids = vec![0_i8, 1, 2, 0, 2, 0, 1];
1030 let expected_offsets = vec![0_i32, 0, 0, 1, 1, 2, 1];
1031 let expected_array_values = [1_i32, 2, 3, 4, 5, 6, 7];
1032
1033 assert_eq!(*union.type_ids(), expected_type_ids);
1035 for (i, id) in expected_type_ids.iter().enumerate() {
1036 assert_eq!(id, &union.type_id(i));
1037 }
1038
1039 assert_eq!(*union.offsets().unwrap(), expected_offsets);
1041 for (i, id) in expected_offsets.iter().enumerate() {
1042 assert_eq!(union.value_offset(i), *id as usize);
1043 }
1044
1045 assert_eq!(
1047 *union.child(0).as_primitive::<Int32Type>().values(),
1048 [1_i32, 4, 6]
1049 );
1050 assert_eq!(
1051 *union.child(1).as_primitive::<Int32Type>().values(),
1052 [2_i32, 7]
1053 );
1054 assert_eq!(
1055 *union.child(2).as_primitive::<Int32Type>().values(),
1056 [3_i32, 5]
1057 );
1058
1059 assert_eq!(expected_array_values.len(), union.len());
1060 for (i, expected_value) in expected_array_values.iter().enumerate() {
1061 assert!(!union.is_null(i));
1062 let slot = union.value(i);
1063 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1064 assert_eq!(slot.len(), 1);
1065 let value = slot.value(0);
1066 assert_eq!(expected_value, &value);
1067 }
1068 }
1069
1070 #[test]
1071 #[cfg_attr(miri, ignore)]
1072 fn test_dense_i32_large() {
1073 let mut builder = UnionBuilder::new_dense();
1074
1075 let expected_type_ids = vec![0_i8; 1024];
1076 let expected_offsets: Vec<_> = (0..1024).collect();
1077 let expected_array_values: Vec<_> = (1..=1024).collect();
1078
1079 expected_array_values
1080 .iter()
1081 .for_each(|v| builder.append::<Int32Type>("a", *v).unwrap());
1082
1083 let union = builder.build().unwrap();
1084
1085 assert_eq!(*union.type_ids(), expected_type_ids);
1087 for (i, id) in expected_type_ids.iter().enumerate() {
1088 assert_eq!(id, &union.type_id(i));
1089 }
1090
1091 assert_eq!(*union.offsets().unwrap(), expected_offsets);
1093 for (i, id) in expected_offsets.iter().enumerate() {
1094 assert_eq!(union.value_offset(i), *id as usize);
1095 }
1096
1097 for (i, expected_value) in expected_array_values.iter().enumerate() {
1098 assert!(!union.is_null(i));
1099 let slot = union.value(i);
1100 let slot = slot.as_primitive::<Int32Type>();
1101 assert_eq!(slot.len(), 1);
1102 let value = slot.value(0);
1103 assert_eq!(expected_value, &value);
1104 }
1105 }
1106
1107 #[test]
1108 fn test_dense_mixed() {
1109 let mut builder = UnionBuilder::new_dense();
1110 builder.append::<Int32Type>("a", 1).unwrap();
1111 builder.append::<Int64Type>("c", 3).unwrap();
1112 builder.append::<Int32Type>("a", 4).unwrap();
1113 builder.append::<Int64Type>("c", 5).unwrap();
1114 builder.append::<Int32Type>("a", 6).unwrap();
1115 let union = builder.build().unwrap();
1116
1117 assert_eq!(5, union.len());
1118 for i in 0..union.len() {
1119 let slot = union.value(i);
1120 assert!(!union.is_null(i));
1121 match i {
1122 0 => {
1123 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1124 assert_eq!(slot.len(), 1);
1125 let value = slot.value(0);
1126 assert_eq!(1_i32, value);
1127 }
1128 1 => {
1129 let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
1130 assert_eq!(slot.len(), 1);
1131 let value = slot.value(0);
1132 assert_eq!(3_i64, value);
1133 }
1134 2 => {
1135 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1136 assert_eq!(slot.len(), 1);
1137 let value = slot.value(0);
1138 assert_eq!(4_i32, value);
1139 }
1140 3 => {
1141 let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
1142 assert_eq!(slot.len(), 1);
1143 let value = slot.value(0);
1144 assert_eq!(5_i64, value);
1145 }
1146 4 => {
1147 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1148 assert_eq!(slot.len(), 1);
1149 let value = slot.value(0);
1150 assert_eq!(6_i32, value);
1151 }
1152 _ => unreachable!(),
1153 }
1154 }
1155 }
1156
1157 #[test]
1158 fn test_dense_mixed_with_nulls() {
1159 let mut builder = UnionBuilder::new_dense();
1160 builder.append::<Int32Type>("a", 1).unwrap();
1161 builder.append::<Int64Type>("c", 3).unwrap();
1162 builder.append::<Int32Type>("a", 10).unwrap();
1163 builder.append_null::<Int32Type>("a").unwrap();
1164 builder.append::<Int32Type>("a", 6).unwrap();
1165 let union = builder.build().unwrap();
1166
1167 assert_eq!(5, union.len());
1168 for i in 0..union.len() {
1169 let slot = union.value(i);
1170 match i {
1171 0 => {
1172 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1173 assert!(!slot.is_null(0));
1174 assert_eq!(slot.len(), 1);
1175 let value = slot.value(0);
1176 assert_eq!(1_i32, value);
1177 }
1178 1 => {
1179 let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
1180 assert!(!slot.is_null(0));
1181 assert_eq!(slot.len(), 1);
1182 let value = slot.value(0);
1183 assert_eq!(3_i64, value);
1184 }
1185 2 => {
1186 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1187 assert!(!slot.is_null(0));
1188 assert_eq!(slot.len(), 1);
1189 let value = slot.value(0);
1190 assert_eq!(10_i32, value);
1191 }
1192 3 => assert!(slot.is_null(0)),
1193 4 => {
1194 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1195 assert!(!slot.is_null(0));
1196 assert_eq!(slot.len(), 1);
1197 let value = slot.value(0);
1198 assert_eq!(6_i32, value);
1199 }
1200 _ => unreachable!(),
1201 }
1202 }
1203 }
1204
1205 #[test]
1206 fn test_dense_mixed_with_nulls_and_offset() {
1207 let mut builder = UnionBuilder::new_dense();
1208 builder.append::<Int32Type>("a", 1).unwrap();
1209 builder.append::<Int64Type>("c", 3).unwrap();
1210 builder.append::<Int32Type>("a", 10).unwrap();
1211 builder.append_null::<Int32Type>("a").unwrap();
1212 builder.append::<Int32Type>("a", 6).unwrap();
1213 let union = builder.build().unwrap();
1214
1215 let slice = union.slice(2, 3);
1216 let new_union = slice.as_any().downcast_ref::<UnionArray>().unwrap();
1217
1218 assert_eq!(3, new_union.len());
1219 for i in 0..new_union.len() {
1220 let slot = new_union.value(i);
1221 match i {
1222 0 => {
1223 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1224 assert!(!slot.is_null(0));
1225 assert_eq!(slot.len(), 1);
1226 let value = slot.value(0);
1227 assert_eq!(10_i32, value);
1228 }
1229 1 => assert!(slot.is_null(0)),
1230 2 => {
1231 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1232 assert!(!slot.is_null(0));
1233 assert_eq!(slot.len(), 1);
1234 let value = slot.value(0);
1235 assert_eq!(6_i32, value);
1236 }
1237 _ => unreachable!(),
1238 }
1239 }
1240 }
1241
1242 #[test]
1243 fn test_dense_mixed_with_str() {
1244 let string_array = StringArray::from(vec!["foo", "bar", "baz"]);
1245 let int_array = Int32Array::from(vec![5, 6]);
1246 let float_array = Float64Array::from(vec![10.0]);
1247
1248 let type_ids = [1, 0, 0, 2, 0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1249 let offsets = [0, 0, 1, 0, 2, 1]
1250 .into_iter()
1251 .collect::<ScalarBuffer<i32>>();
1252
1253 let fields = [
1254 (0, Arc::new(Field::new("A", DataType::Utf8, false))),
1255 (1, Arc::new(Field::new("B", DataType::Int32, false))),
1256 (2, Arc::new(Field::new("C", DataType::Float64, false))),
1257 ]
1258 .into_iter()
1259 .collect::<UnionFields>();
1260 let children = [
1261 Arc::new(string_array) as Arc<dyn Array>,
1262 Arc::new(int_array),
1263 Arc::new(float_array),
1264 ]
1265 .into_iter()
1266 .collect();
1267 let array =
1268 UnionArray::try_new(fields, type_ids.clone(), Some(offsets.clone()), children).unwrap();
1269
1270 assert_eq!(*array.type_ids(), type_ids);
1272 for (i, id) in type_ids.iter().enumerate() {
1273 assert_eq!(id, &array.type_id(i));
1274 }
1275
1276 assert_eq!(*array.offsets().unwrap(), offsets);
1278 for (i, id) in offsets.iter().enumerate() {
1279 assert_eq!(*id as usize, array.value_offset(i));
1280 }
1281
1282 assert_eq!(6, array.len());
1284
1285 let slot = array.value(0);
1286 let value = slot.as_any().downcast_ref::<Int32Array>().unwrap().value(0);
1287 assert_eq!(5, value);
1288
1289 let slot = array.value(1);
1290 let value = slot
1291 .as_any()
1292 .downcast_ref::<StringArray>()
1293 .unwrap()
1294 .value(0);
1295 assert_eq!("foo", value);
1296
1297 let slot = array.value(2);
1298 let value = slot
1299 .as_any()
1300 .downcast_ref::<StringArray>()
1301 .unwrap()
1302 .value(0);
1303 assert_eq!("bar", value);
1304
1305 let slot = array.value(3);
1306 let value = slot
1307 .as_any()
1308 .downcast_ref::<Float64Array>()
1309 .unwrap()
1310 .value(0);
1311 assert_eq!(10.0, value);
1312
1313 let slot = array.value(4);
1314 let value = slot
1315 .as_any()
1316 .downcast_ref::<StringArray>()
1317 .unwrap()
1318 .value(0);
1319 assert_eq!("baz", value);
1320
1321 let slot = array.value(5);
1322 let value = slot.as_any().downcast_ref::<Int32Array>().unwrap().value(0);
1323 assert_eq!(6, value);
1324 }
1325
1326 #[test]
1327 fn test_sparse_i32() {
1328 let mut builder = UnionBuilder::new_sparse();
1329 builder.append::<Int32Type>("a", 1).unwrap();
1330 builder.append::<Int32Type>("b", 2).unwrap();
1331 builder.append::<Int32Type>("c", 3).unwrap();
1332 builder.append::<Int32Type>("a", 4).unwrap();
1333 builder.append::<Int32Type>("c", 5).unwrap();
1334 builder.append::<Int32Type>("a", 6).unwrap();
1335 builder.append::<Int32Type>("b", 7).unwrap();
1336 let union = builder.build().unwrap();
1337
1338 let expected_type_ids = vec![0_i8, 1, 2, 0, 2, 0, 1];
1339 let expected_array_values = [1_i32, 2, 3, 4, 5, 6, 7];
1340
1341 assert_eq!(*union.type_ids(), expected_type_ids);
1343 for (i, id) in expected_type_ids.iter().enumerate() {
1344 assert_eq!(id, &union.type_id(i));
1345 }
1346
1347 assert!(union.offsets().is_none());
1349
1350 assert_eq!(
1352 *union.child(0).as_primitive::<Int32Type>().values(),
1353 [1_i32, 0, 0, 4, 0, 6, 0],
1354 );
1355 assert_eq!(
1356 *union.child(1).as_primitive::<Int32Type>().values(),
1357 [0_i32, 2_i32, 0, 0, 0, 0, 7]
1358 );
1359 assert_eq!(
1360 *union.child(2).as_primitive::<Int32Type>().values(),
1361 [0_i32, 0, 3_i32, 0, 5, 0, 0]
1362 );
1363
1364 assert_eq!(expected_array_values.len(), union.len());
1365 for (i, expected_value) in expected_array_values.iter().enumerate() {
1366 assert!(!union.is_null(i));
1367 let slot = union.value(i);
1368 let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
1369 assert_eq!(slot.len(), 1);
1370 let value = slot.value(0);
1371 assert_eq!(expected_value, &value);
1372 }
1373 }
1374
/// Interleaves `Int32` ("a") and `Float64` ("c") values in a sparse union and checks the
/// type ids, the missing offsets buffer, and the single-element array returned per slot.
#[test]
fn test_sparse_mixed() {
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    builder.append::<Float64Type>("c", 5.0).unwrap();
    builder.append::<Int32Type>("a", 6).unwrap();
    let union = builder.build().unwrap();

    let expected_type_ids = vec![0_i8, 1, 0, 1, 0];

    // The whole buffer and the per-index accessor must agree with the expectation.
    assert_eq!(*union.type_ids(), expected_type_ids);
    for (position, expected_id) in expected_type_ids.iter().enumerate() {
        assert_eq!(expected_id, &union.type_id(position));
    }

    // The array was built with the sparse builder, so no offsets are expected.
    assert!(union.offsets().is_none());

    // Each slot is returned as a one-element array of the child's concrete type.
    let expect_int = |slot: &ArrayRef, expected: i32| {
        let ints = slot.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ints.len(), 1);
        assert_eq!(expected, ints.value(0));
    };
    let expect_float = |slot: &ArrayRef, expected: f64| {
        let floats = slot.as_any().downcast_ref::<Float64Array>().unwrap();
        assert_eq!(floats.len(), 1);
        assert_eq!(floats.value(0), expected);
    };

    for position in 0..union.len() {
        let slot = union.value(position);
        assert!(!union.is_null(position));
        match position {
            0 => expect_int(&slot, 1),
            1 => expect_float(&slot, 3.0),
            2 => expect_int(&slot, 4),
            3 => expect_float(&slot, 5.0),
            4 => expect_int(&slot, 6),
            _ => unreachable!(),
        }
    }
}
1434
/// Builds a sparse union containing one null `Int32` entry and checks the type ids,
/// the missing offsets buffer, and the validity and value of every slot.
#[test]
fn test_sparse_mixed_with_nulls() {
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append_null::<Int32Type>("a").unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    let union = builder.build().unwrap();

    let expected_type_ids = vec![0_i8, 0, 1, 0];

    // The whole buffer and the per-index accessor must agree with the expectation.
    assert_eq!(*union.type_ids(), expected_type_ids);
    for (position, expected_id) in expected_type_ids.iter().enumerate() {
        assert_eq!(expected_id, &union.type_id(position));
    }

    // The array was built with the sparse builder, so no offsets are expected.
    assert!(union.offsets().is_none());

    // A populated slot is a valid one-element array holding the appended value.
    let expect_valid_int = |slot: &ArrayRef, expected: i32| {
        let ints = slot.as_any().downcast_ref::<Int32Array>().unwrap();
        assert!(!ints.is_null(0));
        assert_eq!(ints.len(), 1);
        assert_eq!(expected, ints.value(0));
    };
    let expect_valid_float = |slot: &ArrayRef, expected: f64| {
        let floats = slot.as_any().downcast_ref::<Float64Array>().unwrap();
        assert!(!floats.is_null(0));
        assert_eq!(floats.len(), 1);
        assert_eq!(floats.value(0), expected);
    };

    for position in 0..union.len() {
        let slot = union.value(position);
        match position {
            0 => expect_valid_int(&slot, 1),
            // The entry appended with `append_null` surfaces as a null child value.
            1 => assert!(slot.is_null(0)),
            2 => expect_valid_float(&slot, 3.0),
            3 => expect_valid_int(&slot, 4),
            _ => unreachable!(),
        }
    }
}
1484
/// Slices a sparse union that contains nulls (dropping the first element) and checks
/// that every slot of the slice reports the expected validity and value.
#[test]
fn test_sparse_mixed_with_nulls_and_offset() {
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append_null::<Int32Type>("a").unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append_null::<Float64Type>("c").unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    let union = builder.build().unwrap();

    // Keep elements 1..=4 of the original five.
    let sliced = union.slice(1, 4);
    let new_union = sliced.as_any().downcast_ref::<UnionArray>().unwrap();

    assert_eq!(4, new_union.len());
    for (position, slot) in (0..new_union.len()).map(|i| (i, new_union.value(i))) {
        match position {
            // Both entries appended with `append_null` must come back as null.
            0 | 2 => assert!(slot.is_null(0)),
            1 => {
                let floats = slot.as_primitive::<Float64Type>();
                assert!(!floats.is_null(0));
                assert_eq!(floats.len(), 1);
                assert_eq!(floats.value(0), 3_f64);
            }
            3 => {
                let ints = slot.as_primitive::<Int32Type>();
                assert!(!ints.is_null(0));
                assert_eq!(ints.len(), 1);
                assert_eq!(4_i32, ints.value(0));
            }
            _ => unreachable!(),
        }
    }
}
1522
1523 fn test_union_validity(union_array: &UnionArray) {
1524 assert_eq!(union_array.null_count(), 0);
1525
1526 for i in 0..union_array.len() {
1527 assert!(!union_array.is_null(i));
1528 assert!(union_array.is_valid(i));
1529 }
1530 }
1531
/// Runs `test_union_validity` against a sparse and a dense union that were both
/// populated with the same mix of values and null entries.
#[test]
fn test_union_array_validity() {
    /// Appends the shared value/null sequence and finishes the builder.
    fn build_with_nulls(mut builder: UnionBuilder) -> UnionArray {
        builder.append::<Int32Type>("a", 1).unwrap();
        builder.append_null::<Int32Type>("a").unwrap();
        builder.append::<Float64Type>("c", 3.0).unwrap();
        builder.append_null::<Float64Type>("c").unwrap();
        builder.append::<Int32Type>("a", 4).unwrap();
        builder.build().unwrap()
    }

    let sparse = build_with_nulls(UnionBuilder::new_sparse());
    test_union_validity(&sparse);

    let dense = build_with_nulls(UnionBuilder::new_dense());
    test_union_validity(&dense);
}
1554
/// Appending a value whose type differs from the one already registered for a field
/// name must fail with a message naming both types.
#[test]
fn test_type_check() {
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Float32Type>("a", 1.0).unwrap();

    // Re-using field "a" with a different primitive type is rejected.
    let message = builder.append::<Int32Type>("a", 1).unwrap_err().to_string();
    let expected_fragment =
        "Attempt to write col \"a\" with type Int32 doesn't match existing type Float32";
    assert!(message.contains(expected_fragment), "{}", message);
}
1568
/// Wraps a sparse and a dense union in a `RecordBatch`, slices the batch, and checks
/// the type ids and values seen through the sliced union column.
#[test]
fn slice_union_array() {
    /// Fills the builder with the five-element sequence used by this test.
    fn populate(mut builder: UnionBuilder) -> UnionArray {
        builder.append::<Int32Type>("a", 1).unwrap();
        builder.append_null::<Int32Type>("a").unwrap();
        builder.append::<Float64Type>("c", 3.0).unwrap();
        builder.append_null::<Float64Type>("c").unwrap();
        builder.append::<Int32Type>("a", 4).unwrap();
        builder.build().unwrap()
    }

    /// Builds a single-column batch whose only column is the given union.
    fn wrap_in_batch(union: UnionArray) -> RecordBatch {
        let column = Field::new("struct_array", union.data_type().clone(), true);
        let schema = Arc::new(Schema::new(vec![column]));
        RecordBatch::try_new(schema, vec![Arc::new(union)]).unwrap()
    }

    /// Verifies the three elements kept by `slice(1, 3)`.
    fn check_slice(batch: RecordBatch) {
        let union_slice = batch
            .column(0)
            .as_any()
            .downcast_ref::<UnionArray>()
            .unwrap();

        for (position, expected_id) in [0_i8, 1, 1].into_iter().enumerate() {
            assert_eq!(union_slice.type_id(position), expected_id);
        }

        // First kept element: the null appended to "a".
        let slot = union_slice.value(0);
        let ints = slot.as_primitive::<Int32Type>();
        assert_eq!(ints.len(), 1);
        assert!(ints.is_null(0));

        // Second kept element: the 3.0 appended to "c".
        let slot = union_slice.value(1);
        let floats = slot.as_primitive::<Float64Type>();
        assert_eq!(floats.len(), 1);
        assert!(floats.is_valid(0));
        assert_eq!(floats.value(0), 3.0);

        // Third kept element: the null appended to "c".
        let slot = union_slice.value(2);
        let floats = slot.as_primitive::<Float64Type>();
        assert_eq!(floats.len(), 1);
        assert!(floats.is_null(0));
    }

    // Same scenario for both union layouts: sparse first, then dense.
    for builder in [UnionBuilder::new_sparse(), UnionBuilder::new_dense()] {
        let batch = wrap_in_batch(populate(builder));
        check_slice(batch.slice(1, 3));
    }
}
1633
/// Builds a dense union whose field type ids are 8, 4 and 9 directly from `ArrayData`
/// and checks that each slot resolves to the child registered under its type id.
#[test]
fn test_custom_type_ids() {
    let data_type = DataType::Union(
        UnionFields::new(
            vec![8, 4, 9],
            vec![
                Field::new("strings", DataType::Utf8, false),
                Field::new("integers", DataType::Int32, false),
                Field::new("floats", DataType::Float64, false),
            ],
        ),
        UnionMode::Dense,
    );

    let children = vec![
        StringArray::from(vec!["foo", "bar", "baz"]).into_data(),
        Int32Array::from(vec![5, 6, 4]).into_data(),
        Float64Array::from(vec![10.0]).into_data(),
    ];

    let type_ids = Buffer::from_vec(vec![4_i8, 8, 4, 8, 9, 4, 8]);
    let value_offsets = Buffer::from_vec(vec![0_i32, 0, 1, 1, 0, 2, 2]);

    let data = ArrayData::builder(data_type)
        .len(7)
        .buffers(vec![type_ids, value_offsets])
        .child_data(children)
        .build()
        .unwrap();

    let array = UnionArray::from(data);

    // Type id 4 selects the "integers" child.
    let expect_int = |index: usize, expected: i32| {
        let v = array.value(index);
        assert_eq!(v.data_type(), &DataType::Int32);
        assert_eq!(v.len(), 1);
        assert_eq!(v.as_primitive::<Int32Type>().value(0), expected);
    };
    // Type id 8 selects the "strings" child.
    let expect_str = |index: usize, expected: &str| {
        let v = array.value(index);
        assert_eq!(v.data_type(), &DataType::Utf8);
        assert_eq!(v.len(), 1);
        assert_eq!(v.as_string::<i32>().value(0), expected);
    };

    expect_int(0, 5);
    expect_str(1, "foo");
    expect_int(2, 6);
    expect_str(3, "bar");

    // Type id 9 selects the "floats" child; it occurs only once.
    let v = array.value(4);
    assert_eq!(v.data_type(), &DataType::Float64);
    assert_eq!(v.len(), 1);
    assert_eq!(v.as_primitive::<Float64Type>().value(0), 10.0);

    expect_int(5, 4);
    expect_str(6, "baz");
}
1703
/// Decomposes a dense and a sparse union with `into_parts`, checks the returned
/// pieces, and verifies that `try_new` accepts them again.
#[test]
fn into_parts() {
    // Dense layout.
    let mut builder = UnionBuilder::new_dense();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Int8Type>("b", 2).unwrap();
    builder.append::<Int32Type>("a", 3).unwrap();
    let dense_union = builder.build().unwrap();

    let expected_fields = [
        &Arc::new(Field::new("a", DataType::Int32, false)),
        &Arc::new(Field::new("b", DataType::Int8, false)),
    ];
    let (union_fields, type_ids, offsets, children) = dense_union.into_parts();

    let actual_fields = union_fields.iter().map(|(_, f)| f).collect::<Vec<_>>();
    assert_eq!(actual_fields, expected_fields);
    assert_eq!(type_ids, [0, 1, 0]);
    assert!(offsets.is_some());
    assert_eq!(offsets.as_ref().unwrap(), &[0, 0, 1]);

    // The parts must be accepted by the validating constructor.
    let rebuilt = UnionArray::try_new(union_fields, type_ids, offsets, children);
    assert!(rebuilt.is_ok());
    assert_eq!(rebuilt.unwrap().len(), 3);

    // Sparse layout: same values, but no offsets are returned.
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Int8Type>("b", 2).unwrap();
    builder.append::<Int32Type>("a", 3).unwrap();
    let sparse_union = builder.build().unwrap();

    let (union_fields, type_ids, offsets, children) = sparse_union.into_parts();
    assert_eq!(type_ids, [0, 1, 0]);
    assert!(offsets.is_none());

    let rebuilt = UnionArray::try_new(union_fields, type_ids, offsets, children);
    assert!(rebuilt.is_ok());
    assert_eq!(rebuilt.unwrap().len(), 3);
}
1746
/// Decomposes a dense union that uses the type ids 8, 4 and 9 and checks that the
/// returned parts use exactly those ids and can be fed back into `try_new`.
#[test]
fn into_parts_custom_type_ids() {
    let set_field_type_ids: [i8; 3] = [8, 4, 9];
    let union_type = DataType::Union(
        UnionFields::new(
            set_field_type_ids,
            [
                Field::new("strings", DataType::Utf8, false),
                Field::new("integers", DataType::Int32, false),
                Field::new("floats", DataType::Float64, false),
            ],
        ),
        UnionMode::Dense,
    );

    let child_data = vec![
        StringArray::from(vec!["foo", "bar", "baz"]).into_data(),
        Int32Array::from(vec![5, 6, 4]).into_data(),
        Float64Array::from(vec![10.0]).into_data(),
    ];
    let type_id_buffer = Buffer::from_vec(vec![4_i8, 8, 4, 8, 9, 4, 8]);
    let offset_buffer = Buffer::from_vec(vec![0_i32, 0, 1, 1, 0, 2, 2]);

    let data = ArrayData::builder(union_type)
        .len(7)
        .buffers(vec![type_id_buffer, offset_buffer])
        .child_data(child_data)
        .build()
        .unwrap();
    let array = UnionArray::from(data);

    let (union_fields, type_ids, offsets, children) = array.into_parts();

    // The distinct ids in the type id buffer are exactly the declared field ids.
    let seen_ids = type_ids.iter().collect::<HashSet<_>>();
    let declared_ids = set_field_type_ids.iter().collect::<HashSet<_>>();
    assert_eq!(seen_ids, declared_ids);

    let rebuilt = UnionArray::try_new(union_fields, type_ids, offsets, children);
    assert!(rebuilt.is_ok());
    let array = rebuilt.unwrap();
    assert_eq!(array.len(), 7);
}
1788
/// Exercises the argument validation of `UnionArray::try_new` and pins the exact
/// error message produced for each kind of invalid input.
#[test]
fn test_invalid() {
    let fields = UnionFields::new(
        [3, 2],
        [
            Field::new("a", DataType::Utf8, false),
            Field::new("b", DataType::Utf8, false),
        ],
    );

    // Calls `try_new` with the shared fields and renders the resulting error.
    let error_message = |type_ids: ScalarBuffer<i8>,
                         offsets: Option<ScalarBuffer<i32>>,
                         children: Vec<ArrayRef>| {
        UnionArray::try_new(fields.clone(), type_ids, offsets, children)
            .unwrap_err()
            .to_string()
    };

    // Sparse inputs: both children hold two elements.
    let children: Vec<ArrayRef> = vec![
        Arc::new(StringArray::from_iter_values(["a", "b"])) as ArrayRef,
        Arc::new(StringArray::from_iter_values(["c", "d"])) as ArrayRef,
    ];

    // Three type ids against two-element children.
    assert_eq!(
        error_message(vec![3, 3, 2].into(), None, children.clone()),
        "Invalid argument error: Sparse union child arrays must be equal in length to the length of the union"
    );

    // Type id 1 is not one of the declared field ids (3 and 2).
    assert_eq!(
        error_message(vec![1, 2].into(), None, children.clone()),
        "Invalid argument error: Type Ids values must match one of the field type ids"
    );

    // Type id 7 is not one of the declared field ids either.
    assert_eq!(
        error_message(vec![7, 2].into(), None, children),
        "Invalid argument error: Type Ids values must match one of the field type ids"
    );

    // Dense inputs: children of length two and one.
    let children: Vec<ArrayRef> = vec![
        Arc::new(StringArray::from_iter_values(["a", "b"])) as ArrayRef,
        Arc::new(StringArray::from_iter_values(["c"])) as ArrayRef,
    ];
    let type_ids = ScalarBuffer::from(vec![3_i8, 3, 2]);

    // These offsets are accepted.
    UnionArray::try_new(
        fields.clone(),
        type_ids.clone(),
        Some(vec![0, 1, 0].into()),
        children.clone(),
    )
    .unwrap();

    // Offset 1 into the one-element second child is rejected.
    assert_eq!(
        error_message(type_ids.clone(), Some(vec![0, 1, 1].into()), children.clone()),
        "Invalid argument error: Offsets must be positive and within the length of the Array"
    );

    // Two offsets against three type ids.
    assert_eq!(
        error_message(type_ids.clone(), Some(vec![0, 1].into()), children),
        "Invalid argument error: Type Ids and Offsets lengths must match"
    );

    // No children against two declared fields.
    assert_eq!(
        error_message(type_ids, None, vec![]),
        "Invalid argument error: Union fields length must match child arrays length"
    );
}
1859
/// Checks `logical_nulls` for unions whose children are either all fully valid or
/// all fully null, plus the empty union.
#[test]
fn test_logical_nulls_fast_paths() {
    // Union without any fields or elements.
    let empty = UnionArray::try_new(UnionFields::empty(), vec![].into(), None, vec![]).unwrap();
    assert!(empty.logical_nulls().is_none());

    // Fields declared as non-nullable.
    let non_nullable_fields = UnionFields::new(
        [1, 3],
        [
            Field::new("a", DataType::Int8, false),
            Field::new("b", DataType::Int8, false),
        ],
    );
    let array = UnionArray::try_new(
        non_nullable_fields,
        vec![1].into(),
        None,
        vec![
            Arc::new(Int8Array::from_value(5, 1)),
            Arc::new(Int8Array::from_value(5, 1)),
        ],
    )
    .unwrap();
    assert!(array.logical_nulls().is_none());

    let nullable_fields = UnionFields::new(
        [1, 3],
        [
            Field::new("a", DataType::Int8, true),
            Field::new("b", DataType::Int8, true),
        ],
    );

    // Nullable fields, but neither child contains a null.
    let array = UnionArray::try_new(
        nullable_fields.clone(),
        vec![1, 1].into(),
        None,
        vec![
            Arc::new(Int8Array::from_value(-5, 2)),
            Arc::new(Int8Array::from_value(-5, 2)),
        ],
    )
    .unwrap();
    assert!(array.logical_nulls().is_none());

    // Sparse union whose children are entirely null.
    let array = UnionArray::try_new(
        nullable_fields.clone(),
        vec![1, 1].into(),
        None,
        vec![
            Arc::new(Int8Array::new_null(2)),
            Arc::new(Int8Array::new_null(2)),
        ],
    )
    .unwrap();
    assert_eq!(array.logical_nulls(), Some(NullBuffer::new_null(2)));

    // Dense union whose children are entirely null and longer than the union itself.
    let array = UnionArray::try_new(
        nullable_fields,
        vec![1, 1].into(),
        Some(vec![0, 1].into()),
        vec![
            Arc::new(Int8Array::new_null(3)),
            Arc::new(Int8Array::new_null(3)),
        ],
    )
    .unwrap();
    assert_eq!(array.logical_nulls(), Some(NullBuffer::new_null(2)));
}
1935
/// Checks `logical_nulls` on a dense union whose children are fully valid (ints),
/// partially null (floats) and fully null (strings).
#[test]
fn test_dense_union_logical_nulls_gather() {
    let children = vec![
        Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef,
        Arc::new(Float64Array::from(vec![Some(3.2), None])),
        Arc::new(StringArray::new_null(1)),
    ];

    // Two elements per child type; both string elements point at the single null.
    let type_ids = ScalarBuffer::<i8>::from(vec![1, 1, 3, 3, 4, 4]);
    let offsets = ScalarBuffer::<i32>::from(vec![0, 1, 0, 1, 0, 0]);

    let array = UnionArray::try_new(union_fields(), type_ids, Some(offsets), children).unwrap();

    let expected = NullBuffer::from(vec![true, true, true, false, false, false]);
    assert_eq!(Some(expected), array.logical_nulls());
}
1960
/// Checks `logical_nulls` on a two-field sparse union where the int child is fully
/// null and the float child is partially null, for a short and a 160-element input.
#[test]
fn test_sparse_union_logical_nulls_mask_all_nulls_skip_one() {
    let fields = UnionFields::from_iter([
        (1, Arc::new(Field::new("A", DataType::Int32, true))),
        (3, Arc::new(Field::new("B", DataType::Float64, true))),
    ]);

    // Short input: only index 2 selects a non-null value (the float 3.2).
    let children = vec![
        Arc::new(Int32Array::new_null(4)) as ArrayRef,
        Arc::new(Float64Array::from(vec![None, None, Some(3.2), None])),
    ];
    let type_ids = ScalarBuffer::<i8>::from(vec![1, 1, 3, 3]);

    let array = UnionArray::try_new(fields.clone(), type_ids, None, children).unwrap();

    let expected = NullBuffer::from(vec![false, false, true, false]);
    assert_eq!(Some(expected), array.logical_nulls());

    // Longer input built by cycling short patterns: 2 * 64 + 32 elements.
    let len = 2 * 64 + 32;

    let type_ids = ScalarBuffer::from_iter([1, 1, 3, 3].into_iter().cycle().take(len));
    let children = vec![
        Arc::new(Int32Array::new_null(len)) as ArrayRef,
        Arc::new(Float64Array::from_iter(
            [Some(3.2), None].into_iter().cycle().take(len),
        )),
    ];

    let array = UnionArray::try_new(fields, type_ids, None, children).unwrap();

    let expected =
        NullBuffer::from_iter([false, false, true, false].into_iter().cycle().take(len));
    assert_eq!(array.len(), len);
    assert_eq!(Some(expected), array.logical_nulls());
}
2007
/// Checks `logical_nulls` on a three-field sparse union whose int and float children
/// are fully valid while the string child contains nulls, for a short and a
/// 160-element input.
#[test]
fn test_sparse_union_logical_mask_mixed_nulls_skip_fully_valid() {
    // Short input: the string child is entirely null.
    let children = vec![
        Arc::new(Int32Array::from_value(2, 6)) as ArrayRef,
        Arc::new(Float64Array::from_value(4.2, 6)),
        Arc::new(StringArray::new_null(6)),
    ];
    let type_ids = ScalarBuffer::<i8>::from(vec![1, 1, 3, 3, 4, 4]);

    let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap();

    let expected = NullBuffer::from(vec![true, true, true, true, false, false]);
    assert_eq!(Some(expected), array.logical_nulls());

    // Longer input built by cycling short patterns: 2 * 64 + 32 elements.
    let len = 2 * 64 + 32;

    // The string child now alternates between null and "a".
    let children = vec![
        Arc::new(Int32Array::from_value(2, len)) as ArrayRef,
        Arc::new(Float64Array::from_value(4.2, len)),
        Arc::new(StringArray::from_iter(
            [None, Some("a")].into_iter().cycle().take(len),
        )),
    ];
    let type_ids = ScalarBuffer::from_iter([1, 1, 3, 3, 4, 4].into_iter().cycle().take(len));

    let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap();

    let expected = NullBuffer::from_iter(
        [true, true, true, true, false, true]
            .into_iter()
            .cycle()
            .take(len),
    );
    assert_eq!(array.len(), len);
    assert_eq!(Some(expected), array.logical_nulls());
}
2056
/// Checks `logical_nulls` on a three-field sparse union whose int and string children
/// are fully null while the float child is fully valid, for a short and a
/// 160-element input.
#[test]
fn test_sparse_union_logical_mask_mixed_nulls_skip_fully_null() {
    // Short input: only the two float-typed slots are valid.
    let children = vec![
        Arc::new(Int32Array::new_null(6)) as ArrayRef,
        Arc::new(Float64Array::from_value(4.2, 6)),
        Arc::new(StringArray::new_null(6)),
    ];
    let type_ids = ScalarBuffer::<i8>::from(vec![1, 1, 3, 3, 4, 4]);

    let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap();

    let expected = NullBuffer::from(vec![false, false, true, true, false, false]);
    assert_eq!(Some(expected), array.logical_nulls());

    // Longer input with the same type id pattern cycled: 2 * 64 + 32 elements.
    let len = 2 * 64 + 32;

    let children = vec![
        Arc::new(Int32Array::new_null(len)) as ArrayRef,
        Arc::new(Float64Array::from_value(4.2, len)),
        Arc::new(StringArray::new_null(len)),
    ];
    let type_ids = ScalarBuffer::from_iter([1, 1, 3, 3, 4, 4].into_iter().cycle().take(len));

    let array = UnionArray::try_new(union_fields(), type_ids, None, children).unwrap();

    let expected = NullBuffer::from_iter(
        [false, false, true, true, false, false]
            .into_iter()
            .cycle()
            .take(len),
    );
    assert_eq!(array.len(), len);
    assert_eq!(Some(expected), array.logical_nulls());
}
2105
/// Checks `logical_nulls` on a sparse union with 50 nullable `Int32` fields whose
/// children cycle through fully valid, partially null and fully null arrays.
#[test]
fn test_sparse_union_logical_nulls_gather() {
    let n_fields = 50;

    // Odd type ids 1, 3, 5, ... paired with fields named after their id.
    let fields: UnionFields = (1..)
        .step_by(2)
        .map(|id| {
            let field = Field::new(format!("f{id}"), DataType::Int32, true);
            (id, Arc::new(field))
        })
        .take(n_fields)
        .collect();

    // Children repeat in the order: fully valid, partially null, fully null.
    let children: Vec<ArrayRef> = [
        Arc::new(Int32Array::from_value(2, 4)) as ArrayRef,
        Arc::new(Int32Array::from(vec![None, None, Some(1), None])),
        Arc::new(Int32Array::new_null(4)),
    ]
    .into_iter()
    .cycle()
    .take(n_fields)
    .collect();

    let array = UnionArray::try_new(fields, vec![1, 3, 3, 5].into(), None, children).unwrap();

    let expected = NullBuffer::from(vec![true, false, true, false]);
    assert_eq!(Some(expected), array.logical_nulls());
}
2145
2146 fn union_fields() -> UnionFields {
2147 [
2148 (1, Arc::new(Field::new("A", DataType::Int32, true))),
2149 (3, Arc::new(Field::new("B", DataType::Float64, true))),
2150 (4, Arc::new(Field::new("C", DataType::Utf8, true))),
2151 ]
2152 .into_iter()
2153 .collect()
2154 }
2155
/// `is_nullable` must be false only when neither child of the union contains nulls.
#[test]
fn test_is_nullable() {
    // (int child has nulls, float child has nulls, expected `is_nullable`)
    let cases = [
        (false, false, false),
        (true, false, true),
        (false, true, true),
        (true, true, true),
    ];

    for (int_nullable, float_nullable, expected) in cases {
        let array = create_union_array(int_nullable, float_nullable);
        assert_eq!(array.is_nullable(), expected);
    }
}
2163
2164 fn create_union_array(int_nullable: bool, float_nullable: bool) -> UnionArray {
2171 let int_array = if int_nullable {
2172 Int32Array::from(vec![Some(1), None, Some(3)])
2173 } else {
2174 Int32Array::from(vec![1, 2, 3])
2175 };
2176 let float_array = if float_nullable {
2177 Float64Array::from(vec![Some(3.2), None, Some(4.2)])
2178 } else {
2179 Float64Array::from(vec![3.2, 4.2, 5.2])
2180 };
2181 let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
2182 let offsets = [0, 0, 0].into_iter().collect::<ScalarBuffer<i32>>();
2183 let union_fields = [
2184 (0, Arc::new(Field::new("A", DataType::Int32, true))),
2185 (1, Arc::new(Field::new("B", DataType::Float64, true))),
2186 ]
2187 .into_iter()
2188 .collect::<UnionFields>();
2189
2190 let children = vec![Arc::new(int_array) as Arc<dyn Array>, Arc::new(float_array)];
2191
2192 UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap()
2193 }
2194}