1use crate::VariantArrayBuilder;
21use crate::type_conversion::{generic_conversion_single_value, primitive_conversion_single_value};
22use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
23use arrow::buffer::NullBuffer;
24use arrow::compute::cast;
25use arrow::datatypes::{
26 Date32Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
27 TimestampMicrosecondType, TimestampNanosecondType,
28};
29use arrow_schema::extension::ExtensionType;
30use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, TimeUnit};
31use chrono::DateTime;
32use parquet_variant::{
33 Uuid, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType as _,
34};
35
36use std::borrow::Cow;
37use std::sync::Arc;
38
39pub struct VariantType;
44
45impl ExtensionType for VariantType {
46 const NAME: &'static str = "arrow.parquet.variant";
47
48 type Metadata = &'static str;
51
52 fn metadata(&self) -> &Self::Metadata {
53 &""
54 }
55
56 fn serialize_metadata(&self) -> Option<String> {
57 Some(String::new())
58 }
59
60 fn deserialize_metadata(_metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
61 Ok("")
62 }
63
64 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
65 if matches!(data_type, DataType::Struct(_)) {
66 Ok(())
67 } else {
68 Err(ArrowError::InvalidArgumentError(format!(
69 "VariantType only supports StructArray, got {data_type}"
70 )))
71 }
72 }
73
74 fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
75 Self.supports_data_type(data_type)?;
76 Ok(Self)
77 }
78}
79
80#[derive(Debug, Clone, PartialEq)]
212pub struct VariantArray {
213 inner: StructArray,
215
216 metadata: BinaryViewArray,
218
219 shredding_state: ShreddingState,
221}
222
223impl VariantArray {
224 pub fn try_new(inner: &dyn Array) -> Result<Self, ArrowError> {
252 let inner = cast_to_binary_view_arrays(inner)?;
255
256 let Some(inner) = inner.as_struct_opt() else {
257 return Err(ArrowError::InvalidArgumentError(
258 "Invalid VariantArray: requires StructArray as input".to_string(),
259 ));
260 };
261
262 let Some(metadata_field) = inner.column_by_name("metadata") else {
266 return Err(ArrowError::InvalidArgumentError(
267 "Invalid VariantArray: StructArray must contain a 'metadata' field".to_string(),
268 ));
269 };
270 let Some(metadata) = metadata_field.as_binary_view_opt() else {
271 return Err(ArrowError::NotYetImplemented(format!(
272 "VariantArray 'metadata' field must be BinaryView, got {}",
273 metadata_field.data_type()
274 )));
275 };
276
277 Ok(Self {
279 inner: inner.clone(),
280 metadata: metadata.clone(),
281 shredding_state: ShreddingState::try_from(inner)?,
282 })
283 }
284
285 pub(crate) fn from_parts(
286 metadata: BinaryViewArray,
287 value: Option<BinaryViewArray>,
288 typed_value: Option<ArrayRef>,
289 nulls: Option<NullBuffer>,
290 ) -> Self {
291 let mut builder =
292 StructArrayBuilder::new().with_field("metadata", Arc::new(metadata.clone()), false);
293 if let Some(value) = value.clone() {
294 builder = builder.with_field("value", Arc::new(value), true);
295 }
296 if let Some(typed_value) = typed_value.clone() {
297 builder = builder.with_field("typed_value", typed_value, true);
298 }
299 if let Some(nulls) = nulls {
300 builder = builder.with_nulls(nulls);
301 }
302
303 Self {
304 inner: builder.build(),
305 metadata,
306 shredding_state: ShreddingState::new(value, typed_value),
307 }
308 }
309
310 pub fn inner(&self) -> &StructArray {
312 &self.inner
313 }
314
315 pub fn into_inner(self) -> StructArray {
317 self.inner
318 }
319
320 pub fn shredding_state(&self) -> &ShreddingState {
322 &self.shredding_state
323 }
324
325 pub fn value(&self, index: usize) -> Variant<'_, '_> {
346 match (self.typed_value_field(), self.value_field()) {
347 (Some(typed_value), value) if typed_value.is_valid(index) => {
349 typed_value_to_variant(typed_value, value, index)
350 }
351 (_, Some(value)) if value.is_valid(index) => {
353 Variant::new(self.metadata.value(index), value.value(index))
354 }
355 _ => Variant::Null,
358 }
359 }
360
361 pub fn metadata_field(&self) -> &BinaryViewArray {
363 &self.metadata
364 }
365
366 pub fn value_field(&self) -> Option<&BinaryViewArray> {
368 self.shredding_state.value_field()
369 }
370
371 pub fn typed_value_field(&self) -> Option<&ArrayRef> {
373 self.shredding_state.typed_value_field()
374 }
375
376 pub fn field(&self, name: impl Into<String>) -> Field {
379 Field::new(
380 name.into(),
381 self.data_type().clone(),
382 self.inner.is_nullable(),
383 )
384 .with_extension_type(VariantType)
385 }
386
387 pub fn data_type(&self) -> &DataType {
389 self.inner.data_type()
390 }
391
392 pub fn slice(&self, offset: usize, length: usize) -> Self {
393 let inner = self.inner.slice(offset, length);
394 let metadata = self.metadata.slice(offset, length);
395 let shredding_state = self.shredding_state.slice(offset, length);
396 Self {
397 inner,
398 metadata,
399 shredding_state,
400 }
401 }
402
403 pub fn len(&self) -> usize {
404 self.inner.len()
405 }
406
407 pub fn is_empty(&self) -> bool {
408 self.inner.is_empty()
409 }
410
411 pub fn nulls(&self) -> Option<&NullBuffer> {
412 self.inner.nulls()
413 }
414
415 pub fn is_null(&self, index: usize) -> bool {
417 self.nulls().is_some_and(|n| n.is_null(index))
418 }
419
420 pub fn is_valid(&self, index: usize) -> bool {
422 !self.is_null(index)
423 }
424
425 pub fn iter(&self) -> VariantArrayIter<'_> {
427 VariantArrayIter::new(self)
428 }
429}
430
431impl From<VariantArray> for StructArray {
432 fn from(variant_array: VariantArray) -> Self {
433 variant_array.into_inner()
434 }
435}
436
437impl From<VariantArray> for ArrayRef {
438 fn from(variant_array: VariantArray) -> Self {
439 Arc::new(variant_array.into_inner())
440 }
441}
442
443impl<'m, 'v> FromIterator<Option<Variant<'m, 'v>>> for VariantArray {
444 fn from_iter<T: IntoIterator<Item = Option<Variant<'m, 'v>>>>(iter: T) -> Self {
445 let iter = iter.into_iter();
446
447 let mut b = VariantArrayBuilder::new(iter.size_hint().0);
448 b.extend(iter);
449 b.build()
450 }
451}
452
453impl<'m, 'v> FromIterator<Variant<'m, 'v>> for VariantArray {
454 fn from_iter<T: IntoIterator<Item = Variant<'m, 'v>>>(iter: T) -> Self {
455 Self::from_iter(iter.into_iter().map(Some))
456 }
457}
458
459#[derive(Debug)]
484pub struct VariantArrayIter<'a> {
485 array: &'a VariantArray,
486 head_i: usize,
487 tail_i: usize,
488}
489
490impl<'a> VariantArrayIter<'a> {
491 pub fn new(array: &'a VariantArray) -> Self {
493 Self {
494 array,
495 head_i: 0,
496 tail_i: array.len(),
497 }
498 }
499
500 fn value_opt(&self, i: usize) -> Option<Variant<'a, 'a>> {
501 self.array.is_valid(i).then(|| self.array.value(i))
502 }
503}
504
505impl<'a> Iterator for VariantArrayIter<'a> {
506 type Item = Option<Variant<'a, 'a>>;
507
508 #[inline]
509 fn next(&mut self) -> Option<Self::Item> {
510 if self.head_i == self.tail_i {
511 return None;
512 }
513
514 let out = self.value_opt(self.head_i);
515
516 self.head_i += 1;
517
518 Some(out)
519 }
520
521 fn size_hint(&self) -> (usize, Option<usize>) {
522 let remainder = self.tail_i - self.head_i;
523
524 (remainder, Some(remainder))
525 }
526}
527
528impl<'a> DoubleEndedIterator for VariantArrayIter<'a> {
529 fn next_back(&mut self) -> Option<Self::Item> {
530 if self.head_i == self.tail_i {
531 return None;
532 }
533
534 self.tail_i -= 1;
535
536 Some(self.value_opt(self.tail_i))
537 }
538}
539
540impl<'a> ExactSizeIterator for VariantArrayIter<'a> {}
541
542#[derive(Debug)]
577pub struct ShreddedVariantFieldArray {
578 inner: StructArray,
580 shredding_state: ShreddingState,
581}
582
583#[allow(unused)]
584impl ShreddedVariantFieldArray {
585 pub fn try_new(inner: &dyn Array) -> Result<Self, ArrowError> {
606 let Some(inner_struct) = inner.as_struct_opt() else {
607 return Err(ArrowError::InvalidArgumentError(
608 "Invalid ShreddedVariantFieldArray: requires StructArray as input".to_string(),
609 ));
610 };
611
612 Ok(Self {
614 inner: inner_struct.clone(),
615 shredding_state: ShreddingState::try_from(inner_struct)?,
616 })
617 }
618
619 pub fn shredding_state(&self) -> &ShreddingState {
621 &self.shredding_state
622 }
623
624 pub fn value_field(&self) -> Option<&BinaryViewArray> {
626 self.shredding_state.value_field()
627 }
628
629 pub fn typed_value_field(&self) -> Option<&ArrayRef> {
631 self.shredding_state.typed_value_field()
632 }
633
634 pub fn inner(&self) -> &StructArray {
636 &self.inner
637 }
638
639 pub(crate) fn from_parts(
640 value: Option<BinaryViewArray>,
641 typed_value: Option<ArrayRef>,
642 nulls: Option<NullBuffer>,
643 ) -> Self {
644 let mut builder = StructArrayBuilder::new();
645 if let Some(value) = value.clone() {
646 builder = builder.with_field("value", Arc::new(value), true);
647 }
648 if let Some(typed_value) = typed_value.clone() {
649 builder = builder.with_field("typed_value", typed_value, true);
650 }
651 if let Some(nulls) = nulls {
652 builder = builder.with_nulls(nulls);
653 }
654
655 Self {
656 inner: builder.build(),
657 shredding_state: ShreddingState::new(value, typed_value),
658 }
659 }
660
661 pub fn into_inner(self) -> StructArray {
663 self.inner
664 }
665
666 pub fn data_type(&self) -> &DataType {
667 self.inner.data_type()
668 }
669
670 pub fn len(&self) -> usize {
671 self.inner.len()
672 }
673
674 pub fn is_empty(&self) -> bool {
675 self.inner.is_empty()
676 }
677
678 pub fn offset(&self) -> usize {
679 self.inner.offset()
680 }
681
682 pub fn nulls(&self) -> Option<&NullBuffer> {
683 None
687 }
688 pub fn is_null(&self, index: usize) -> bool {
690 self.nulls().is_some_and(|n| n.is_null(index))
691 }
692
693 pub fn is_valid(&self, index: usize) -> bool {
695 !self.is_null(index)
696 }
697}
698
699impl From<ShreddedVariantFieldArray> for ArrayRef {
700 fn from(array: ShreddedVariantFieldArray) -> Self {
701 Arc::new(array.into_inner())
702 }
703}
704
705impl From<ShreddedVariantFieldArray> for StructArray {
706 fn from(array: ShreddedVariantFieldArray) -> Self {
707 array.into_inner()
708 }
709}
710
711#[derive(Debug, Clone, PartialEq)]
745pub struct ShreddingState {
746 value: Option<BinaryViewArray>,
747 typed_value: Option<ArrayRef>,
748}
749
750impl ShreddingState {
751 pub fn new(value: Option<BinaryViewArray>, typed_value: Option<ArrayRef>) -> Self {
766 Self { value, typed_value }
767 }
768
769 pub fn value_field(&self) -> Option<&BinaryViewArray> {
771 self.value.as_ref()
772 }
773
774 pub fn typed_value_field(&self) -> Option<&ArrayRef> {
776 self.typed_value.as_ref()
777 }
778
779 pub fn borrow(&self) -> BorrowedShreddingState<'_> {
781 BorrowedShreddingState {
782 value: self.value_field(),
783 typed_value: self.typed_value_field(),
784 }
785 }
786
787 pub fn slice(&self, offset: usize, length: usize) -> Self {
789 Self {
790 value: self.value.as_ref().map(|v| v.slice(offset, length)),
791 typed_value: self.typed_value.as_ref().map(|tv| tv.slice(offset, length)),
792 }
793 }
794}
795
796#[derive(Clone, Debug)]
799pub struct BorrowedShreddingState<'a> {
800 value: Option<&'a BinaryViewArray>,
801 typed_value: Option<&'a ArrayRef>,
802}
803
804impl<'a> BorrowedShreddingState<'a> {
805 pub fn new(value: Option<&'a BinaryViewArray>, typed_value: Option<&'a ArrayRef>) -> Self {
820 Self { value, typed_value }
821 }
822
823 pub fn value_field(&self) -> Option<&'a BinaryViewArray> {
825 self.value
826 }
827
828 pub fn typed_value_field(&self) -> Option<&'a ArrayRef> {
830 self.typed_value
831 }
832}
833
834impl<'a> TryFrom<&'a StructArray> for BorrowedShreddingState<'a> {
835 type Error = ArrowError;
836
837 fn try_from(inner_struct: &'a StructArray) -> Result<Self, ArrowError> {
838 let value = if let Some(value_col) = inner_struct.column_by_name("value") {
840 let Some(binary_view) = value_col.as_binary_view_opt() else {
841 return Err(ArrowError::NotYetImplemented(format!(
842 "VariantArray 'value' field must be BinaryView, got {}",
843 value_col.data_type()
844 )));
845 };
846 Some(binary_view)
847 } else {
848 None
849 };
850 let typed_value = inner_struct.column_by_name("typed_value");
851 Ok(BorrowedShreddingState::new(value, typed_value))
852 }
853}
854
855impl TryFrom<&StructArray> for ShreddingState {
856 type Error = ArrowError;
857
858 fn try_from(inner_struct: &StructArray) -> Result<Self, ArrowError> {
859 Ok(BorrowedShreddingState::try_from(inner_struct)?.into())
860 }
861}
862
863impl From<BorrowedShreddingState<'_>> for ShreddingState {
864 fn from(state: BorrowedShreddingState<'_>) -> Self {
865 ShreddingState {
866 value: state.value_field().cloned(),
867 typed_value: state.typed_value_field().cloned(),
868 }
869 }
870}
871
872#[derive(Debug, Default, Clone)]
876pub(crate) struct StructArrayBuilder {
877 fields: Vec<FieldRef>,
878 arrays: Vec<ArrayRef>,
879 nulls: Option<NullBuffer>,
880}
881
882impl StructArrayBuilder {
883 pub fn new() -> Self {
884 Default::default()
885 }
886
887 pub fn with_field(mut self, field_name: &str, array: ArrayRef, nullable: bool) -> Self {
889 let field = Field::new(field_name, array.data_type().clone(), nullable);
890 self.fields.push(Arc::new(field));
891 self.arrays.push(array);
892 self
893 }
894
895 pub fn with_nulls(mut self, nulls: NullBuffer) -> Self {
897 self.nulls = Some(nulls);
898 self
899 }
900
901 pub fn build(self) -> StructArray {
902 let Self {
903 fields,
904 arrays,
905 nulls,
906 } = self;
907 StructArray::new(Fields::from(fields), arrays, nulls)
908 }
909}
910
911fn typed_value_to_variant<'a>(
913 typed_value: &'a ArrayRef,
914 value: Option<&BinaryViewArray>,
915 index: usize,
916) -> Variant<'a, 'a> {
917 let data_type = typed_value.data_type();
918 if value.is_some_and(|v| !matches!(data_type, DataType::Struct(_)) && v.is_valid(index)) {
919 panic!("Invalid variant, conflicting value and typed_value");
921 }
922 match data_type {
923 DataType::Boolean => {
924 let boolean_array = typed_value.as_boolean();
925 let value = boolean_array.value(index);
926 Variant::from(value)
927 }
928 DataType::Date32 => {
929 let array = typed_value.as_primitive::<Date32Type>();
930 let value = array.value(index);
931 let date = Date32Type::to_naive_date(value);
932 Variant::from(date)
933 }
934 DataType::FixedSizeBinary(16) => {
936 let array = typed_value.as_fixed_size_binary();
937 let value = array.value(index);
938 Uuid::from_slice(value).unwrap().into() }
940 DataType::BinaryView => {
941 let array = typed_value.as_binary_view();
942 let value = array.value(index);
943 Variant::from(value)
944 }
945 DataType::Utf8 => {
946 let array = typed_value.as_string::<i32>();
947 let value = array.value(index);
948 Variant::from(value)
949 }
950 DataType::Int8 => {
951 primitive_conversion_single_value!(Int8Type, typed_value, index)
952 }
953 DataType::Int16 => {
954 primitive_conversion_single_value!(Int16Type, typed_value, index)
955 }
956 DataType::Int32 => {
957 primitive_conversion_single_value!(Int32Type, typed_value, index)
958 }
959 DataType::Int64 => {
960 primitive_conversion_single_value!(Int64Type, typed_value, index)
961 }
962 DataType::Float16 => {
963 primitive_conversion_single_value!(Float16Type, typed_value, index)
964 }
965 DataType::Float32 => {
966 primitive_conversion_single_value!(Float32Type, typed_value, index)
967 }
968 DataType::Float64 => {
969 primitive_conversion_single_value!(Float64Type, typed_value, index)
970 }
971 DataType::Timestamp(TimeUnit::Microsecond, Some(_)) => {
972 generic_conversion_single_value!(
973 TimestampMicrosecondType,
974 as_primitive,
975 |v| DateTime::from_timestamp_micros(v).unwrap(),
976 typed_value,
977 index
978 )
979 }
980 DataType::Timestamp(TimeUnit::Microsecond, None) => {
981 generic_conversion_single_value!(
982 TimestampMicrosecondType,
983 as_primitive,
984 |v| DateTime::from_timestamp_micros(v).unwrap().naive_utc(),
985 typed_value,
986 index
987 )
988 }
989 DataType::Timestamp(TimeUnit::Nanosecond, Some(_)) => {
990 generic_conversion_single_value!(
991 TimestampNanosecondType,
992 as_primitive,
993 DateTime::from_timestamp_nanos,
994 typed_value,
995 index
996 )
997 }
998 DataType::Timestamp(TimeUnit::Nanosecond, None) => {
999 generic_conversion_single_value!(
1000 TimestampNanosecondType,
1001 as_primitive,
1002 |v| DateTime::from_timestamp_nanos(v).naive_utc(),
1003 typed_value,
1004 index
1005 )
1006 }
1007 _ => {
1010 debug_assert!(
1014 false,
1015 "Unsupported typed_value type: {}",
1016 typed_value.data_type()
1017 );
1018 Variant::Null
1019 }
1020 }
1021}
1022
1023fn cast_to_binary_view_arrays(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
1034 let new_type = canonicalize_and_verify_data_type(array.data_type())?;
1035 if let Cow::Borrowed(_) = new_type {
1036 if let Some(array) = array.as_struct_opt() {
1037 return Ok(Arc::new(array.clone())); }
1039 }
1040 cast(array, new_type.as_ref())
1041}
1042
1043fn canonicalize_and_verify_data_type(
1047 data_type: &DataType,
1048) -> Result<Cow<'_, DataType>, ArrowError> {
1049 use DataType::*;
1050
1051 macro_rules! fail {
1053 () => {
1054 return Err(ArrowError::InvalidArgumentError(format!(
1055 "Illegal shredded value type: {data_type}"
1056 )))
1057 };
1058 }
1059 macro_rules! borrow {
1060 () => {
1061 Cow::Borrowed(data_type)
1062 };
1063 }
1064
1065 let new_data_type = match data_type {
1066 Null | Boolean => borrow!(),
1068 Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => borrow!(),
1069
1070 UInt8 | UInt16 | UInt32 | UInt64 | Float16 => fail!(),
1072
1073 Decimal64(p, s) | Decimal128(p, s)
1078 if VariantDecimal4::is_valid_precision_and_scale(p, s) =>
1079 {
1080 Cow::Owned(Decimal32(*p, *s))
1081 }
1082 Decimal128(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => {
1083 Cow::Owned(Decimal64(*p, *s))
1084 }
1085 Decimal32(p, s) if VariantDecimal4::is_valid_precision_and_scale(p, s) => borrow!(),
1086 Decimal64(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => borrow!(),
1087 Decimal128(p, s) if VariantDecimal16::is_valid_precision_and_scale(p, s) => borrow!(),
1088 Decimal32(..) | Decimal64(..) | Decimal128(..) | Decimal256(..) => fail!(),
1089
1090 Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _) => borrow!(),
1092 Timestamp(TimeUnit::Millisecond | TimeUnit::Second, _) => fail!(),
1093
1094 Date32 | Time64(TimeUnit::Microsecond) => borrow!(),
1096 Date64 | Time32(_) | Time64(_) | Duration(_) | Interval(_) => fail!(),
1097
1098 Binary => Cow::Owned(DataType::BinaryView),
1101 BinaryView | Utf8 => borrow!(),
1102
1103 FixedSizeBinary(16) => borrow!(),
1105 FixedSizeBinary(_) | FixedSizeList(..) => fail!(),
1106
1107 LargeBinary | LargeUtf8 | Utf8View | ListView(_) | LargeList(_) | LargeListView(_) => {
1109 fail!()
1110 }
1111
1112 List(field) => match canonicalize_and_verify_field(field)? {
1114 Cow::Borrowed(_) => borrow!(),
1115 Cow::Owned(new_field) => Cow::Owned(DataType::List(new_field)),
1116 },
1117 Struct(fields) => {
1119 let mut new_fields = std::collections::HashMap::new();
1122 for (i, field) in fields.iter().enumerate() {
1123 if let Cow::Owned(new_field) = canonicalize_and_verify_field(field)? {
1124 new_fields.insert(i, new_field);
1125 }
1126 }
1127
1128 if new_fields.is_empty() {
1129 borrow!()
1130 } else {
1131 let new_fields = fields
1132 .iter()
1133 .enumerate()
1134 .map(|(i, field)| new_fields.remove(&i).unwrap_or_else(|| field.clone()));
1135 Cow::Owned(DataType::Struct(new_fields.collect()))
1136 }
1137 }
1138 Map(..) | Union(..) => fail!(),
1139
1140 Dictionary(..) | RunEndEncoded(..) => fail!(),
1142 };
1143 Ok(new_data_type)
1144}
1145
1146fn canonicalize_and_verify_field(field: &Arc<Field>) -> Result<Cow<'_, Arc<Field>>, ArrowError> {
1147 let Cow::Owned(new_data_type) = canonicalize_and_verify_data_type(field.data_type())? else {
1148 return Ok(Cow::Borrowed(field));
1149 };
1150 let new_field = field.as_ref().clone().with_data_type(new_data_type);
1151 Ok(Cow::Owned(Arc::new(new_field)))
1152}
1153
#[cfg(test)]
mod test {
    use crate::VariantArrayBuilder;

    use super::*;
    use arrow::array::{BinaryViewArray, Int32Array};
    use arrow_schema::{Field, Fields};
    use parquet_variant::ShortString;

    /// A non-struct input must be rejected.
    #[test]
    fn invalid_not_a_struct_array() {
        let array = make_binary_view_array();
        let err = VariantArray::try_new(&array);
        assert_eq!(
            err.unwrap_err().to_string(),
            "Invalid argument error: Invalid VariantArray: requires StructArray as input"
        );
    }

    /// A struct without a `metadata` column must be rejected.
    #[test]
    fn invalid_missing_metadata() {
        let fields = Fields::from(vec![Field::new("value", DataType::BinaryView, true)]);
        let array = StructArray::new(fields, vec![make_binary_view_array()], None);
        let err = VariantArray::try_new(&array);
        assert_eq!(
            err.unwrap_err().to_string(),
            "Invalid argument error: Invalid VariantArray: StructArray must contain a 'metadata' field"
        );
    }

    /// A struct with only a `metadata` column is accepted; every valid row
    /// then reads back as `Variant::Null`.
    #[test]
    fn all_null_missing_value_and_typed_value() {
        let fields = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
        let array = StructArray::new(fields, vec![make_binary_view_array()], None);

        let variant_array = VariantArray::try_new(&array).unwrap();

        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState {
                value: None,
                typed_value: None
            }
        ));

        for i in 0..variant_array.len() {
            if variant_array.is_valid(i) {
                assert_eq!(variant_array.value(i), parquet_variant::Variant::Null);
            }
        }
    }

    /// A `metadata` column that is not `BinaryView` must be rejected.
    #[test]
    fn invalid_metadata_field_type() {
        let fields = Fields::from(vec![
            Field::new("metadata", DataType::Int32, true),
            Field::new("value", DataType::BinaryView, true),
        ]);
        let array = StructArray::new(
            fields,
            vec![make_int32_array(), make_binary_view_array()],
            None,
        );
        let err = VariantArray::try_new(&array);
        assert_eq!(
            err.unwrap_err().to_string(),
            "Not yet implemented: VariantArray 'metadata' field must be BinaryView, got Int32"
        );
    }

    /// A `value` column that is not `BinaryView` must be rejected.
    #[test]
    fn invalid_value_field_type() {
        let fields = Fields::from(vec![
            Field::new("metadata", DataType::BinaryView, true),
            Field::new("value", DataType::Int32, true),
        ]);
        let array = StructArray::new(
            fields,
            vec![make_binary_view_array(), make_int32_array()],
            None,
        );
        let err = VariantArray::try_new(&array);
        assert_eq!(
            err.unwrap_err().to_string(),
            "Not yet implemented: VariantArray 'value' field must be BinaryView, got Int32"
        );
    }

    /// One-element `BinaryView` array used as a column fixture.
    fn make_binary_view_array() -> ArrayRef {
        Arc::new(BinaryViewArray::from(vec![b"test" as &[u8]]))
    }

    /// One-element `Int32` array used as a wrongly-typed column fixture.
    fn make_int32_array() -> ArrayRef {
        Arc::new(Int32Array::from(vec![1]))
    }

    /// `ShreddingState::new(None, None)` stores neither column.
    #[test]
    fn all_null_shredding_state() {
        assert!(matches!(
            ShreddingState::new(None, None),
            ShreddingState {
                value: None,
                typed_value: None
            }
        ));
    }

    /// A metadata-only struct whose rows are all null at the struct level
    /// yields a `VariantArray` in which every row is invalid.
    #[test]
    fn all_null_variant_array_construction() {
        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);
        let nulls = NullBuffer::from(vec![false, false, false]);

        let fields = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
        let struct_array = StructArray::new(fields, vec![Arc::new(metadata)], Some(nulls));

        let variant_array = VariantArray::try_new(&struct_array).unwrap();

        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState {
                value: None,
                typed_value: None
            }
        ));

        assert_eq!(variant_array.len(), 3);
        assert!(!variant_array.is_valid(0));
        assert!(!variant_array.is_valid(1));
        assert!(!variant_array.is_valid(2));

        for i in 0..variant_array.len() {
            assert!(
                !variant_array.is_valid(i),
                "Expected value at index {i} to be null"
            );
        }
    }

    /// A `value` column that is present but entirely null is still reported
    /// as a present `value` column (with no `typed_value`).
    #[test]
    fn value_field_present_but_all_null_should_be_unshredded() {
        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);

        // Build a `value` column in which every entry is null.
        let value_nulls = NullBuffer::from(vec![false, false, false]);
        let value_array = BinaryViewArray::from_iter_values(vec![""; 3]);
        let value_data = value_array
            .to_data()
            .into_builder()
            .nulls(Some(value_nulls))
            .build()
            .unwrap();
        let value = BinaryViewArray::from(value_data);

        let fields = Fields::from(vec![
            Field::new("metadata", DataType::BinaryView, false),
            Field::new("value", DataType::BinaryView, true),
        ]);
        let struct_array = StructArray::new(
            fields,
            vec![Arc::new(metadata), Arc::new(value)],
            None,
        );

        let variant_array = VariantArray::try_new(&struct_array).unwrap();

        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState {
                value: Some(_),
                typed_value: None
            }
        ));
    }

    /// Forward iteration yields `None` for null rows and `Some(variant)` for
    /// the rest, including `Some(Variant::Null)` for variant-level nulls.
    #[test]
    fn test_variant_array_iterable() {
        let mut b = VariantArrayBuilder::new(6);

        b.append_null();
        b.append_variant(Variant::from(1_i8));
        b.append_variant(Variant::Null);
        b.append_variant(Variant::from(2_i32));
        b.append_variant(Variant::from(3_i64));
        b.append_null();

        let v = b.build();

        let variants = v.iter().collect::<Vec<_>>();

        assert_eq!(
            variants,
            vec![
                None,
                Some(Variant::Int8(1)),
                Some(Variant::Null),
                Some(Variant::Int32(2)),
                Some(Variant::Int64(3)),
                None,
            ]
        );
    }

    /// Mixing `next` and `next_back` consumes each element exactly once.
    #[test]
    fn test_variant_array_iter_double_ended() {
        let mut b = VariantArrayBuilder::new(5);

        b.append_variant(Variant::from(0_i32));
        b.append_null();
        b.append_variant(Variant::from(2_i32));
        b.append_null();
        b.append_variant(Variant::from(4_i32));

        let array = b.build();
        let mut iter = array.iter();

        assert_eq!(iter.next(), Some(Some(Variant::from(0_i32))));
        assert_eq!(iter.next(), Some(None));

        assert_eq!(iter.next_back(), Some(Some(Variant::from(4_i32))));
        assert_eq!(iter.next_back(), Some(None));
        assert_eq!(iter.next_back(), Some(Some(Variant::from(2_i32))));

        // Both ends are exhausted once the cursors have met.
        assert_eq!(iter.next_back(), None);
        assert_eq!(iter.next(), None);
    }

    /// `rev()` yields the elements back to front.
    #[test]
    fn test_variant_array_iter_reverse() {
        let mut b = VariantArrayBuilder::new(5);

        b.append_variant(Variant::from("a"));
        b.append_null();
        b.append_variant(Variant::from("aaa"));
        b.append_null();
        b.append_variant(Variant::from("aaaaa"));

        let array = b.build();

        let result: Vec<_> = array.iter().rev().collect();
        assert_eq!(
            result,
            vec![
                Some(Variant::from("aaaaa")),
                None,
                Some(Variant::from("aaa")),
                None,
                Some(Variant::from("a")),
            ]
        );
    }

    /// Iterating an empty array yields nothing from either end.
    #[test]
    fn test_variant_array_iter_empty() {
        let v = VariantArrayBuilder::new(0).build();
        let mut i = v.iter();
        assert!(i.next().is_none());
        assert!(i.next_back().is_none());
    }

    /// Collecting `Option<Variant>` items maps `None` to null rows.
    #[test]
    fn test_from_variant_opts_into_variant_array() {
        let v = vec![None, Some(Variant::Null), Some(Variant::BooleanFalse), None];

        let variant_array = VariantArray::from_iter(v);

        assert_eq!(variant_array.len(), 4);

        assert!(variant_array.is_null(0));

        assert!(!variant_array.is_null(1));
        assert_eq!(variant_array.value(1), Variant::Null);

        assert!(!variant_array.is_null(2));
        assert_eq!(variant_array.value(2), Variant::BooleanFalse);

        assert!(variant_array.is_null(3));
    }

    /// Collecting plain `Variant` items produces no null rows.
    #[test]
    fn test_from_variants_into_variant_array() {
        let v = vec![
            Variant::Null,
            Variant::BooleanFalse,
            Variant::ShortString(ShortString::try_new("norm").unwrap()),
        ];

        let variant_array = VariantArray::from_iter(v);

        assert_eq!(variant_array.len(), 3);

        assert!(!variant_array.is_null(0));
        assert_eq!(variant_array.value(0), Variant::Null);

        assert!(!variant_array.is_null(1));
        assert_eq!(variant_array.value(1), Variant::BooleanFalse);

        // Index 2 is the last valid index of this 3-element array (the
        // previous check used the out-of-range index 3).
        assert!(!variant_array.is_null(2));
        assert_eq!(
            variant_array.value(2),
            Variant::ShortString(ShortString::try_new("norm").unwrap())
        );
    }

    /// Equality holds for clones and fails for reordered or sliced arrays.
    #[test]
    fn test_variant_equality() {
        let v_iter = [None, Some(Variant::BooleanFalse), Some(Variant::Null), None];
        let v = VariantArray::from_iter(v_iter.clone());

        {
            let v_copy = v.clone();
            assert_eq!(v, v_copy);
        }

        {
            let v_iter_reversed = v_iter.iter().cloned().rev();
            let v_reversed = VariantArray::from_iter(v_iter_reversed);

            assert_ne!(v, v_reversed);
        }

        {
            let v_sliced = v.slice(0, 1);
            assert_ne!(v, v_sliced);
        }
    }
}