parquet_variant_compute/
variant_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`VariantArray`] implementation
19
20use crate::VariantArrayBuilder;
21use crate::type_conversion::{generic_conversion_single_value, primitive_conversion_single_value};
22use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
23use arrow::buffer::NullBuffer;
24use arrow::compute::cast;
25use arrow::datatypes::{
26    Date32Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
27    TimestampMicrosecondType, TimestampNanosecondType,
28};
29use arrow_schema::extension::ExtensionType;
30use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, TimeUnit};
31use chrono::DateTime;
32use parquet_variant::{
33    Uuid, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType as _,
34};
35
36use std::borrow::Cow;
37use std::sync::Arc;
38
/// Arrow Variant [`ExtensionType`].
///
/// Represents the canonical Arrow Extension Type for storing variants.
/// See [`VariantArray`] for more examples of using this extension type.
///
/// This is a zero-sized marker type: it carries no state, so it implements the
/// common value traits (`Debug`, `Clone`, `Copy`, `Default`, `PartialEq`, `Eq`
/// and `Hash`) and can be freely copied, compared and printed.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct VariantType;
44
45impl ExtensionType for VariantType {
46    const NAME: &'static str = "arrow.parquet.variant";
47
48    // Variants extension metadata is an empty string
49    // <https://github.com/apache/arrow/blob/d803afcc43f5d132506318fd9e162d33b2c3d4cd/docs/source/format/CanonicalExtensions.rst?plain=1#L473>
50    type Metadata = &'static str;
51
52    fn metadata(&self) -> &Self::Metadata {
53        &""
54    }
55
56    fn serialize_metadata(&self) -> Option<String> {
57        Some(String::new())
58    }
59
60    fn deserialize_metadata(_metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
61        Ok("")
62    }
63
64    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
65        if matches!(data_type, DataType::Struct(_)) {
66            Ok(())
67        } else {
68            Err(ArrowError::InvalidArgumentError(format!(
69                "VariantType only supports StructArray, got {data_type}"
70            )))
71        }
72    }
73
74    fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
75        Self.supports_data_type(data_type)?;
76        Ok(Self)
77    }
78}
79
80/// An array of Parquet [`Variant`] values
81///
82/// A [`VariantArray`] wraps an Arrow [`StructArray`] that stores the underlying
83/// `metadata` and `value` fields, and adds convenience methods to access
84/// the [`Variant`]s.
85///
86/// See [`VariantArrayBuilder`] for constructing `VariantArray` row by row.
87///
/// See the examples below for converting between `VariantArray` and
/// `StructArray`.
90///
91/// [`VariantArrayBuilder`]: crate::VariantArrayBuilder
92///
93/// # Documentation
94///
/// At the time of this writing, Variant has been accepted as an official
/// extension type but has not yet been published to the [official list of
/// extension types] on the Apache Arrow website. See the [Extension Type for
/// Parquet Variant arrow] ticket for more details.
99///
100/// [Extension Type for Parquet Variant arrow]: https://github.com/apache/arrow/issues/46908
101/// [official list of extension types]: https://arrow.apache.org/docs/format/CanonicalExtensions.html
102///
103/// # Example: Check if a [`StructArray`] has the [`VariantType`] extension
104///
105/// Arrow Arrays only provide [`DataType`], but the extension type information
106/// is stored on a [`Field`]. Thus, you must have access to the [`Schema`] or
107/// [`Field`] to check for the extension type.
108///
109/// [`Schema`]: arrow_schema::Schema
110/// ```
111/// # use arrow::array::StructArray;
112/// # use arrow_schema::{Schema, Field, DataType};
113/// # use parquet_variant::Variant;
114/// # use parquet_variant_compute::{VariantArrayBuilder, VariantArray, VariantType};
115/// # fn get_variant_array() -> VariantArray {
116/// #   let mut builder = VariantArrayBuilder::new(10);
117/// #   builder.append_variant(Variant::from("such wow"));
118/// #   builder.build()
119/// # }
120/// # fn get_schema() -> Schema {
121/// #   Schema::new(vec![
122/// #     Field::new("id", DataType::Int32, false),
123/// #     get_variant_array().field("var"),
124/// #   ])
125/// # }
126/// let schema = get_schema();
127/// assert_eq!(schema.fields().len(), 2);
128/// // first field is not a Variant
129/// assert!(schema.field(0).try_extension_type::<VariantType>().is_err());
130/// // second field is a Variant
131/// assert!(schema.field(1).try_extension_type::<VariantType>().is_ok());
132/// ```
133///
134/// # Example: Constructing the correct [`Field`] for a [`VariantArray`]
135///
136/// You can construct the correct [`Field`] for a [`VariantArray`] using the
137/// [`VariantArray::field`] method.
138///
139/// ```
140/// # use arrow_schema::{Schema, Field, DataType};
141/// # use parquet_variant::Variant;
142/// # use parquet_variant_compute::{VariantArrayBuilder, VariantArray, VariantType};
143/// # fn get_variant_array() -> VariantArray {
144/// #   let mut builder = VariantArrayBuilder::new(10);
145/// #   builder.append_variant(Variant::from("such wow"));
146/// #   builder.build()
147/// # }
148/// let variant_array = get_variant_array();
149/// // First field is an integer id, second field is a variant
150/// let schema = Schema::new(vec![
151///   Field::new("id", DataType::Int32, false),
152///   // call VariantArray::field to get the correct Field
153///   variant_array.field("var"),
154/// ]);
155/// ```
156///
157/// You can also construct the [`Field`] using [`VariantType`] directly
158///
159/// ```
160/// # use arrow_schema::{Schema, Field, DataType};
161/// # use parquet_variant::Variant;
162/// # use parquet_variant_compute::{VariantArrayBuilder, VariantArray, VariantType};
163/// # fn get_variant_array() -> VariantArray {
164/// #   let mut builder = VariantArrayBuilder::new(10);
165/// #   builder.append_variant(Variant::from("such wow"));
166/// #   builder.build()
167/// # }
168/// # let variant_array = get_variant_array();
169/// // The DataType of a VariantArray varies depending on how it is shredded
170/// let data_type = variant_array.data_type().clone();
171/// // First field is an integer id, second field is a variant
172/// let schema = Schema::new(vec![
173///   Field::new("id", DataType::Int32, false),
174///   Field::new("var", data_type, false)
175///     // Add extension metadata to the field using `VariantType`
176///     .with_extension_type(VariantType),
177/// ]);
178/// ```
179///
180/// # Example: Converting a [`VariantArray`] to a [`StructArray`]
181///
182/// ```
183/// # use arrow::array::StructArray;
184/// # use parquet_variant::Variant;
185/// # use parquet_variant_compute::VariantArrayBuilder;
186/// // Create Variant Array
187/// let mut builder = VariantArrayBuilder::new(10);
188/// builder.append_variant(Variant::from("such wow"));
189/// let variant_array = builder.build();
190/// // convert to StructArray
191/// let struct_array: StructArray = variant_array.into();
192/// ```
193///
194/// # Example: Converting a [`StructArray`] to a [`VariantArray`]
195///
196/// ```
197/// # use arrow::array::StructArray;
198/// # use parquet_variant::Variant;
199/// # use parquet_variant_compute::{VariantArrayBuilder, VariantArray};
200/// # fn get_struct_array() -> StructArray {
201/// #   let mut builder = VariantArrayBuilder::new(10);
202/// #   builder.append_variant(Variant::from("such wow"));
203/// #   builder.build().into()
204/// # }
205/// let struct_array: StructArray = get_struct_array();
206/// // try and create a VariantArray from it
207/// let variant_array = VariantArray::try_new(&struct_array).unwrap();
208/// assert_eq!(variant_array.value(0), Variant::from("such wow"));
209/// ```
210///
211#[derive(Debug, Clone, PartialEq)]
212pub struct VariantArray {
213    /// Reference to the underlying StructArray
214    inner: StructArray,
215
216    /// The metadata column of this variant
217    metadata: BinaryViewArray,
218
219    /// how is this variant array shredded?
220    shredding_state: ShreddingState,
221}
222
223impl VariantArray {
224    /// Creates a new `VariantArray` from a [`StructArray`].
225    ///
226    /// # Arguments
227    /// - `inner` - The underlying [`StructArray`] that contains the variant data.
228    ///
229    /// # Returns
230    /// - A new instance of `VariantArray`.
231    ///
232    /// # Errors:
233    /// - If the `StructArray` does not contain the required fields
234    ///
235    /// # Requirements of the `StructArray`
236    ///
237    /// 1. A required field named `metadata` which is binary, large_binary, or
238    ///    binary_view
239    ///
240    /// 2. An optional field named `value` that is binary, large_binary, or
241    ///    binary_view
242    ///
243    /// 3. An optional field named `typed_value` which can be any primitive type
244    ///    or be a list, large_list, list_view or struct
245    ///
246    /// NOTE: It is also permissible for the metadata field to be
247    /// Dictionary-Encoded, preferably (but not required) with an index type of
248    /// int8.
249    ///
250    /// Currently, only [`BinaryViewArray`] are supported.
251    pub fn try_new(inner: &dyn Array) -> Result<Self, ArrowError> {
252        // Workaround lack of support for Binary
253        // https://github.com/apache/arrow-rs/issues/8387
254        let inner = cast_to_binary_view_arrays(inner)?;
255
256        let Some(inner) = inner.as_struct_opt() else {
257            return Err(ArrowError::InvalidArgumentError(
258                "Invalid VariantArray: requires StructArray as input".to_string(),
259            ));
260        };
261
262        // Note the specification allows for any order so we must search by name
263
264        // Ensure the StructArray has a metadata field of BinaryView
265        let Some(metadata_field) = inner.column_by_name("metadata") else {
266            return Err(ArrowError::InvalidArgumentError(
267                "Invalid VariantArray: StructArray must contain a 'metadata' field".to_string(),
268            ));
269        };
270        let Some(metadata) = metadata_field.as_binary_view_opt() else {
271            return Err(ArrowError::NotYetImplemented(format!(
272                "VariantArray 'metadata' field must be BinaryView, got {}",
273                metadata_field.data_type()
274            )));
275        };
276
277        // Note these clones are cheap, they just bump the ref count
278        Ok(Self {
279            inner: inner.clone(),
280            metadata: metadata.clone(),
281            shredding_state: ShreddingState::try_from(inner)?,
282        })
283    }
284
285    pub(crate) fn from_parts(
286        metadata: BinaryViewArray,
287        value: Option<BinaryViewArray>,
288        typed_value: Option<ArrayRef>,
289        nulls: Option<NullBuffer>,
290    ) -> Self {
291        let mut builder =
292            StructArrayBuilder::new().with_field("metadata", Arc::new(metadata.clone()), false);
293        if let Some(value) = value.clone() {
294            builder = builder.with_field("value", Arc::new(value), true);
295        }
296        if let Some(typed_value) = typed_value.clone() {
297            builder = builder.with_field("typed_value", typed_value, true);
298        }
299        if let Some(nulls) = nulls {
300            builder = builder.with_nulls(nulls);
301        }
302
303        Self {
304            inner: builder.build(),
305            metadata,
306            shredding_state: ShreddingState::new(value, typed_value),
307        }
308    }
309
310    /// Returns a reference to the underlying [`StructArray`].
311    pub fn inner(&self) -> &StructArray {
312        &self.inner
313    }
314
315    /// Returns the inner [`StructArray`], consuming self
316    pub fn into_inner(self) -> StructArray {
317        self.inner
318    }
319
320    /// Return the shredding state of this `VariantArray`
321    pub fn shredding_state(&self) -> &ShreddingState {
322        &self.shredding_state
323    }
324
325    /// Return the [`Variant`] instance stored at the given row
326    ///
327    /// Note: This method does not check for nulls and the value is arbitrary
328    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
329    ///
330    /// # Panics
331    /// * if the index is out of bounds
332    /// * if the array value is null
333    ///
334    /// If this is a shredded variant but has no value at the shredded location, it
335    /// will return [`Variant::Null`].
336    ///
337    ///
338    /// # Performance Note
339    ///
340    /// This is certainly not the most efficient way to access values in a
341    /// `VariantArray`, but it is useful for testing and debugging.
342    ///
343    /// Note: Does not do deep validation of the [`Variant`], so it is up to the
344    /// caller to ensure that the metadata and value were constructed correctly.
345    pub fn value(&self, index: usize) -> Variant<'_, '_> {
346        match (self.typed_value_field(), self.value_field()) {
347            // Always prefer typed_value, if available
348            (Some(typed_value), value) if typed_value.is_valid(index) => {
349                typed_value_to_variant(typed_value, value, index)
350            }
351            // Otherwise fall back to value, if available
352            (_, Some(value)) if value.is_valid(index) => {
353                Variant::new(self.metadata.value(index), value.value(index))
354            }
355            // It is technically invalid for neither value nor typed_value fields to be available,
356            // but the spec specifically requires readers to return Variant::Null in this case.
357            _ => Variant::Null,
358        }
359    }
360
361    /// Return a reference to the metadata field of the [`StructArray`]
362    pub fn metadata_field(&self) -> &BinaryViewArray {
363        &self.metadata
364    }
365
366    /// Return a reference to the value field of the `StructArray`
367    pub fn value_field(&self) -> Option<&BinaryViewArray> {
368        self.shredding_state.value_field()
369    }
370
371    /// Return a reference to the typed_value field of the `StructArray`, if present
372    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
373        self.shredding_state.typed_value_field()
374    }
375
376    /// Return a field to represent this VariantArray in a `Schema` with
377    /// a particular name
378    pub fn field(&self, name: impl Into<String>) -> Field {
379        Field::new(
380            name.into(),
381            self.data_type().clone(),
382            self.inner.is_nullable(),
383        )
384        .with_extension_type(VariantType)
385    }
386
387    /// Returns a new DataType representing this VariantArray's inner type
388    pub fn data_type(&self) -> &DataType {
389        self.inner.data_type()
390    }
391
392    pub fn slice(&self, offset: usize, length: usize) -> Self {
393        let inner = self.inner.slice(offset, length);
394        let metadata = self.metadata.slice(offset, length);
395        let shredding_state = self.shredding_state.slice(offset, length);
396        Self {
397            inner,
398            metadata,
399            shredding_state,
400        }
401    }
402
403    pub fn len(&self) -> usize {
404        self.inner.len()
405    }
406
407    pub fn is_empty(&self) -> bool {
408        self.inner.is_empty()
409    }
410
411    pub fn nulls(&self) -> Option<&NullBuffer> {
412        self.inner.nulls()
413    }
414
415    /// Is the element at index null?
416    pub fn is_null(&self, index: usize) -> bool {
417        self.nulls().is_some_and(|n| n.is_null(index))
418    }
419
420    /// Is the element at index valid (not null)?
421    pub fn is_valid(&self, index: usize) -> bool {
422        !self.is_null(index)
423    }
424
425    /// Returns an iterator over the values in this array
426    pub fn iter(&self) -> VariantArrayIter<'_> {
427        VariantArrayIter::new(self)
428    }
429}
430
431impl From<VariantArray> for StructArray {
432    fn from(variant_array: VariantArray) -> Self {
433        variant_array.into_inner()
434    }
435}
436
437impl From<VariantArray> for ArrayRef {
438    fn from(variant_array: VariantArray) -> Self {
439        Arc::new(variant_array.into_inner())
440    }
441}
442
443impl<'m, 'v> FromIterator<Option<Variant<'m, 'v>>> for VariantArray {
444    fn from_iter<T: IntoIterator<Item = Option<Variant<'m, 'v>>>>(iter: T) -> Self {
445        let iter = iter.into_iter();
446
447        let mut b = VariantArrayBuilder::new(iter.size_hint().0);
448        b.extend(iter);
449        b.build()
450    }
451}
452
453impl<'m, 'v> FromIterator<Variant<'m, 'v>> for VariantArray {
454    fn from_iter<T: IntoIterator<Item = Variant<'m, 'v>>>(iter: T) -> Self {
455        Self::from_iter(iter.into_iter().map(Some))
456    }
457}
458
459/// An iterator over [`VariantArray`]
460///
461/// This iterator returns `Option<Option<Variant<'a, 'a>>>` where:
462/// - `None` indicates the end of iteration
463/// - `Some(None)` indicates a null value at this position
464/// - `Some(Some(variant))` indicates a valid variant value
465///
466/// # Example
467///
468/// ```
469/// # use parquet_variant::Variant;
470/// # use parquet_variant_compute::VariantArrayBuilder;
471/// let mut builder = VariantArrayBuilder::new(10);
472/// builder.append_variant(Variant::from(42));
473/// builder.append_null();
474/// builder.append_variant(Variant::from("hello"));
475/// let array = builder.build();
476///
477/// let values = array.iter().collect::<Vec<_>>();
478/// assert_eq!(values.len(), 3);
479/// assert_eq!(values[0], Some(Variant::from(42)));
480/// assert_eq!(values[1], None);
481/// assert_eq!(values[2], Some(Variant::from("hello")));
482/// ```
483#[derive(Debug)]
484pub struct VariantArrayIter<'a> {
485    array: &'a VariantArray,
486    head_i: usize,
487    tail_i: usize,
488}
489
490impl<'a> VariantArrayIter<'a> {
491    /// Creates a new iterator over the given [`VariantArray`]
492    pub fn new(array: &'a VariantArray) -> Self {
493        Self {
494            array,
495            head_i: 0,
496            tail_i: array.len(),
497        }
498    }
499
500    fn value_opt(&self, i: usize) -> Option<Variant<'a, 'a>> {
501        self.array.is_valid(i).then(|| self.array.value(i))
502    }
503}
504
505impl<'a> Iterator for VariantArrayIter<'a> {
506    type Item = Option<Variant<'a, 'a>>;
507
508    #[inline]
509    fn next(&mut self) -> Option<Self::Item> {
510        if self.head_i == self.tail_i {
511            return None;
512        }
513
514        let out = self.value_opt(self.head_i);
515
516        self.head_i += 1;
517
518        Some(out)
519    }
520
521    fn size_hint(&self) -> (usize, Option<usize>) {
522        let remainder = self.tail_i - self.head_i;
523
524        (remainder, Some(remainder))
525    }
526}
527
528impl<'a> DoubleEndedIterator for VariantArrayIter<'a> {
529    fn next_back(&mut self) -> Option<Self::Item> {
530        if self.head_i == self.tail_i {
531            return None;
532        }
533
534        self.tail_i -= 1;
535
536        Some(self.value_opt(self.tail_i))
537    }
538}
539
540impl<'a> ExactSizeIterator for VariantArrayIter<'a> {}
541
542/// One shredded field of a partially or prefectly shredded variant. For example, suppose the
543/// shredding schema for variant `v` treats it as an object with a single field `a`, where `a` is
544/// itself a struct with the single field `b` of type INT. Then the physical layout of the column
545/// is:
546///
547/// ```text
548/// v: VARIANT {
549///     metadata: BINARY,
550///     value: BINARY,
551///     typed_value: STRUCT {
552///         a: SHREDDED_VARIANT_FIELD {
553///             value: BINARY,
554///             typed_value: STRUCT {
555///                 a: SHREDDED_VARIANT_FIELD {
556///                     value: BINARY,
557///                     typed_value: INT,
558///                 },
559///             },
560///         },
561///     },
562/// }
563/// ```
564///
565/// In the above, each row of `v.value` is either a variant value (shredding failed, `v` was not an
566/// object at all) or a variant object (partial shredding, `v` was an object but included unexpected
567/// fields other than `a`), or is NULL (perfect shredding, `v` was an object containing only the
568/// single expected field `a`).
569///
570/// A similar story unfolds for each `v.typed_value.a.value` -- a variant value if shredding failed
571/// (`v:a` was not an object at all), or a variant object (`v:a` was an object with unexpected
572/// additional fields), or NULL (`v:a` was an object containing only the single expected field `b`).
573///
574/// Finally, `v.typed_value.a.typed_value.b.value` is either NULL (`v:a.b` was an integer) or else a
575/// variant value (which could be `Variant::Null`).
576#[derive(Debug)]
577pub struct ShreddedVariantFieldArray {
578    /// Reference to the underlying StructArray
579    inner: StructArray,
580    shredding_state: ShreddingState,
581}
582
583#[allow(unused)]
584impl ShreddedVariantFieldArray {
585    /// Creates a new `ShreddedVariantFieldArray` from a [`StructArray`].
586    ///
587    /// # Arguments
588    /// - `inner` - The underlying [`StructArray`] that contains the variant data.
589    ///
590    /// # Returns
591    /// - A new instance of `ShreddedVariantFieldArray`.
592    ///
593    /// # Errors:
594    /// - If the `StructArray` does not contain the required fields
595    ///
596    /// # Requirements of the `StructArray`
597    ///
598    /// 1. An optional field named `value` that is binary, large_binary, or
599    ///    binary_view
600    ///
601    /// 2. An optional field named `typed_value` which can be any primitive type
602    ///    or be a list, large_list, list_view or struct
603    ///
604    /// Currently, only `value` columns of type [`BinaryViewArray`] are supported.
605    pub fn try_new(inner: &dyn Array) -> Result<Self, ArrowError> {
606        let Some(inner_struct) = inner.as_struct_opt() else {
607            return Err(ArrowError::InvalidArgumentError(
608                "Invalid ShreddedVariantFieldArray: requires StructArray as input".to_string(),
609            ));
610        };
611
612        // Note this clone is cheap, it just bumps the ref count
613        Ok(Self {
614            inner: inner_struct.clone(),
615            shredding_state: ShreddingState::try_from(inner_struct)?,
616        })
617    }
618
619    /// Return the shredding state of this `VariantArray`
620    pub fn shredding_state(&self) -> &ShreddingState {
621        &self.shredding_state
622    }
623
624    /// Return a reference to the value field of the `StructArray`
625    pub fn value_field(&self) -> Option<&BinaryViewArray> {
626        self.shredding_state.value_field()
627    }
628
629    /// Return a reference to the typed_value field of the `StructArray`, if present
630    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
631        self.shredding_state.typed_value_field()
632    }
633
634    /// Returns a reference to the underlying [`StructArray`].
635    pub fn inner(&self) -> &StructArray {
636        &self.inner
637    }
638
639    pub(crate) fn from_parts(
640        value: Option<BinaryViewArray>,
641        typed_value: Option<ArrayRef>,
642        nulls: Option<NullBuffer>,
643    ) -> Self {
644        let mut builder = StructArrayBuilder::new();
645        if let Some(value) = value.clone() {
646            builder = builder.with_field("value", Arc::new(value), true);
647        }
648        if let Some(typed_value) = typed_value.clone() {
649            builder = builder.with_field("typed_value", typed_value, true);
650        }
651        if let Some(nulls) = nulls {
652            builder = builder.with_nulls(nulls);
653        }
654
655        Self {
656            inner: builder.build(),
657            shredding_state: ShreddingState::new(value, typed_value),
658        }
659    }
660
661    /// Returns the inner [`StructArray`], consuming self
662    pub fn into_inner(self) -> StructArray {
663        self.inner
664    }
665
666    pub fn data_type(&self) -> &DataType {
667        self.inner.data_type()
668    }
669
670    pub fn len(&self) -> usize {
671        self.inner.len()
672    }
673
674    pub fn is_empty(&self) -> bool {
675        self.inner.is_empty()
676    }
677
678    pub fn offset(&self) -> usize {
679        self.inner.offset()
680    }
681
682    pub fn nulls(&self) -> Option<&NullBuffer> {
683        // According to the shredding spec, ShreddedVariantFieldArray should be
684        // physically non-nullable - SQL NULL is inferred by both value and
685        // typed_value being physically NULL
686        None
687    }
688    /// Is the element at index null?
689    pub fn is_null(&self, index: usize) -> bool {
690        self.nulls().is_some_and(|n| n.is_null(index))
691    }
692
693    /// Is the element at index valid (not null)?
694    pub fn is_valid(&self, index: usize) -> bool {
695        !self.is_null(index)
696    }
697}
698
699impl From<ShreddedVariantFieldArray> for ArrayRef {
700    fn from(array: ShreddedVariantFieldArray) -> Self {
701        Arc::new(array.into_inner())
702    }
703}
704
705impl From<ShreddedVariantFieldArray> for StructArray {
706    fn from(array: ShreddedVariantFieldArray) -> Self {
707        array.into_inner()
708    }
709}
710
711/// Represents the shredding state of a [`VariantArray`]
712///
713/// [`VariantArray`]s can be shredded according to the [Parquet Variant
714/// Shredding Spec]. Shredding means that the actual value is stored in a typed
715/// `typed_field` instead of the generic `value` field.
716///
717/// Both value and typed_value are optional fields used together to encode a
718/// single value. Values in the two fields must be interpreted according to the
719/// following table (see [Parquet Variant Shredding Spec] for more details):
720///
721/// | value    | typed_value  | Meaning |
722/// |----------|--------------|---------|
723/// | NULL     | NULL         | The value is missing; only valid for shredded object fields |
724/// | non-NULL | NULL         | The value is present and may be any type, including [`Variant::Null`] |
725/// | NULL     | non-NULL     | The value is present and is the shredded type |
726/// | non-NULL | non-NULL     | The value is present and is a partially shredded object |
727///
728///
729/// Applying the above rules to entire columns, we obtain the following:
730///
731/// | value  | typed_value  | Meaning |
732/// |--------|-------------|---------|
733/// | --     | --          | **Missing**: The value is always missing; only valid for shredded object fields |
734/// | exists | --          | **Unshredded**: If present, the value may be any type, including [`Variant::Null`]
735/// | --     | exists      | **Perfectly shredded**: If present, the value is always the shredded type |
736/// | exists | exists      | **Imperfectly shredded**: The value might (not) be present and might (not) be the shredded type |
737///
738/// NOTE: Partial shredding is a row-wise situation that can arise under imperfect shredding (a
739/// column-wise situation): When both columns exist (imperfect shredding) and the typed_value column
740/// is a struct, then both columns can be non-NULL for the same row if value is a variant object
741/// (partial shredding).
742///
743/// [Parquet Variant Shredding Spec]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md#value-shredding
744#[derive(Debug, Clone, PartialEq)]
745pub struct ShreddingState {
746    value: Option<BinaryViewArray>,
747    typed_value: Option<ArrayRef>,
748}
749
750impl ShreddingState {
751    /// Create a new `ShreddingState` from the given `value` and `typed_value` fields
752    ///
753    /// Note you can create a `ShreddingState` from a &[`StructArray`] using
754    /// `ShreddingState::try_from(&struct_array)`, for example:
755    ///
756    /// ```no_run
757    /// # use arrow::array::StructArray;
758    /// # use parquet_variant_compute::ShreddingState;
759    /// # fn get_struct_array() -> StructArray {
760    /// #   unimplemented!()
761    /// # }
762    /// let struct_array: StructArray = get_struct_array();
763    /// let shredding_state = ShreddingState::try_from(&struct_array).unwrap();
764    /// ```
765    pub fn new(value: Option<BinaryViewArray>, typed_value: Option<ArrayRef>) -> Self {
766        Self { value, typed_value }
767    }
768
769    /// Return a reference to the value field, if present
770    pub fn value_field(&self) -> Option<&BinaryViewArray> {
771        self.value.as_ref()
772    }
773
774    /// Return a reference to the typed_value field, if present
775    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
776        self.typed_value.as_ref()
777    }
778
779    /// Returns a borrowed version of this shredding state
780    pub fn borrow(&self) -> BorrowedShreddingState<'_> {
781        BorrowedShreddingState {
782            value: self.value_field(),
783            typed_value: self.typed_value_field(),
784        }
785    }
786
787    /// Slice all the underlying arrays
788    pub fn slice(&self, offset: usize, length: usize) -> Self {
789        Self {
790            value: self.value.as_ref().map(|v| v.slice(offset, length)),
791            typed_value: self.typed_value.as_ref().map(|tv| tv.slice(offset, length)),
792        }
793    }
794}
795
796/// Similar to [`ShreddingState`] except it holds borrowed references of the target arrays. Useful
797/// for avoiding clone operations when the caller does not need a self-standing shredding state.
798#[derive(Clone, Debug)]
799pub struct BorrowedShreddingState<'a> {
800    value: Option<&'a BinaryViewArray>,
801    typed_value: Option<&'a ArrayRef>,
802}
803
804impl<'a> BorrowedShreddingState<'a> {
805    /// Create a new `BorrowedShreddingState` from the given `value` and `typed_value` fields
806    ///
807    /// Note you can create a `BorrowedShreddingState` from a &[`StructArray`] using
808    /// `BorrowedShreddingState::try_from(&struct_array)`, for example:
809    ///
810    /// ```no_run
811    /// # use arrow::array::StructArray;
812    /// # use parquet_variant_compute::BorrowedShreddingState;
813    /// # fn get_struct_array() -> StructArray {
814    /// #   unimplemented!()
815    /// # }
816    /// let struct_array: StructArray = get_struct_array();
817    /// let shredding_state = BorrowedShreddingState::try_from(&struct_array).unwrap();
818    /// ```
819    pub fn new(value: Option<&'a BinaryViewArray>, typed_value: Option<&'a ArrayRef>) -> Self {
820        Self { value, typed_value }
821    }
822
823    /// Return a reference to the value field, if present
824    pub fn value_field(&self) -> Option<&'a BinaryViewArray> {
825        self.value
826    }
827
828    /// Return a reference to the typed_value field, if present
829    pub fn typed_value_field(&self) -> Option<&'a ArrayRef> {
830        self.typed_value
831    }
832}
833
834impl<'a> TryFrom<&'a StructArray> for BorrowedShreddingState<'a> {
835    type Error = ArrowError;
836
837    fn try_from(inner_struct: &'a StructArray) -> Result<Self, ArrowError> {
838        // The `value` column need not exist, but if it does it must be a binary view.
839        let value = if let Some(value_col) = inner_struct.column_by_name("value") {
840            let Some(binary_view) = value_col.as_binary_view_opt() else {
841                return Err(ArrowError::NotYetImplemented(format!(
842                    "VariantArray 'value' field must be BinaryView, got {}",
843                    value_col.data_type()
844                )));
845            };
846            Some(binary_view)
847        } else {
848            None
849        };
850        let typed_value = inner_struct.column_by_name("typed_value");
851        Ok(BorrowedShreddingState::new(value, typed_value))
852    }
853}
854
855impl TryFrom<&StructArray> for ShreddingState {
856    type Error = ArrowError;
857
858    fn try_from(inner_struct: &StructArray) -> Result<Self, ArrowError> {
859        Ok(BorrowedShreddingState::try_from(inner_struct)?.into())
860    }
861}
862
863impl From<BorrowedShreddingState<'_>> for ShreddingState {
864    fn from(state: BorrowedShreddingState<'_>) -> Self {
865        ShreddingState {
866            value: state.value_field().cloned(),
867            typed_value: state.typed_value_field().cloned(),
868        }
869    }
870}
871
872/// Builds struct arrays from component fields
873///
874/// TODO: move to arrow crate
875#[derive(Debug, Default, Clone)]
876pub(crate) struct StructArrayBuilder {
877    fields: Vec<FieldRef>,
878    arrays: Vec<ArrayRef>,
879    nulls: Option<NullBuffer>,
880}
881
882impl StructArrayBuilder {
883    pub fn new() -> Self {
884        Default::default()
885    }
886
887    /// Add an array to this struct array as a field with the specified name.
888    pub fn with_field(mut self, field_name: &str, array: ArrayRef, nullable: bool) -> Self {
889        let field = Field::new(field_name, array.data_type().clone(), nullable);
890        self.fields.push(Arc::new(field));
891        self.arrays.push(array);
892        self
893    }
894
895    /// Set the null buffer for this struct array.
896    pub fn with_nulls(mut self, nulls: NullBuffer) -> Self {
897        self.nulls = Some(nulls);
898        self
899    }
900
901    pub fn build(self) -> StructArray {
902        let Self {
903            fields,
904            arrays,
905            nulls,
906        } = self;
907        StructArray::new(Fields::from(fields), arrays, nulls)
908    }
909}
910
/// Returns the element at `index` of `typed_value` as a [`Variant`].
///
/// The element is expected to be non-null: this function does not check the validity of
/// `typed_value` at `index` itself.
///
/// `value` is the optional sibling `value` column. It is only consulted to detect an invalid
/// combination: unless `typed_value` is a struct, `value` must not also be valid at `index`.
///
/// Data types without a dedicated match arm are not supported yet. For those, debug builds
/// fail the `debug_assert!` and release builds return [`Variant::Null`].
///
/// # Panics
///
/// * If `value` is valid at `index` while `typed_value` is not a `Struct`.
/// * If a microsecond timestamp is rejected by `DateTime::from_timestamp_micros` (the result is
///   unwrapped).
/// * In debug builds, if the data type of `typed_value` has no match arm below.
fn typed_value_to_variant<'a>(
    typed_value: &'a ArrayRef,
    value: Option<&BinaryViewArray>,
    index: usize,
) -> Variant<'a, 'a> {
    let data_type = typed_value.data_type();
    if value.is_some_and(|v| !matches!(data_type, DataType::Struct(_)) && v.is_valid(index)) {
        // Only a partially shredded struct is allowed to have values for both columns
        panic!("Invalid variant, conflicting value and typed_value");
    }
    match data_type {
        DataType::Boolean => {
            let boolean_array = typed_value.as_boolean();
            let value = boolean_array.value(index);
            Variant::from(value)
        }
        DataType::Date32 => {
            let array = typed_value.as_primitive::<Date32Type>();
            let value = array.value(index);
            let date = Date32Type::to_naive_date(value);
            Variant::from(date)
        }
        // 16-byte FixedSizeBinary always corresponds to a UUID; all other sizes are illegal.
        DataType::FixedSizeBinary(16) => {
            let array = typed_value.as_fixed_size_binary();
            let value = array.value(index);
            Uuid::from_slice(value).unwrap().into() // unwrap is safe: slice is always 16 bytes
        }
        DataType::BinaryView => {
            let array = typed_value.as_binary_view();
            let value = array.value(index);
            Variant::from(value)
        }
        DataType::Utf8 => {
            let array = typed_value.as_string::<i32>();
            let value = array.value(index);
            Variant::from(value)
        }
        // The integer and float arms delegate to a crate macro defined in `type_conversion`.
        // NOTE(review): presumably it reads the primitive at `index` and converts it into a
        // Variant -- confirm against the macro definition.
        DataType::Int8 => {
            primitive_conversion_single_value!(Int8Type, typed_value, index)
        }
        DataType::Int16 => {
            primitive_conversion_single_value!(Int16Type, typed_value, index)
        }
        DataType::Int32 => {
            primitive_conversion_single_value!(Int32Type, typed_value, index)
        }
        DataType::Int64 => {
            primitive_conversion_single_value!(Int64Type, typed_value, index)
        }
        // NOTE(review): `canonicalize_and_verify_data_type` rejects Float16, so this arm may be
        // unreachable for arrays that were validated through it -- confirm.
        DataType::Float16 => {
            primitive_conversion_single_value!(Float16Type, typed_value, index)
        }
        DataType::Float32 => {
            primitive_conversion_single_value!(Float32Type, typed_value, index)
        }
        DataType::Float64 => {
            primitive_conversion_single_value!(Float64Type, typed_value, index)
        }
        // Timestamps: a timezone (`Some(_)`) keeps the `DateTime` produced by chrono, while no
        // timezone (`None`) additionally converts it with `naive_utc()`.
        DataType::Timestamp(TimeUnit::Microsecond, Some(_)) => {
            generic_conversion_single_value!(
                TimestampMicrosecondType,
                as_primitive,
                |v| DateTime::from_timestamp_micros(v).unwrap(),
                typed_value,
                index
            )
        }
        DataType::Timestamp(TimeUnit::Microsecond, None) => {
            generic_conversion_single_value!(
                TimestampMicrosecondType,
                as_primitive,
                |v| DateTime::from_timestamp_micros(v).unwrap().naive_utc(),
                typed_value,
                index
            )
        }
        DataType::Timestamp(TimeUnit::Nanosecond, Some(_)) => {
            generic_conversion_single_value!(
                TimestampNanosecondType,
                as_primitive,
                DateTime::from_timestamp_nanos,
                typed_value,
                index
            )
        }
        DataType::Timestamp(TimeUnit::Nanosecond, None) => {
            generic_conversion_single_value!(
                TimestampNanosecondType,
                as_primitive,
                |v| DateTime::from_timestamp_nanos(v).naive_utc(),
                typed_value,
                index
            )
        }
        // todo other types here (note this is very similar to cast_to_variant.rs)
        // so it would be great to figure out how to share this code
        _ => {
            // We shouldn't panic in production code, but this is a
            // placeholder until we implement more types
            // https://github.com/apache/arrow-rs/issues/8091
            debug_assert!(
                false,
                "Unsupported typed_value type: {}",
                typed_value.data_type()
            );
            Variant::Null
        }
    }
}
1022
1023/// Workaround for lack of direct support for BinaryArray
1024/// <https://github.com/apache/arrow-rs/issues/8387>
1025///
1026/// The values are read as
1027/// * `StructArray<metadata: Binary, value: Binary>`
1028///
1029/// but VariantArray needs them as
1030/// * `StructArray<metadata: BinaryView, value: BinaryView>`
1031///
1032/// So cast them to get the right type.
1033fn cast_to_binary_view_arrays(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
1034    let new_type = canonicalize_and_verify_data_type(array.data_type())?;
1035    if let Cow::Borrowed(_) = new_type {
1036        if let Some(array) = array.as_struct_opt() {
1037            return Ok(Arc::new(array.clone())); // bypass the unnecessary cast
1038        }
1039    }
1040    cast(array, new_type.as_ref())
1041}
1042
/// Recursively visits a data type, ensuring that it only contains data types that can legally
/// appear in a (possibly shredded) variant array. It also replaces Binary fields with BinaryView,
/// since that's what comes back from the parquet reader and what the variant code expects to find.
///
/// # Returns
///
/// * `Cow::Borrowed(data_type)` when the type is legal and nothing had to be rewritten.
/// * `Cow::Owned(..)` holding the rewritten type when this type, or a type nested inside a list
///   or struct, was replaced (Binary to BinaryView, or a decimal narrowed to a smaller width).
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when this type, or any type nested inside a list
/// or struct, is not allowed.
fn canonicalize_and_verify_data_type(
    data_type: &DataType,
) -> Result<Cow<'_, DataType>, ArrowError> {
    use DataType::*;

    // helper macros
    // `fail!()` returns early from the enclosing function with an error naming `data_type`.
    macro_rules! fail {
        () => {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Illegal shredded value type: {data_type}"
            )))
        };
    }
    // `borrow!()` evaluates to the input type unchanged, without allocating.
    macro_rules! borrow {
        () => {
            Cow::Borrowed(data_type)
        };
    }

    let new_data_type = match data_type {
        // Primitive arrow types that have a direct variant counterpart are allowed
        Null | Boolean => borrow!(),
        Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => borrow!(),

        // Unsigned integers and half-float are not allowed
        UInt8 | UInt16 | UInt32 | UInt64 | Float16 => fail!(),

        // Most decimal types are allowed, with restrictions on precision and scale
        //
        // NOTE: the arrow parquet reader widens 32- and 64-bit decimals to 128-bit, but the
        // variant spec requires using the narrowest decimal type for a given precision. Fix
        // those up first.
        //
        // Arm order matters here: match arms are tried top to bottom, so the two narrowing arms
        // take precedence over the pass-through arms, and the final catch-all decimal arm only
        // sees decimals that none of the guards accepted.
        Decimal64(p, s) | Decimal128(p, s)
            if VariantDecimal4::is_valid_precision_and_scale(p, s) =>
        {
            Cow::Owned(Decimal32(*p, *s))
        }
        Decimal128(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => {
            Cow::Owned(Decimal64(*p, *s))
        }
        Decimal32(p, s) if VariantDecimal4::is_valid_precision_and_scale(p, s) => borrow!(),
        Decimal64(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => borrow!(),
        Decimal128(p, s) if VariantDecimal16::is_valid_precision_and_scale(p, s) => borrow!(),
        Decimal32(..) | Decimal64(..) | Decimal128(..) | Decimal256(..) => fail!(),

        // Only micro and nano timestamps are allowed
        Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _) => borrow!(),
        Timestamp(TimeUnit::Millisecond | TimeUnit::Second, _) => fail!(),

        // Only 32-bit dates and 64-bit microsecond time are allowed.
        // (`Time64(_)` on the next line only sees the non-microsecond units, because the
        // microsecond case has already matched.)
        Date32 | Time64(TimeUnit::Microsecond) => borrow!(),
        Date64 | Time32(_) | Time64(_) | Duration(_) | Interval(_) => fail!(),

        // Binary and string are allowed. Force Binary to BinaryView because that's what the parquet
        // reader returns and what the rest of the variant code expects.
        Binary => Cow::Owned(DataType::BinaryView),
        BinaryView | Utf8 => borrow!(),

        // UUID maps to 16-byte fixed-size binary; no other width is allowed
        FixedSizeBinary(16) => borrow!(),
        FixedSizeBinary(_) | FixedSizeList(..) => fail!(),

        // We can _possibly_ allow (some of) these some day?
        LargeBinary | LargeUtf8 | Utf8View | ListView(_) | LargeList(_) | LargeListView(_) => {
            fail!()
        }

        // Lists and struct are allowed, maps and unions are not
        // A list is rewritten only when its element field was rewritten.
        List(field) => match canonicalize_and_verify_field(field)? {
            Cow::Borrowed(_) => borrow!(),
            Cow::Owned(new_field) => Cow::Owned(DataType::List(new_field)),
        },
        // Struct is used by the internal layout, and can also represent a shredded variant object.
        Struct(fields) => {
            // Avoid allocation unless at least one field changes, to avoid unnecessary deep cloning
            // of the data type. Even if some fields change, the others are shallow arc clones.
            // Maps the position of each rewritten field to its replacement.
            let mut new_fields = std::collections::HashMap::new();
            for (i, field) in fields.iter().enumerate() {
                if let Cow::Owned(new_field) = canonicalize_and_verify_field(field)? {
                    new_fields.insert(i, new_field);
                }
            }

            if new_fields.is_empty() {
                borrow!()
            } else {
                // Rebuild the field list in the original order, substituting the rewritten
                // fields and arc-cloning the untouched ones.
                let new_fields = fields
                    .iter()
                    .enumerate()
                    .map(|(i, field)| new_fields.remove(&i).unwrap_or_else(|| field.clone()));
                Cow::Owned(DataType::Struct(new_fields.collect()))
            }
        }
        Map(..) | Union(..) => fail!(),

        // We can _possibly_ support (some of) these some day?
        Dictionary(..) | RunEndEncoded(..) => fail!(),
    };
    Ok(new_data_type)
}
1145
1146fn canonicalize_and_verify_field(field: &Arc<Field>) -> Result<Cow<'_, Arc<Field>>, ArrowError> {
1147    let Cow::Owned(new_data_type) = canonicalize_and_verify_data_type(field.data_type())? else {
1148        return Ok(Cow::Borrowed(field));
1149    };
1150    let new_field = field.as_ref().clone().with_data_type(new_data_type);
1151    Ok(Cow::Owned(Arc::new(new_field)))
1152}
1153
#[cfg(test)]
mod test {
    use crate::VariantArrayBuilder;

    use super::*;
    use arrow::array::{BinaryViewArray, Int32Array};
    use arrow_schema::{Field, Fields};
    use parquet_variant::ShortString;

    #[test]
    fn invalid_not_a_struct_array() {
        let array = make_binary_view_array();
        // Should fail because the input is not a StructArray
        let err = VariantArray::try_new(&array);
        assert_eq!(
            err.unwrap_err().to_string(),
            "Invalid argument error: Invalid VariantArray: requires StructArray as input"
        );
    }

    #[test]
    fn invalid_missing_metadata() {
        let fields = Fields::from(vec![Field::new("value", DataType::BinaryView, true)]);
        let array = StructArray::new(fields, vec![make_binary_view_array()], None);
        // Should fail because the StructArray does not contain a 'metadata' field
        let err = VariantArray::try_new(&array);
        assert_eq!(
            err.unwrap_err().to_string(),
            "Invalid argument error: Invalid VariantArray: StructArray must contain a 'metadata' field"
        );
    }

    #[test]
    fn all_null_missing_value_and_typed_value() {
        let fields = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
        let array = StructArray::new(fields, vec![make_binary_view_array()], None);

        // NOTE: By strict spec interpretation, this case (top-level variant with null/null)
        // should be invalid, but we currently allow it and treat it as Variant::Null.
        // This is a pragmatic decision to handle missing data gracefully.
        let variant_array = VariantArray::try_new(&array).unwrap();

        // Verify the shredding state is AllNull
        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState {
                value: None,
                typed_value: None
            }
        ));

        // Verify that value() returns Variant::Null (compensating for spec violation)
        for i in 0..variant_array.len() {
            if variant_array.is_valid(i) {
                assert_eq!(variant_array.value(i), Variant::Null);
            }
        }
    }

    #[test]
    fn invalid_metadata_field_type() {
        let fields = Fields::from(vec![
            Field::new("metadata", DataType::Int32, true), // not supported
            Field::new("value", DataType::BinaryView, true),
        ]);
        let array = StructArray::new(
            fields,
            vec![make_int32_array(), make_binary_view_array()],
            None,
        );
        let err = VariantArray::try_new(&array);
        assert_eq!(
            err.unwrap_err().to_string(),
            "Not yet implemented: VariantArray 'metadata' field must be BinaryView, got Int32"
        );
    }

    #[test]
    fn invalid_value_field_type() {
        let fields = Fields::from(vec![
            Field::new("metadata", DataType::BinaryView, true),
            Field::new("value", DataType::Int32, true), // Not yet supported
        ]);
        let array = StructArray::new(
            fields,
            vec![make_binary_view_array(), make_int32_array()],
            None,
        );
        let err = VariantArray::try_new(&array);
        assert_eq!(
            err.unwrap_err().to_string(),
            "Not yet implemented: VariantArray 'value' field must be BinaryView, got Int32"
        );
    }

    /// Single-row binary view array used as a stand-in column by the tests above
    fn make_binary_view_array() -> ArrayRef {
        Arc::new(BinaryViewArray::from(vec![b"test" as &[u8]]))
    }

    /// Single-row Int32 array used to provoke "wrong column type" errors
    fn make_int32_array() -> ArrayRef {
        Arc::new(Int32Array::from(vec![1]))
    }

    #[test]
    fn all_null_shredding_state() {
        // Verify the shredding state is AllNull
        assert!(matches!(
            ShreddingState::new(None, None),
            ShreddingState {
                value: None,
                typed_value: None
            }
        ));
    }

    #[test]
    fn all_null_variant_array_construction() {
        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);
        let nulls = NullBuffer::from(vec![false, false, false]); // all null

        let fields = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
        let struct_array = StructArray::new(fields, vec![Arc::new(metadata)], Some(nulls));

        let variant_array = VariantArray::try_new(&struct_array).unwrap();

        // Verify the shredding state is AllNull
        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState {
                value: None,
                typed_value: None
            }
        ));

        // Verify all values are null
        assert_eq!(variant_array.len(), 3);
        assert!(!variant_array.is_valid(0));
        assert!(!variant_array.is_valid(1));
        assert!(!variant_array.is_valid(2));

        // Verify the same through a loop over every index, so the failing index gets reported
        for i in 0..variant_array.len() {
            assert!(
                !variant_array.is_valid(i),
                "Expected value at index {i} to be null"
            );
        }
    }

    #[test]
    fn value_field_present_but_all_null_should_be_unshredded() {
        // This test demonstrates the issue: when a value field exists in schema
        // but all its values are null, it should remain Unshredded, not AllNull
        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);

        // Create a value field with all null values
        let value_nulls = NullBuffer::from(vec![false, false, false]); // all null
        let value_array = BinaryViewArray::from_iter_values(vec![""; 3]);
        let value_data = value_array
            .to_data()
            .into_builder()
            .nulls(Some(value_nulls))
            .build()
            .unwrap();
        let value = BinaryViewArray::from(value_data);

        let fields = Fields::from(vec![
            Field::new("metadata", DataType::BinaryView, false),
            Field::new("value", DataType::BinaryView, true), // Field exists in schema
        ]);
        let struct_array = StructArray::new(
            fields,
            vec![Arc::new(metadata), Arc::new(value)],
            None, // struct itself is not null, just the value field is all null
        );

        let variant_array = VariantArray::try_new(&struct_array).unwrap();

        // This should be Unshredded, not AllNull, because value field exists in schema
        assert!(matches!(
            variant_array.shredding_state(),
            ShreddingState {
                value: Some(_),
                typed_value: None
            }
        ));
    }

    #[test]
    fn test_variant_array_iterable() {
        let mut b = VariantArrayBuilder::new(6);

        b.append_null();
        b.append_variant(Variant::from(1_i8));
        b.append_variant(Variant::Null);
        b.append_variant(Variant::from(2_i32));
        b.append_variant(Variant::from(3_i64));
        b.append_null();

        let v = b.build();

        let variants = v.iter().collect::<Vec<_>>();

        // A null row yields `None`, whereas a variant null yields `Some(Variant::Null)`
        assert_eq!(
            variants,
            vec![
                None,
                Some(Variant::Int8(1)),
                Some(Variant::Null),
                Some(Variant::Int32(2)),
                Some(Variant::Int64(3)),
                None,
            ]
        );
    }

    #[test]
    fn test_variant_array_iter_double_ended() {
        let mut b = VariantArrayBuilder::new(5);

        b.append_variant(Variant::from(0_i32));
        b.append_null();
        b.append_variant(Variant::from(2_i32));
        b.append_null();
        b.append_variant(Variant::from(4_i32));

        let array = b.build();
        let mut iter = array.iter();

        // Consume two items from the front ...
        assert_eq!(iter.next(), Some(Some(Variant::from(0_i32))));
        assert_eq!(iter.next(), Some(None));

        // ... and the remaining three from the back
        assert_eq!(iter.next_back(), Some(Some(Variant::from(4_i32))));
        assert_eq!(iter.next_back(), Some(None));
        assert_eq!(iter.next_back(), Some(Some(Variant::from(2_i32))));

        // Both ends must now report exhaustion
        assert_eq!(iter.next_back(), None);
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_variant_array_iter_reverse() {
        let mut b = VariantArrayBuilder::new(5);

        b.append_variant(Variant::from("a"));
        b.append_null();
        b.append_variant(Variant::from("aaa"));
        b.append_null();
        b.append_variant(Variant::from("aaaaa"));

        let array = b.build();

        let result: Vec<_> = array.iter().rev().collect();
        assert_eq!(
            result,
            vec![
                Some(Variant::from("aaaaa")),
                None,
                Some(Variant::from("aaa")),
                None,
                Some(Variant::from("a")),
            ]
        );
    }

    #[test]
    fn test_variant_array_iter_empty() {
        let v = VariantArrayBuilder::new(0).build();
        let mut i = v.iter();
        assert!(i.next().is_none());
        assert!(i.next_back().is_none());
    }

    #[test]
    fn test_from_variant_opts_into_variant_array() {
        let v = vec![None, Some(Variant::Null), Some(Variant::BooleanFalse), None];

        let variant_array = VariantArray::from_iter(v);

        assert_eq!(variant_array.len(), 4);

        assert!(variant_array.is_null(0));

        assert!(!variant_array.is_null(1));
        assert_eq!(variant_array.value(1), Variant::Null);

        assert!(!variant_array.is_null(2));
        assert_eq!(variant_array.value(2), Variant::BooleanFalse);

        assert!(variant_array.is_null(3));
    }

    #[test]
    fn test_from_variants_into_variant_array() {
        let v = vec![
            Variant::Null,
            Variant::BooleanFalse,
            Variant::ShortString(ShortString::try_new("norm").unwrap()),
        ];

        let variant_array = VariantArray::from_iter(v);

        assert_eq!(variant_array.len(), 3);

        assert!(!variant_array.is_null(0));
        assert_eq!(variant_array.value(0), Variant::Null);

        assert!(!variant_array.is_null(1));
        assert_eq!(variant_array.value(1), Variant::BooleanFalse);

        // The array has exactly three elements, so the last valid index is 2
        assert!(!variant_array.is_null(2));
        assert_eq!(
            variant_array.value(2),
            Variant::ShortString(ShortString::try_new("norm").unwrap())
        );
    }

    #[test]
    fn test_variant_equality() {
        let v_iter = [None, Some(Variant::BooleanFalse), Some(Variant::Null), None];
        let v = VariantArray::from_iter(v_iter.clone());

        // A clone compares equal to its source
        {
            let v_copy = v.clone();
            assert_eq!(v, v_copy);
        }

        // Same elements in a different order are not equal
        {
            let v_iter_reversed = v_iter.iter().cloned().rev();
            let v_reversed = VariantArray::from_iter(v_iter_reversed);

            assert_ne!(v, v_reversed);
        }

        // A shorter slice of the array is not equal to the whole array
        {
            let v_sliced = v.slice(0, 1);
            assert_ne!(v, v_sliced);
        }
    }
}