Skip to main content

vortex_array/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Encodings that enable zero-copy sharing of data with Arrow.
5
6use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::ArraySlots;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::array::ArrayView;
21use crate::array::child_to_validity;
22use crate::arrays::Bool;
23use crate::arrays::BoolArray;
24use crate::arrays::Decimal;
25use crate::arrays::DecimalArray;
26use crate::arrays::Extension;
27use crate::arrays::ExtensionArray;
28use crate::arrays::FixedSizeList;
29use crate::arrays::FixedSizeListArray;
30use crate::arrays::ListView;
31use crate::arrays::ListViewArray;
32use crate::arrays::Null;
33use crate::arrays::NullArray;
34use crate::arrays::Primitive;
35use crate::arrays::PrimitiveArray;
36use crate::arrays::Struct;
37use crate::arrays::StructArray;
38use crate::arrays::VarBinView;
39use crate::arrays::VarBinViewArray;
40use crate::arrays::Variant;
41use crate::arrays::VariantArray;
42use crate::arrays::bool::BoolDataParts;
43use crate::arrays::decimal::DecimalDataParts;
44use crate::arrays::extension::ExtensionArrayExt;
45use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
46use crate::arrays::listview::ListViewDataParts;
47use crate::arrays::listview::ListViewRebuildMode;
48use crate::arrays::primitive::PrimitiveDataParts;
49use crate::arrays::struct_::StructDataParts;
50use crate::arrays::varbinview::VarBinViewDataParts;
51use crate::arrays::variant::VariantArrayExt;
52use crate::dtype::DType;
53use crate::dtype::NativePType;
54use crate::dtype::Nullability;
55use crate::dtype::PType;
56use crate::match_each_decimal_value_type;
57use crate::match_each_native_ptype;
58use crate::matcher::Matcher;
59use crate::validity::Validity;
60
61/// An enum capturing the default uncompressed encodings for each [Vortex type](DType).
62///
63/// Any array can be decoded into canonical form via the `to_canonical`
64/// trait method. This is the simplest encoding for a type, and will not be compressed but may
65/// contain compressed child arrays.
66///
67/// Canonical form is useful for doing type-specific compute where you need to know that all
68/// elements are laid out decompressed and contiguous in memory.
69///
70/// Each `Canonical` variant has a corresponding [`DType`] variant, with the notable exception of
71/// [`Canonical::VarBinView`], which is the canonical encoding for both [`DType::Utf8`] and
72/// [`DType::Binary`]. [`DType::Union`] does not yet have a public canonical array.
73///
74/// # Laziness
75///
76/// Canonical form is not recursive, so while a `StructArray` is the canonical format for any
77/// `Struct` type, individual column child arrays may still be compressed. This allows
78/// compute over Vortex arrays to push decoding as late as possible, and ideally many child arrays
79/// never need to be decoded into canonical form at all depending on the compute.
80///
81/// # Arrow interoperability
82///
83/// All of the Vortex canonical encodings have an equivalent Arrow encoding that can be built
84/// zero-copy, and the corresponding Arrow array types can also be built directly.
85///
86/// The full list of canonical types and their equivalent Arrow array types are:
87///
88/// * `NullArray`: [`arrow_array::NullArray`]
89/// * `BoolArray`: [`arrow_array::BooleanArray`]
90/// * `PrimitiveArray`: [`arrow_array::PrimitiveArray`]
91/// * `DecimalArray`: [`arrow_array::Decimal128Array`] and [`arrow_array::Decimal256Array`]
92/// * `VarBinViewArray`: [`arrow_array::GenericByteViewArray`]
93/// * `ListViewArray`: [`arrow_array::ListViewArray`]
94/// * `FixedSizeListArray`: [`arrow_array::FixedSizeListArray`]
95/// * `StructArray`: [`arrow_array::StructArray`]
96///
97/// Vortex uses a logical type system, unlike Arrow which uses physical encodings for its types.
98/// As an example, there are at least six valid physical encodings for a `Utf8` array. This can
99/// create ambiguity.
100/// Thus, if you receive an Arrow array, compress it using Vortex, and then
101/// decompress it later to pass to a compute kernel, there are multiple suitable Arrow array
102/// variants to hold the data.
103///
104/// To disambiguate, we choose a canonical physical encoding for every Vortex [`DType`], which
105/// will correspond to an arrow-rs [`arrow_schema::DataType`].
106///
107/// # Views support
108///
109/// Binary and String views, also known as "German strings" are a better encoding format for
110/// nearly all use-cases. Variable-length binary views are part of the Apache Arrow spec, and are
111/// fully supported by the Datafusion query engine. We use them as our canonical string encoding
112/// for all `Utf8` and `Binary` typed arrays in Vortex. They provide considerably faster filter
113/// execution than the core `StringArray` and `BinaryArray` types, at the expense of potentially
114/// needing [garbage collection][arrow_array::GenericByteViewArray::gc] to clear unreferenced items
115/// from memory.
116///
117/// # For Developers
118///
119/// If you add another variant to this enum, make sure to update `dyn Array::is_canonical`,
120/// and the fuzzer in `fuzz/fuzz_targets/array_ops.rs`.
121#[derive(Debug, Clone)]
122pub enum Canonical {
123    Null(NullArray),
124    Bool(BoolArray),
125    Primitive(PrimitiveArray),
126    Decimal(DecimalArray),
127    VarBinView(VarBinViewArray),
128    List(ListViewArray),
129    FixedSizeList(FixedSizeListArray),
130    Struct(StructArray),
131    /// Canonical storage for extension dtypes, wrapping the canonical form of the storage dtype.
132    Extension(ExtensionArray),
133    /// Canonical storage for dynamic variant values, optionally with typed shredded paths.
134    Variant(VariantArray),
135}
136
/// Dispatch over every [`Canonical`] variant, binding the wrapped array to the given
/// identifier and evaluating the same expression for whichever variant matches.
macro_rules! match_each_canonical {
    ($canonical:expr, | $binding:ident | $body:expr) => {{
        // Arms follow the declaration order of `Canonical`; the match stays exhaustive so a
        // new variant forces this macro to be updated.
        match $canonical {
            Canonical::Null($binding) => $body,
            Canonical::Bool($binding) => $body,
            Canonical::Primitive($binding) => $body,
            Canonical::Decimal($binding) => $body,
            Canonical::VarBinView($binding) => $body,
            Canonical::List($binding) => $body,
            Canonical::FixedSizeList($binding) => $body,
            Canonical::Struct($binding) => $body,
            Canonical::Extension($binding) => $body,
            Canonical::Variant($binding) => $body,
        }
    }};
}
154
155impl Canonical {
156    /// Create an empty canonical array of the given dtype.
157    pub fn empty(dtype: &DType) -> Canonical {
158        match dtype {
159            DType::Null => Canonical::Null(NullArray::new(0)),
160            DType::Bool(n) => Canonical::Bool(unsafe {
161                BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
162            }),
163            DType::Primitive(ptype, n) => {
164                match_each_native_ptype!(ptype, |P| {
165                    Canonical::Primitive(unsafe {
166                        PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
167                    })
168                })
169            }
170            DType::Decimal(decimal_type, n) => {
171                match_each_decimal_value_type!(
172                    DecimalType::smallest_decimal_value_type(decimal_type),
173                    |D| {
174                        Canonical::Decimal(unsafe {
175                            DecimalArray::new_unchecked::<D>(
176                                Buffer::empty(),
177                                *decimal_type,
178                                Validity::from(n),
179                            )
180                        })
181                    }
182                )
183            }
184            DType::Utf8(n) => Canonical::VarBinView(unsafe {
185                VarBinViewArray::new_unchecked(
186                    Buffer::empty(),
187                    Arc::new([]),
188                    dtype.clone(),
189                    Validity::from(n),
190                )
191            }),
192            DType::Binary(n) => Canonical::VarBinView(unsafe {
193                VarBinViewArray::new_unchecked(
194                    Buffer::empty(),
195                    Arc::new([]),
196                    dtype.clone(),
197                    Validity::from(n),
198                )
199            }),
200            DType::List(dtype, n) => Canonical::List(unsafe {
201                ListViewArray::new_unchecked(
202                    Canonical::empty(dtype).into_array(),
203                    Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
204                        .into_array(),
205                    Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
206                        .into_array(),
207                    Validity::from(n),
208                )
209                // An empty list view is trivially copyable to a list.
210                .with_zero_copy_to_list(true)
211            }),
212            DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
213                FixedSizeListArray::new_unchecked(
214                    Canonical::empty(elem_dtype).into_array(),
215                    *list_size,
216                    Validity::from(null),
217                    0,
218                )
219            }),
220            DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
221                StructArray::new_unchecked(
222                    struct_dtype
223                        .fields()
224                        .map(|f| Canonical::empty(&f).into_array())
225                        .collect::<Arc<[_]>>(),
226                    struct_dtype.clone(),
227                    0,
228                    Validity::from(n),
229                )
230            }),
231            DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
232            DType::Variant(_) => {
233                vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant")
234            }
235            DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
236                ext_dtype.clone(),
237                Canonical::empty(ext_dtype.storage_dtype()).into_array(),
238            )),
239        }
240    }
241
242    pub fn len(&self) -> usize {
243        match_each_canonical!(self, |arr| arr.len())
244    }
245
246    pub fn dtype(&self) -> &DType {
247        match_each_canonical!(self, |arr| arr.dtype())
248    }
249
250    pub fn is_empty(&self) -> bool {
251        match_each_canonical!(self, |arr| arr.is_empty())
252    }
253}
254
255impl Canonical {
256    /// Performs a (potentially expensive) compaction operation on the array before it is complete.
257    ///
258    /// This is mostly relevant for the variable-length types such as Utf8, Binary or List where
259    /// they can accumulate wasted space after slicing and taking operations.
260    ///
261    /// This operation is very expensive and can result in things like allocations, full-scans
262    /// and copy operations.
263    pub fn compact(&self, ctx: &mut ExecutionCtx) -> VortexResult<Canonical> {
264        match self {
265            Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
266            Canonical::List(array) => Ok(Canonical::List(
267                array.rebuild(ListViewRebuildMode::TrimElements, ctx)?,
268            )),
269            _ => Ok(self.clone()),
270        }
271    }
272}
273
274// Unwrap canonical type back down to specialized type.
275impl Canonical {
276    pub fn as_null(&self) -> &NullArray {
277        if let Canonical::Null(a) = self {
278            a
279        } else {
280            vortex_panic!("Cannot get NullArray from {:?}", &self)
281        }
282    }
283
284    pub fn into_null(self) -> NullArray {
285        if let Canonical::Null(a) = self {
286            a
287        } else {
288            vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
289        }
290    }
291
292    pub fn as_bool(&self) -> &BoolArray {
293        if let Canonical::Bool(a) = self {
294            a
295        } else {
296            vortex_panic!("Cannot get BoolArray from {:?}", &self)
297        }
298    }
299
300    pub fn into_bool(self) -> BoolArray {
301        if let Canonical::Bool(a) = self {
302            a
303        } else {
304            vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
305        }
306    }
307
308    pub fn as_primitive(&self) -> &PrimitiveArray {
309        if let Canonical::Primitive(a) = self {
310            a
311        } else {
312            vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
313        }
314    }
315
316    pub fn into_primitive(self) -> PrimitiveArray {
317        if let Canonical::Primitive(a) = self {
318            a
319        } else {
320            vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
321        }
322    }
323
324    pub fn as_decimal(&self) -> &DecimalArray {
325        if let Canonical::Decimal(a) = self {
326            a
327        } else {
328            vortex_panic!("Cannot get DecimalArray from {:?}", &self)
329        }
330    }
331
332    pub fn into_decimal(self) -> DecimalArray {
333        if let Canonical::Decimal(a) = self {
334            a
335        } else {
336            vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
337        }
338    }
339
340    pub fn as_varbinview(&self) -> &VarBinViewArray {
341        if let Canonical::VarBinView(a) = self {
342            a
343        } else {
344            vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
345        }
346    }
347
348    pub fn into_varbinview(self) -> VarBinViewArray {
349        if let Canonical::VarBinView(a) = self {
350            a
351        } else {
352            vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
353        }
354    }
355
356    pub fn as_listview(&self) -> &ListViewArray {
357        if let Canonical::List(a) = self {
358            a
359        } else {
360            vortex_panic!("Cannot get ListArray from {:?}", &self)
361        }
362    }
363
364    pub fn into_listview(self) -> ListViewArray {
365        if let Canonical::List(a) = self {
366            a
367        } else {
368            vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
369        }
370    }
371
372    pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
373        if let Canonical::FixedSizeList(a) = self {
374            a
375        } else {
376            vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
377        }
378    }
379
380    pub fn into_fixed_size_list(self) -> FixedSizeListArray {
381        if let Canonical::FixedSizeList(a) = self {
382            a
383        } else {
384            vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
385        }
386    }
387
388    pub fn as_struct(&self) -> &StructArray {
389        if let Canonical::Struct(a) = self {
390            a
391        } else {
392            vortex_panic!("Cannot get StructArray from {:?}", &self)
393        }
394    }
395
396    pub fn into_struct(self) -> StructArray {
397        if let Canonical::Struct(a) = self {
398            a
399        } else {
400            vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
401        }
402    }
403
404    pub fn as_extension(&self) -> &ExtensionArray {
405        if let Canonical::Extension(a) = self {
406            a
407        } else {
408            vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
409        }
410    }
411
412    pub fn into_extension(self) -> ExtensionArray {
413        if let Canonical::Extension(a) = self {
414            a
415        } else {
416            vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
417        }
418    }
419}
420
421impl IntoArray for Canonical {
422    fn into_array(self) -> ArrayRef {
423        match_each_canonical!(self, |arr| arr.into_array())
424    }
425}
426
427/// Trait for types that can be converted from an owned type into an owned array variant.
428///
429/// # Canonicalization
430///
431/// This trait has a blanket implementation for all types implementing [ToCanonical].
432#[deprecated(note = "use `array.execute::<T>(ctx)` instead")]
433pub trait ToCanonical {
434    /// Canonicalize into a [`NullArray`] if the target is [`Null`](DType::Null) typed.
435    #[deprecated(note = "use `array.execute::<NullArray>(ctx)` instead")]
436    fn to_null(&self) -> NullArray;
437
438    /// Canonicalize into a [`BoolArray`] if the target is [`Bool`](DType::Bool) typed.
439    #[deprecated(note = "use `array.execute::<BoolArray>(ctx)` instead")]
440    fn to_bool(&self) -> BoolArray;
441
442    /// Canonicalize into a [`PrimitiveArray`] if the target is [`Primitive`](DType::Primitive)
443    /// typed.
444    #[deprecated(note = "use `array.execute::<PrimitiveArray>(ctx)` instead")]
445    fn to_primitive(&self) -> PrimitiveArray;
446
447    /// Canonicalize into a [`DecimalArray`] if the target is [`Decimal`](DType::Decimal)
448    /// typed.
449    #[deprecated(note = "use `array.execute::<DecimalArray>(ctx)` instead")]
450    fn to_decimal(&self) -> DecimalArray;
451
452    /// Canonicalize into a [`StructArray`] if the target is [`Struct`](DType::Struct) typed.
453    #[deprecated(note = "use `array.execute::<StructArray>(ctx)` instead")]
454    fn to_struct(&self) -> StructArray;
455
456    /// Canonicalize into a [`ListViewArray`] if the target is [`List`](DType::List) typed.
457    #[deprecated(note = "use `array.execute::<ListViewArray>(ctx)` instead")]
458    fn to_listview(&self) -> ListViewArray;
459
460    /// Canonicalize into a [`FixedSizeListArray`] if the target is [`List`](DType::FixedSizeList)
461    /// typed.
462    #[deprecated(note = "use `array.execute::<FixedSizeListArray>(ctx)` instead")]
463    fn to_fixed_size_list(&self) -> FixedSizeListArray;
464
465    /// Canonicalize into a [`VarBinViewArray`] if the target is [`Utf8`](DType::Utf8)
466    /// or [`Binary`](DType::Binary) typed.
467    #[deprecated(note = "use `array.execute::<VarBinViewArray>(ctx)` instead")]
468    fn to_varbinview(&self) -> VarBinViewArray;
469
470    /// Canonicalize into an [`ExtensionArray`] if the array is [`Extension`](DType::Extension)
471    /// typed.
472    #[deprecated(note = "use `array.execute::<ExtensionArray>(ctx)` instead")]
473    fn to_extension(&self) -> ExtensionArray;
474}
475
476// Blanket impl for all Array encodings.
477#[expect(deprecated)]
478impl ToCanonical for ArrayRef {
479    fn to_null(&self) -> NullArray {
480        #[expect(deprecated)]
481        let result = self.to_canonical().vortex_expect("to_canonical failed");
482        result.into_null()
483    }
484
485    fn to_bool(&self) -> BoolArray {
486        #[expect(deprecated)]
487        let result = self.to_canonical().vortex_expect("to_canonical failed");
488        result.into_bool()
489    }
490
491    fn to_primitive(&self) -> PrimitiveArray {
492        #[expect(deprecated)]
493        let result = self.to_canonical().vortex_expect("to_canonical failed");
494        result.into_primitive()
495    }
496
497    fn to_decimal(&self) -> DecimalArray {
498        #[expect(deprecated)]
499        let result = self.to_canonical().vortex_expect("to_canonical failed");
500        result.into_decimal()
501    }
502
503    fn to_struct(&self) -> StructArray {
504        #[expect(deprecated)]
505        let result = self.to_canonical().vortex_expect("to_canonical failed");
506        result.into_struct()
507    }
508
509    fn to_listview(&self) -> ListViewArray {
510        #[expect(deprecated)]
511        let result = self.to_canonical().vortex_expect("to_canonical failed");
512        result.into_listview()
513    }
514
515    fn to_fixed_size_list(&self) -> FixedSizeListArray {
516        #[expect(deprecated)]
517        let result = self.to_canonical().vortex_expect("to_canonical failed");
518        result.into_fixed_size_list()
519    }
520
521    fn to_varbinview(&self) -> VarBinViewArray {
522        #[expect(deprecated)]
523        let result = self.to_canonical().vortex_expect("to_canonical failed");
524        result.into_varbinview()
525    }
526
527    fn to_extension(&self) -> ExtensionArray {
528        #[expect(deprecated)]
529        let result = self.to_canonical().vortex_expect("to_canonical failed");
530        result.into_extension()
531    }
532}
533
534impl From<Canonical> for ArrayRef {
535    fn from(value: Canonical) -> Self {
536        match_each_canonical!(value, |arr| arr.into_array())
537    }
538}
539
540/// Execute into [`Canonical`] by running `execute_until` with the [`AnyCanonical`] matcher.
541///
542/// Unlike executing into [`crate::Columnar`], this will fully expand constant arrays into their
543/// canonical form. Callers should prefer to execute into `Columnar` if they are able to optimize
544/// their use for constant arrays.
545impl Executable for Canonical {
546    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
547        let result = array.execute_until::<AnyCanonical>(ctx)?;
548        Ok(result
549            .as_opt::<AnyCanonical>()
550            .map(Canonical::from)
551            .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
552    }
553}
554
555/// Recursively execute the array until it reaches canonical form along with its validity.
556///
557/// Callers should prefer to execute into `Columnar` instead of this specific target.
558/// This target is useful when preparing arrays for writing.
559pub struct CanonicalValidity(pub Canonical);
560
561impl Executable for CanonicalValidity {
562    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
563        match array.execute::<Canonical>(ctx)? {
564            n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
565            Canonical::Bool(b) => {
566                let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
567                let len = b.len();
568                let BoolDataParts { bits, meta } = b.into_data().into_parts(len);
569                Ok(CanonicalValidity(Canonical::Bool(
570                    BoolArray::try_new_from_handle(
571                        bits,
572                        meta.offset(),
573                        meta.len(),
574                        validity.execute(ctx)?,
575                    )?,
576                )))
577            }
578            Canonical::Primitive(p) => {
579                let PrimitiveDataParts {
580                    ptype,
581                    buffer,
582                    validity,
583                } = p.into_data_parts();
584                Ok(CanonicalValidity(Canonical::Primitive(unsafe {
585                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
586                })))
587            }
588            Canonical::Decimal(d) => {
589                let DecimalDataParts {
590                    decimal_dtype,
591                    values,
592                    values_type,
593                    validity,
594                } = d.into_data_parts();
595                Ok(CanonicalValidity(Canonical::Decimal(unsafe {
596                    DecimalArray::new_unchecked_handle(
597                        values,
598                        values_type,
599                        decimal_dtype,
600                        validity.execute(ctx)?,
601                    )
602                })))
603            }
604            Canonical::VarBinView(vbv) => {
605                let VarBinViewDataParts {
606                    dtype,
607                    buffers,
608                    views,
609                    validity,
610                } = vbv.into_data_parts();
611                Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
612                    VarBinViewArray::new_handle_unchecked(
613                        views,
614                        buffers,
615                        dtype,
616                        validity.execute(ctx)?,
617                    )
618                })))
619            }
620            Canonical::List(l) => {
621                let zctl = l.is_zero_copy_to_list();
622                let ListViewDataParts {
623                    elements,
624                    offsets,
625                    sizes,
626                    validity,
627                    ..
628                } = l.into_data_parts();
629                Ok(CanonicalValidity(Canonical::List(unsafe {
630                    ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
631                        .with_zero_copy_to_list(zctl)
632                })))
633            }
634            Canonical::FixedSizeList(fsl) => {
635                let list_size = fsl.list_size();
636                let len = fsl.len();
637                let parts = fsl.into_data_parts();
638                let elements = parts.elements;
639                let validity = parts.validity;
640                Ok(CanonicalValidity(Canonical::FixedSizeList(
641                    FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
642                )))
643            }
644            Canonical::Struct(st) => {
645                let len = st.len();
646                let StructDataParts {
647                    struct_fields,
648                    fields,
649                    validity,
650                } = st.into_data_parts();
651                Ok(CanonicalValidity(Canonical::Struct(unsafe {
652                    StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
653                })))
654            }
655            Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
656                ExtensionArray::new(
657                    ext.ext_dtype().clone(),
658                    ext.storage_array()
659                        .clone()
660                        .execute::<CanonicalValidity>(ctx)?
661                        .0
662                        .into_array(),
663                ),
664            ))),
665            Canonical::Variant(variant) => {
666                let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
667                let shredded = variant
668                    .shredded()
669                    .map(|shredded| {
670                        if shredded.is::<Variant>() {
671                            recursively_canonicalize_slots(shredded, ctx)
672                        } else {
673                            shredded
674                                .clone()
675                                .execute::<CanonicalValidity>(ctx)
676                                .map(|canonical| canonical.0.into_array())
677                        }
678                    })
679                    .transpose()?;
680                Ok(CanonicalValidity(Canonical::Variant(
681                    VariantArray::try_new(core_storage, shredded)?,
682                )))
683            }
684        }
685    }
686}
687
688/// Recursively execute the array until all of its children are canonical.
689///
690/// This method is useful to guarantee that all operators are fully executed,
691/// callers should prefer an execution target that's suitable for their use case instead of this one.
692pub struct RecursiveCanonical(pub Canonical);
693
694// TODO: Currently only used for Variant, in the future
695// can probably be used for more canonical types like Struct.
696fn recursively_canonicalize_slots(
697    array: &ArrayRef,
698    ctx: &mut ExecutionCtx,
699) -> VortexResult<ArrayRef> {
700    let slots = array
701        .slots()
702        .iter()
703        .map(|slot| {
704            slot.as_ref()
705                .map(|child| {
706                    child
707                        .clone()
708                        .execute::<RecursiveCanonical>(ctx)
709                        .map(|canonical| canonical.0.into_array())
710                })
711                .transpose()
712        })
713        .collect::<VortexResult<ArraySlots>>()?;
714    // SAFETY: recursive canonicalization rewrites child slots to equivalent canonical
715    // representations, preserving the parent array's logical values and statistics.
716    unsafe { array.clone().with_slots(slots) }
717}
718impl Executable for RecursiveCanonical {
719    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
720        match array.execute::<Canonical>(ctx)? {
721            n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
722            Canonical::Bool(b) => {
723                let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
724                let len = b.len();
725                let BoolDataParts { bits, meta } = b.into_data().into_parts(len);
726                Ok(RecursiveCanonical(Canonical::Bool(
727                    BoolArray::try_new_from_handle(
728                        bits,
729                        meta.offset(),
730                        meta.len(),
731                        validity.execute(ctx)?,
732                    )?,
733                )))
734            }
735            Canonical::Primitive(p) => {
736                let PrimitiveDataParts {
737                    ptype,
738                    buffer,
739                    validity,
740                } = p.into_data_parts();
741                Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
742                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
743                })))
744            }
745            Canonical::Decimal(d) => {
746                let DecimalDataParts {
747                    decimal_dtype,
748                    values,
749                    values_type,
750                    validity,
751                } = d.into_data_parts();
752                Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
753                    DecimalArray::new_unchecked_handle(
754                        values,
755                        values_type,
756                        decimal_dtype,
757                        validity.execute(ctx)?,
758                    )
759                })))
760            }
761            Canonical::VarBinView(vbv) => {
762                let VarBinViewDataParts {
763                    dtype,
764                    buffers,
765                    views,
766                    validity,
767                } = vbv.into_data_parts();
768                Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
769                    VarBinViewArray::new_handle_unchecked(
770                        views,
771                        buffers,
772                        dtype,
773                        validity.execute(ctx)?,
774                    )
775                })))
776            }
777            Canonical::List(l) => {
778                let zctl = l.is_zero_copy_to_list();
779                let ListViewDataParts {
780                    elements,
781                    offsets,
782                    sizes,
783                    validity,
784                    ..
785                } = l.into_data_parts();
786                Ok(RecursiveCanonical(Canonical::List(unsafe {
787                    ListViewArray::new_unchecked(
788                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
789                        offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
790                        sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
791                        validity.execute(ctx)?,
792                    )
793                    .with_zero_copy_to_list(zctl)
794                })))
795            }
796            Canonical::FixedSizeList(fsl) => {
797                let list_size = fsl.list_size();
798                let len = fsl.len();
799                let parts = fsl.into_data_parts();
800                let elements = parts.elements;
801                let validity = parts.validity;
802                Ok(RecursiveCanonical(Canonical::FixedSizeList(
803                    FixedSizeListArray::new(
804                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
805                        list_size,
806                        validity.execute(ctx)?,
807                        len,
808                    ),
809                )))
810            }
811            Canonical::Struct(st) => {
812                let len = st.len();
813                let StructDataParts {
814                    struct_fields,
815                    fields,
816                    validity,
817                } = st.into_data_parts();
818                let executed_fields = fields
819                    .iter()
820                    .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
821                    .collect::<VortexResult<Arc<[_]>>>()?;
822
823                Ok(RecursiveCanonical(Canonical::Struct(unsafe {
824                    StructArray::new_unchecked(
825                        executed_fields,
826                        struct_fields,
827                        len,
828                        validity.execute(ctx)?,
829                    )
830                })))
831            }
832            Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
833                ExtensionArray::new(
834                    ext.ext_dtype().clone(),
835                    ext.storage_array()
836                        .clone()
837                        .execute::<RecursiveCanonical>(ctx)?
838                        .0
839                        .into_array(),
840                ),
841            ))),
842            Canonical::Variant(variant) => {
843                let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
844                let shredded = variant
845                    .shredded()
846                    .map(|shredded| {
847                        if shredded.is::<Variant>() {
848                            recursively_canonicalize_slots(shredded, ctx)
849                        } else {
850                            shredded
851                                .clone()
852                                .execute::<RecursiveCanonical>(ctx)
853                                .map(|canonical| canonical.0.into_array())
854                        }
855                    })
856                    .transpose()?;
857                Ok(RecursiveCanonical(Canonical::Variant(
858                    VariantArray::try_new(core_storage, shredded)?,
859                )))
860            }
861        }
862    }
863}
864
865/// Execute a primitive typed array into a buffer of native values, assuming all values are valid.
866///
867/// # Errors
868///
869/// Returns a `VortexError` if the array is not all-valid (has any nulls).
870impl<T: NativePType> Executable for Buffer<T> {
871    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
872        let array = PrimitiveArray::execute(array, ctx)?;
873        vortex_ensure!(
874            matches!(
875                array.validity()?,
876                Validity::NonNullable | Validity::AllValid
877            ),
878            "Cannot execute to native buffer: array is not all-valid."
879        );
880        Ok(array.into_buffer())
881    }
882}
883
884/// Execute the array to canonical form and unwrap as a [`PrimitiveArray`].
885///
886/// This will panic if the array's dtype is not primitive.
887impl Executable for PrimitiveArray {
888    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
889        match array.try_downcast::<Primitive>() {
890            Ok(primitive) => Ok(primitive),
891            Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
892        }
893    }
894}
895
896/// Execute the array to canonical form and unwrap as a [`BoolArray`].
897///
898/// This will panic if the array's dtype is not bool.
899impl Executable for BoolArray {
900    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
901        match array.try_downcast::<Bool>() {
902            Ok(bool_array) => Ok(bool_array),
903            Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
904        }
905    }
906}
907
908/// Execute the array to a [`BitBuffer`], aka a non-nullable  [`BoolArray`].
909///
910/// This will panic if the array's dtype is not non-nullable bool.
911impl Executable for BitBuffer {
912    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
913        let bool = BoolArray::execute(array, ctx)?;
914        assert!(
915            !bool.dtype().is_nullable(),
916            "bit buffer execute only works with non-nullable bool arrays"
917        );
918        Ok(bool.into_bit_buffer())
919    }
920}
921
922/// Execute the array to canonical form and unwrap as a [`NullArray`].
923///
924/// This will panic if the array's dtype is not null.
925impl Executable for NullArray {
926    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
927        match array.try_downcast::<Null>() {
928            Ok(null_array) => Ok(null_array),
929            Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
930        }
931    }
932}
933
934/// Execute the array to canonical form and unwrap as a [`VarBinViewArray`].
935///
936/// This will panic if the array's dtype is not utf8 or binary.
937impl Executable for VarBinViewArray {
938    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
939        match array.try_downcast::<VarBinView>() {
940            Ok(varbinview) => Ok(varbinview),
941            Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
942        }
943    }
944}
945
946/// Execute the array to canonical form and unwrap as an [`ExtensionArray`].
947///
948/// This will panic if the array's dtype is not an extension type.
949impl Executable for ExtensionArray {
950    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
951        match array.try_downcast::<Extension>() {
952            Ok(ext_array) => Ok(ext_array),
953            Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
954        }
955    }
956}
957
958/// Execute the array to canonical form and unwrap as a [`DecimalArray`].
959///
960/// This will panic if the array's dtype is not decimal.
961impl Executable for DecimalArray {
962    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
963        match array.try_downcast::<Decimal>() {
964            Ok(decimal) => Ok(decimal),
965            Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
966        }
967    }
968}
969
970/// Execute the array to canonical form and unwrap as a [`ListViewArray`].
971///
972/// This will panic if the array's dtype is not list.
973impl Executable for ListViewArray {
974    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
975        match array.try_downcast::<ListView>() {
976            Ok(list) => Ok(list),
977            Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
978        }
979    }
980}
981
982/// Execute the array to canonical form and unwrap as a [`FixedSizeListArray`].
983///
984/// This will panic if the array's dtype is not fixed size list.
985impl Executable for FixedSizeListArray {
986    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
987        match array.try_downcast::<FixedSizeList>() {
988            Ok(fsl) => Ok(fsl),
989            Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
990        }
991    }
992}
993
994/// Execute the array to canonical form and unwrap as a [`StructArray`].
995///
996/// This will panic if the array's dtype is not struct.
997impl Executable for StructArray {
998    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
999        match array.try_downcast::<Struct>() {
1000            Ok(struct_array) => Ok(struct_array),
1001            Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
1002        }
1003    }
1004}
1005
1006/// Execute the array to canonical form and unwrap as a [`VariantArray`].
1007///
1008/// This will panic if the array's dtype is not variant.
1009impl Executable for VariantArray {
1010    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
1011        match array.try_downcast::<Variant>() {
1012            Ok(variant_array) => Ok(variant_array),
1013            Err(array) => match Canonical::execute(array, ctx)? {
1014                Canonical::Variant(variant_array) => Ok(variant_array),
1015                canonical => vortex_panic!("Cannot unwrap VariantArray from {:?}", canonical),
1016            },
1017        }
1018    }
1019}
1020
/// A borrowed view into a canonical array type.
///
/// There is one variant per canonical encoding, each wrapping the matching `ArrayView<V>`.
///
/// Uses `ArrayView<V>` because these are obtained by
/// downcasting through the `Matcher` trait which returns `ArrayView<V>`.
#[derive(Debug, Clone, Copy)]
pub enum CanonicalView<'a> {
    /// View of an array with the [`Null`] encoding.
    Null(ArrayView<'a, Null>),
    /// View of an array with the [`Bool`] encoding.
    Bool(ArrayView<'a, Bool>),
    /// View of an array with the [`Primitive`] encoding.
    Primitive(ArrayView<'a, Primitive>),
    /// View of an array with the [`Decimal`] encoding.
    Decimal(ArrayView<'a, Decimal>),
    /// View of an array with the [`VarBinView`] encoding.
    VarBinView(ArrayView<'a, VarBinView>),
    /// View of an array with the [`ListView`] encoding (note the variant is named `List`).
    List(ArrayView<'a, ListView>),
    /// View of an array with the [`FixedSizeList`] encoding.
    FixedSizeList(ArrayView<'a, FixedSizeList>),
    /// View of an array with the [`Struct`] encoding.
    Struct(ArrayView<'a, Struct>),
    /// View of an array with the [`Extension`] encoding.
    Extension(ArrayView<'a, Extension>),
    /// View of an array with the [`Variant`] encoding.
    Variant(ArrayView<'a, Variant>),
}
1038
1039impl From<CanonicalView<'_>> for Canonical {
1040    fn from(value: CanonicalView<'_>) -> Self {
1041        match value {
1042            CanonicalView::Null(a) => Canonical::Null(a.into_owned()),
1043            CanonicalView::Bool(a) => Canonical::Bool(a.into_owned()),
1044            CanonicalView::Primitive(a) => Canonical::Primitive(a.into_owned()),
1045            CanonicalView::Decimal(a) => Canonical::Decimal(a.into_owned()),
1046            CanonicalView::VarBinView(a) => Canonical::VarBinView(a.into_owned()),
1047            CanonicalView::List(a) => Canonical::List(a.into_owned()),
1048            CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.into_owned()),
1049            CanonicalView::Struct(a) => Canonical::Struct(a.into_owned()),
1050            CanonicalView::Extension(a) => Canonical::Extension(a.into_owned()),
1051            CanonicalView::Variant(a) => Canonical::Variant(a.into_owned()),
1052        }
1053    }
1054}
1055
1056impl CanonicalView<'_> {
1057    /// Convert to a type-erased [`ArrayRef`].
1058    pub fn to_array_ref(&self) -> ArrayRef {
1059        match self {
1060            CanonicalView::Null(a) => a.array().clone(),
1061            CanonicalView::Bool(a) => a.array().clone(),
1062            CanonicalView::Primitive(a) => a.array().clone(),
1063            CanonicalView::Decimal(a) => a.array().clone(),
1064            CanonicalView::VarBinView(a) => a.array().clone(),
1065            CanonicalView::List(a) => a.array().clone(),
1066            CanonicalView::FixedSizeList(a) => a.array().clone(),
1067            CanonicalView::Struct(a) => a.array().clone(),
1068            CanonicalView::Extension(a) => a.array().clone(),
1069            CanonicalView::Variant(a) => a.array().clone(),
1070        }
1071    }
1072}
1073
1074/// A matcher for any canonical array type.
1075pub struct AnyCanonical;
1076impl Matcher for AnyCanonical {
1077    type Match<'a> = CanonicalView<'a>;
1078
1079    #[inline]
1080    fn matches(array: &ArrayRef) -> bool {
1081        array.is::<Null>()
1082            || array.is::<Bool>()
1083            || array.is::<Primitive>()
1084            || array.is::<Decimal>()
1085            || array.is::<Struct>()
1086            || array.is::<ListView>()
1087            || array.is::<FixedSizeList>()
1088            || array.is::<VarBinView>()
1089            || array.is::<Variant>()
1090            || array.is::<Extension>()
1091    }
1092
1093    #[inline]
1094    fn try_match(array: &ArrayRef) -> Option<Self::Match<'_>> {
1095        if let Some(a) = array.as_opt::<Null>() {
1096            Some(CanonicalView::Null(a))
1097        } else if let Some(a) = array.as_opt::<Bool>() {
1098            Some(CanonicalView::Bool(a))
1099        } else if let Some(a) = array.as_opt::<Primitive>() {
1100            Some(CanonicalView::Primitive(a))
1101        } else if let Some(a) = array.as_opt::<Decimal>() {
1102            Some(CanonicalView::Decimal(a))
1103        } else if let Some(a) = array.as_opt::<Struct>() {
1104            Some(CanonicalView::Struct(a))
1105        } else if let Some(a) = array.as_opt::<ListView>() {
1106            Some(CanonicalView::List(a))
1107        } else if let Some(a) = array.as_opt::<FixedSizeList>() {
1108            Some(CanonicalView::FixedSizeList(a))
1109        } else if let Some(a) = array.as_opt::<VarBinView>() {
1110            Some(CanonicalView::VarBinView(a))
1111        } else if let Some(a) = array.as_opt::<Variant>() {
1112            Some(CanonicalView::Variant(a))
1113        } else {
1114            array.as_opt::<Extension>().map(CanonicalView::Extension)
1115        }
1116    }
1117}
1118
1119#[cfg(test)]
1120mod test {
1121    use std::sync::Arc;
1122    use std::sync::LazyLock;
1123
1124    use arrow_array::Array as ArrowArray;
1125    use arrow_array::ArrayRef as ArrowArrayRef;
1126    use arrow_array::ListArray as ArrowListArray;
1127    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
1128    use arrow_array::StringArray;
1129    use arrow_array::StringViewArray;
1130    use arrow_array::StructArray as ArrowStructArray;
1131    use arrow_array::cast::AsArray;
1132    use arrow_array::types::Int32Type;
1133    use arrow_array::types::Int64Type;
1134    use arrow_array::types::UInt64Type;
1135    use arrow_buffer::NullBufferBuilder;
1136    use arrow_buffer::OffsetBuffer;
1137    use arrow_schema::DataType;
1138    use arrow_schema::Field;
1139    use vortex_buffer::buffer;
1140    use vortex_error::VortexResult;
1141    use vortex_error::vortex_err;
1142    use vortex_session::VortexSession;
1143
1144    use crate::ArrayRef;
1145    use crate::Canonical;
1146    use crate::CanonicalValidity;
1147    use crate::IntoArray;
1148    use crate::VortexSessionExecute;
1149    use crate::arrays::Constant;
1150    use crate::arrays::ConstantArray;
1151    use crate::arrays::Primitive;
1152    use crate::arrays::Struct;
1153    use crate::arrays::Variant;
1154    use crate::arrays::VariantArray;
1155    use crate::arrays::struct_::StructArrayExt;
1156    use crate::arrays::variant::VariantArrayExt;
1157    use crate::arrow::ArrowSessionExt;
1158    use crate::arrow::FromArrowArray;
1159    use crate::canonical::StructArray;
1160    use crate::dtype::Nullability;
1161    use crate::scalar::Scalar;
1162
    /// A shared session for these canonical tests, used to create execution contexts.
    ///
    /// Built lazily on first access by calling `crate::array_session`.
    static SESSION: LazyLock<VortexSession> = LazyLock::new(crate::array_session);
1165
1166    fn variant_core_storage(len: usize) -> ArrayRef {
1167        ConstantArray::new(
1168            Scalar::variant(Scalar::primitive(1i32, Nullability::NonNullable)),
1169            len,
1170        )
1171        .into_array()
1172    }
1173
1174    #[test]
1175    fn canonical_validity_canonicalizes_variant_shredded_physical_slots() -> VortexResult<()> {
1176        let len = 2;
1177        let nested_shredded =
1178            StructArray::try_from_iter([("value", ConstantArray::new(10i32, len).into_array())])?;
1179        let inner_variant = VariantArray::try_new(
1180            variant_core_storage(len),
1181            Some(nested_shredded.into_array()),
1182        )?;
1183        let outer_variant =
1184            VariantArray::try_new(variant_core_storage(len), Some(inner_variant.into_array()))?;
1185
1186        let mut ctx = SESSION.create_execution_ctx();
1187        let Canonical::Variant(canonical) = outer_variant
1188            .into_array()
1189            .execute::<CanonicalValidity>(&mut ctx)?
1190            .0
1191        else {
1192            return Err(vortex_err!("expected canonical variant"));
1193        };
1194
1195        let nested_variant = canonical
1196            .shredded()
1197            .and_then(|shredded| shredded.as_opt::<Variant>())
1198            .ok_or_else(|| vortex_err!("expected nested variant shredded child"))?;
1199        let nested_struct = nested_variant
1200            .shredded()
1201            .and_then(|shredded| shredded.as_opt::<Struct>())
1202            .ok_or_else(|| vortex_err!("expected nested struct shredded child"))?;
1203        let value = nested_struct.unmasked_field_by_name("value")?;
1204
1205        assert!(value.is::<Primitive>());
1206        assert!(!value.is::<Constant>());
1207
1208        Ok(())
1209    }
1210
1211    #[test]
1212    fn test_canonicalize_nested_struct() {
1213        let mut ctx = SESSION.create_execution_ctx();
1214        // Create a struct array with multiple internal components.
1215        let nested_struct_array = StructArray::from_fields(&[
1216            ("a", buffer![1u64].into_array()),
1217            (
1218                "b",
1219                StructArray::from_fields(&[(
1220                    "inner_a",
1221                    // The nested struct contains a ConstantArray representing the primitive array
1222                    //   [100i64]
1223                    // ConstantArray is not a canonical type, so converting `into_arrow()` should
1224                    // map this to the nearest canonical type (PrimitiveArray).
1225                    ConstantArray::new(100i64, 1).into_array(),
1226                )])
1227                .unwrap()
1228                .into_array(),
1229            ),
1230        ])
1231        .unwrap();
1232
1233        let arrow_struct = SESSION
1234            .arrow()
1235            .execute_arrow(nested_struct_array.into_array(), None, &mut ctx)
1236            .unwrap()
1237            .as_any()
1238            .downcast_ref::<ArrowStructArray>()
1239            .cloned()
1240            .unwrap();
1241
1242        assert!(
1243            arrow_struct
1244                .column(0)
1245                .as_any()
1246                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
1247                .is_some()
1248        );
1249
1250        let inner_struct = Arc::clone(arrow_struct.column(1))
1251            .as_any()
1252            .downcast_ref::<ArrowStructArray>()
1253            .cloned()
1254            .unwrap();
1255
1256        let inner_a = inner_struct
1257            .column(0)
1258            .as_any()
1259            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
1260        assert!(inner_a.is_some());
1261
1262        assert_eq!(
1263            inner_a.cloned().unwrap(),
1264            ArrowPrimitiveArray::from_iter([100i64])
1265        );
1266    }
1267
1268    #[test]
1269    fn roundtrip_struct() {
1270        let mut ctx = SESSION.create_execution_ctx();
1271        let mut nulls = NullBufferBuilder::new(6);
1272        nulls.append_n_non_nulls(4);
1273        nulls.append_null();
1274        nulls.append_non_null();
1275        let names = Arc::new(StringViewArray::from_iter(vec![
1276            Some("Joseph"),
1277            None,
1278            Some("Angela"),
1279            Some("Mikhail"),
1280            None,
1281            None,
1282        ]));
1283        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
1284            Some(25),
1285            Some(31),
1286            None,
1287            Some(57),
1288            None,
1289            None,
1290        ]));
1291
1292        let arrow_struct = ArrowStructArray::new(
1293            vec![
1294                Arc::new(Field::new("name", DataType::Utf8View, true)),
1295                Arc::new(Field::new("age", DataType::Int32, true)),
1296            ]
1297            .into(),
1298            vec![names, ages],
1299            nulls.finish(),
1300        );
1301
1302        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();
1303        let vortex_struct = SESSION
1304            .arrow()
1305            .execute_arrow(vortex_struct, None, &mut ctx)
1306            .unwrap();
1307        assert_eq!(&arrow_struct, vortex_struct.as_struct());
1308    }
1309
1310    #[test]
1311    fn roundtrip_list() {
1312        let mut ctx = SESSION.create_execution_ctx();
1313        let names = Arc::new(StringArray::from_iter(vec![
1314            Some("Joseph"),
1315            Some("Angela"),
1316            Some("Mikhail"),
1317        ]));
1318
1319        let arrow_list = ArrowListArray::new(
1320            Arc::new(Field::new_list_field(DataType::Utf8, true)),
1321            OffsetBuffer::from_lengths(vec![0, 2, 1]),
1322            names,
1323            None,
1324        );
1325        let list_data_type = arrow_list.data_type();
1326        let list_field = Field::new(String::new(), list_data_type.clone(), true);
1327
1328        let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();
1329
1330        let rt_arrow_list = SESSION
1331            .arrow()
1332            .execute_arrow(vortex_list, Some(&list_field), &mut ctx)
1333            .unwrap();
1334
1335        assert_eq!(
1336            (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
1337            rt_arrow_list.as_ref()
1338        );
1339    }
1340}