Skip to main content

vortex_array/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Encodings that enable zero-copy sharing of data with Arrow.
5
6use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::ArraySlots;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::array::ArrayView;
21use crate::array::child_to_validity;
22use crate::arrays::Bool;
23use crate::arrays::BoolArray;
24use crate::arrays::Decimal;
25use crate::arrays::DecimalArray;
26use crate::arrays::Extension;
27use crate::arrays::ExtensionArray;
28use crate::arrays::FixedSizeList;
29use crate::arrays::FixedSizeListArray;
30use crate::arrays::ListView;
31use crate::arrays::ListViewArray;
32use crate::arrays::Null;
33use crate::arrays::NullArray;
34use crate::arrays::Primitive;
35use crate::arrays::PrimitiveArray;
36use crate::arrays::Struct;
37use crate::arrays::StructArray;
38use crate::arrays::VarBinView;
39use crate::arrays::VarBinViewArray;
40use crate::arrays::Variant;
41use crate::arrays::VariantArray;
42use crate::arrays::bool::BoolDataParts;
43use crate::arrays::decimal::DecimalDataParts;
44use crate::arrays::extension::ExtensionArrayExt;
45use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
46use crate::arrays::listview::ListViewDataParts;
47use crate::arrays::listview::ListViewRebuildMode;
48use crate::arrays::primitive::PrimitiveDataParts;
49use crate::arrays::struct_::StructDataParts;
50use crate::arrays::varbinview::VarBinViewDataParts;
51use crate::arrays::variant::VariantArrayExt;
52use crate::dtype::DType;
53use crate::dtype::NativePType;
54use crate::dtype::Nullability;
55use crate::dtype::PType;
56use crate::match_each_decimal_value_type;
57use crate::match_each_native_ptype;
58use crate::matcher::Matcher;
59use crate::validity::Validity;
60
61/// An enum capturing the default uncompressed encodings for each [Vortex type](DType).
62///
63/// Any array can be decoded into canonical form via the `to_canonical`
64/// trait method. This is the simplest encoding for a type, and will not be compressed but may
65/// contain compressed child arrays.
66///
67/// Canonical form is useful for doing type-specific compute where you need to know that all
68/// elements are laid out decompressed and contiguous in memory.
69///
70/// Each `Canonical` variant has a corresponding [`DType`] variant, with the notable exception of
71/// [`Canonical::VarBinView`], which is the canonical encoding for both [`DType::Utf8`] and
72/// [`DType::Binary`].
73///
74/// # Laziness
75///
76/// Canonical form is not recursive, so while a `StructArray` is the canonical format for any
77/// `Struct` type, individual column child arrays may still be compressed. This allows
78/// compute over Vortex arrays to push decoding as late as possible, and ideally many child arrays
79/// never need to be decoded into canonical form at all depending on the compute.
80///
81/// # Arrow interoperability
82///
83/// All of the Vortex canonical encodings have an equivalent Arrow encoding that can be built
84/// zero-copy, and the corresponding Arrow array types can also be built directly.
85///
86/// The full list of canonical types and their equivalent Arrow array types are:
87///
88/// * `NullArray`: [`arrow_array::NullArray`]
89/// * `BoolArray`: [`arrow_array::BooleanArray`]
90/// * `PrimitiveArray`: [`arrow_array::PrimitiveArray`]
91/// * `DecimalArray`: [`arrow_array::Decimal128Array`] and [`arrow_array::Decimal256Array`]
92/// * `VarBinViewArray`: [`arrow_array::GenericByteViewArray`]
93/// * `ListViewArray`: [`arrow_array::ListViewArray`]
94/// * `FixedSizeListArray`: [`arrow_array::FixedSizeListArray`]
95/// * `StructArray`: [`arrow_array::StructArray`]
96///
97/// Vortex uses a logical type system, unlike Arrow which uses physical encodings for its types.
98/// As an example, there are at least six valid physical encodings for a `Utf8` array. This can
99/// create ambiguity.
100/// Thus, if you receive an Arrow array, compress it using Vortex, and then
101/// decompress it later to pass to a compute kernel, there are multiple suitable Arrow array
102/// variants to hold the data.
103///
104/// To disambiguate, we choose a canonical physical encoding for every Vortex [`DType`], which
105/// will correspond to an arrow-rs [`arrow_schema::DataType`].
106///
107/// # Views support
108///
109/// Binary and String views, also known as "German strings" are a better encoding format for
110/// nearly all use-cases. Variable-length binary views are part of the Apache Arrow spec, and are
111/// fully supported by the Datafusion query engine. We use them as our canonical string encoding
112/// for all `Utf8` and `Binary` typed arrays in Vortex. They provide considerably faster filter
113/// execution than the core `StringArray` and `BinaryArray` types, at the expense of potentially
114/// needing [garbage collection][arrow_array::GenericByteViewArray::gc] to clear unreferenced items
115/// from memory.
116///
117/// # For Developers
118///
119/// If you add another variant to this enum, make sure to update `dyn Array::is_canonical`,
120/// and the fuzzer in `fuzz/fuzz_targets/array_ops.rs`.
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Canonical form for [`DType::Null`].
    Null(NullArray),
    /// Canonical form for [`DType::Bool`].
    Bool(BoolArray),
    /// Canonical form for [`DType::Primitive`].
    Primitive(PrimitiveArray),
    /// Canonical form for [`DType::Decimal`].
    Decimal(DecimalArray),
    /// Canonical form shared by [`DType::Utf8`] and [`DType::Binary`].
    VarBinView(VarBinViewArray),
    /// Canonical form for [`DType::List`]; note the payload is a [`ListViewArray`].
    List(ListViewArray),
    /// Canonical form for [`DType::FixedSizeList`].
    FixedSizeList(FixedSizeListArray),
    /// Canonical form for [`DType::Struct`].
    Struct(StructArray),
    /// Canonical form for [`DType::Extension`].
    Extension(ExtensionArray),
    /// Canonical form for [`DType::Variant`].
    Variant(VariantArray),
}
134
/// Dispatch over every [`Canonical`] variant.
///
/// The array wrapped by whichever variant is present is bound to the given identifier, and the
/// same expression is evaluated for it regardless of the variant.
macro_rules! match_each_canonical {
    ($canonical:expr, | $array:ident | $body:expr) => {{
        match $canonical {
            Canonical::Null($array) => $body,
            Canonical::Bool($array) => $body,
            Canonical::Primitive($array) => $body,
            Canonical::Decimal($array) => $body,
            Canonical::VarBinView($array) => $body,
            Canonical::List($array) => $body,
            Canonical::FixedSizeList($array) => $body,
            Canonical::Struct($array) => $body,
            Canonical::Extension($array) => $body,
            Canonical::Variant($array) => $body,
        }
    }};
}
152
153impl Canonical {
154    /// Create an empty canonical array of the given dtype.
155    pub fn empty(dtype: &DType) -> Canonical {
156        match dtype {
157            DType::Null => Canonical::Null(NullArray::new(0)),
158            DType::Bool(n) => Canonical::Bool(unsafe {
159                BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
160            }),
161            DType::Primitive(ptype, n) => {
162                match_each_native_ptype!(ptype, |P| {
163                    Canonical::Primitive(unsafe {
164                        PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
165                    })
166                })
167            }
168            DType::Decimal(decimal_type, n) => {
169                match_each_decimal_value_type!(
170                    DecimalType::smallest_decimal_value_type(decimal_type),
171                    |D| {
172                        Canonical::Decimal(unsafe {
173                            DecimalArray::new_unchecked::<D>(
174                                Buffer::empty(),
175                                *decimal_type,
176                                Validity::from(n),
177                            )
178                        })
179                    }
180                )
181            }
182            DType::Utf8(n) => Canonical::VarBinView(unsafe {
183                VarBinViewArray::new_unchecked(
184                    Buffer::empty(),
185                    Arc::new([]),
186                    dtype.clone(),
187                    Validity::from(n),
188                )
189            }),
190            DType::Binary(n) => Canonical::VarBinView(unsafe {
191                VarBinViewArray::new_unchecked(
192                    Buffer::empty(),
193                    Arc::new([]),
194                    dtype.clone(),
195                    Validity::from(n),
196                )
197            }),
198            DType::List(dtype, n) => Canonical::List(unsafe {
199                ListViewArray::new_unchecked(
200                    Canonical::empty(dtype).into_array(),
201                    Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
202                        .into_array(),
203                    Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
204                        .into_array(),
205                    Validity::from(n),
206                )
207                // An empty list view is trivially copyable to a list.
208                .with_zero_copy_to_list(true)
209            }),
210            DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
211                FixedSizeListArray::new_unchecked(
212                    Canonical::empty(elem_dtype).into_array(),
213                    *list_size,
214                    Validity::from(null),
215                    0,
216                )
217            }),
218            DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
219                StructArray::new_unchecked(
220                    struct_dtype
221                        .fields()
222                        .map(|f| Canonical::empty(&f).into_array())
223                        .collect::<Arc<[_]>>(),
224                    struct_dtype.clone(),
225                    0,
226                    Validity::from(n),
227                )
228            }),
229            DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
230            DType::Variant(_) => {
231                vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant")
232            }
233            DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
234                ext_dtype.clone(),
235                Canonical::empty(ext_dtype.storage_dtype()).into_array(),
236            )),
237        }
238    }
239
240    pub fn len(&self) -> usize {
241        match_each_canonical!(self, |arr| arr.len())
242    }
243
244    pub fn dtype(&self) -> &DType {
245        match_each_canonical!(self, |arr| arr.dtype())
246    }
247
248    pub fn is_empty(&self) -> bool {
249        match_each_canonical!(self, |arr| arr.is_empty())
250    }
251}
252
253impl Canonical {
254    /// Performs a (potentially expensive) compaction operation on the array before it is complete.
255    ///
256    /// This is mostly relevant for the variable-length types such as Utf8, Binary or List where
257    /// they can accumulate wasted space after slicing and taking operations.
258    ///
259    /// This operation is very expensive and can result in things like allocations, full-scans
260    /// and copy operations.
261    pub fn compact(&self, ctx: &mut ExecutionCtx) -> VortexResult<Canonical> {
262        match self {
263            Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
264            Canonical::List(array) => Ok(Canonical::List(
265                array.rebuild(ListViewRebuildMode::TrimElements, ctx)?,
266            )),
267            _ => Ok(self.clone()),
268        }
269    }
270}
271
272// Unwrap canonical type back down to specialized type.
273impl Canonical {
274    pub fn as_null(&self) -> &NullArray {
275        if let Canonical::Null(a) = self {
276            a
277        } else {
278            vortex_panic!("Cannot get NullArray from {:?}", &self)
279        }
280    }
281
282    pub fn into_null(self) -> NullArray {
283        if let Canonical::Null(a) = self {
284            a
285        } else {
286            vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
287        }
288    }
289
290    pub fn as_bool(&self) -> &BoolArray {
291        if let Canonical::Bool(a) = self {
292            a
293        } else {
294            vortex_panic!("Cannot get BoolArray from {:?}", &self)
295        }
296    }
297
298    pub fn into_bool(self) -> BoolArray {
299        if let Canonical::Bool(a) = self {
300            a
301        } else {
302            vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
303        }
304    }
305
306    pub fn as_primitive(&self) -> &PrimitiveArray {
307        if let Canonical::Primitive(a) = self {
308            a
309        } else {
310            vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
311        }
312    }
313
314    pub fn into_primitive(self) -> PrimitiveArray {
315        if let Canonical::Primitive(a) = self {
316            a
317        } else {
318            vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
319        }
320    }
321
322    pub fn as_decimal(&self) -> &DecimalArray {
323        if let Canonical::Decimal(a) = self {
324            a
325        } else {
326            vortex_panic!("Cannot get DecimalArray from {:?}", &self)
327        }
328    }
329
330    pub fn into_decimal(self) -> DecimalArray {
331        if let Canonical::Decimal(a) = self {
332            a
333        } else {
334            vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
335        }
336    }
337
338    pub fn as_varbinview(&self) -> &VarBinViewArray {
339        if let Canonical::VarBinView(a) = self {
340            a
341        } else {
342            vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
343        }
344    }
345
346    pub fn into_varbinview(self) -> VarBinViewArray {
347        if let Canonical::VarBinView(a) = self {
348            a
349        } else {
350            vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
351        }
352    }
353
354    pub fn as_listview(&self) -> &ListViewArray {
355        if let Canonical::List(a) = self {
356            a
357        } else {
358            vortex_panic!("Cannot get ListArray from {:?}", &self)
359        }
360    }
361
362    pub fn into_listview(self) -> ListViewArray {
363        if let Canonical::List(a) = self {
364            a
365        } else {
366            vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
367        }
368    }
369
370    pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
371        if let Canonical::FixedSizeList(a) = self {
372            a
373        } else {
374            vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
375        }
376    }
377
378    pub fn into_fixed_size_list(self) -> FixedSizeListArray {
379        if let Canonical::FixedSizeList(a) = self {
380            a
381        } else {
382            vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
383        }
384    }
385
386    pub fn as_struct(&self) -> &StructArray {
387        if let Canonical::Struct(a) = self {
388            a
389        } else {
390            vortex_panic!("Cannot get StructArray from {:?}", &self)
391        }
392    }
393
394    pub fn into_struct(self) -> StructArray {
395        if let Canonical::Struct(a) = self {
396            a
397        } else {
398            vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
399        }
400    }
401
402    pub fn as_extension(&self) -> &ExtensionArray {
403        if let Canonical::Extension(a) = self {
404            a
405        } else {
406            vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
407        }
408    }
409
410    pub fn into_extension(self) -> ExtensionArray {
411        if let Canonical::Extension(a) = self {
412            a
413        } else {
414            vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
415        }
416    }
417}
418
419impl IntoArray for Canonical {
420    fn into_array(self) -> ArrayRef {
421        match_each_canonical!(self, |arr| arr.into_array())
422    }
423}
424
/// Convenience accessors that canonicalize an array and unwrap one specific [`Canonical`]
/// variant as an owned array.
///
/// # Canonicalization
///
/// This trait is implemented for [`ArrayRef`]. That implementation panics if canonicalization
/// fails or if the canonical form is a different variant than the one requested.
#[deprecated(note = "use `array.execute::<T>(ctx)` instead")]
pub trait ToCanonical {
    /// Canonicalize into a [`NullArray`] if the target is [`Null`](DType::Null) typed.
    #[deprecated(note = "use `array.execute::<NullArray>(ctx)` instead")]
    fn to_null(&self) -> NullArray;

    /// Canonicalize into a [`BoolArray`] if the target is [`Bool`](DType::Bool) typed.
    #[deprecated(note = "use `array.execute::<BoolArray>(ctx)` instead")]
    fn to_bool(&self) -> BoolArray;

    /// Canonicalize into a [`PrimitiveArray`] if the target is [`Primitive`](DType::Primitive)
    /// typed.
    #[deprecated(note = "use `array.execute::<PrimitiveArray>(ctx)` instead")]
    fn to_primitive(&self) -> PrimitiveArray;

    /// Canonicalize into a [`DecimalArray`] if the target is [`Decimal`](DType::Decimal)
    /// typed.
    #[deprecated(note = "use `array.execute::<DecimalArray>(ctx)` instead")]
    fn to_decimal(&self) -> DecimalArray;

    /// Canonicalize into a [`StructArray`] if the target is [`Struct`](DType::Struct) typed.
    #[deprecated(note = "use `array.execute::<StructArray>(ctx)` instead")]
    fn to_struct(&self) -> StructArray;

    /// Canonicalize into a [`ListViewArray`] if the target is [`List`](DType::List) typed.
    #[deprecated(note = "use `array.execute::<ListViewArray>(ctx)` instead")]
    fn to_listview(&self) -> ListViewArray;

    /// Canonicalize into a [`FixedSizeListArray`] if the target is
    /// [`FixedSizeList`](DType::FixedSizeList) typed.
    #[deprecated(note = "use `array.execute::<FixedSizeListArray>(ctx)` instead")]
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Canonicalize into a [`VarBinViewArray`] if the target is [`Utf8`](DType::Utf8)
    /// or [`Binary`](DType::Binary) typed.
    #[deprecated(note = "use `array.execute::<VarBinViewArray>(ctx)` instead")]
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Canonicalize into an [`ExtensionArray`] if the array is [`Extension`](DType::Extension)
    /// typed.
    #[deprecated(note = "use `array.execute::<ExtensionArray>(ctx)` instead")]
    fn to_extension(&self) -> ExtensionArray;
}
473
474// Blanket impl for all Array encodings.
475#[expect(deprecated)]
476impl ToCanonical for ArrayRef {
477    fn to_null(&self) -> NullArray {
478        #[expect(deprecated)]
479        let result = self.to_canonical().vortex_expect("to_canonical failed");
480        result.into_null()
481    }
482
483    fn to_bool(&self) -> BoolArray {
484        #[expect(deprecated)]
485        let result = self.to_canonical().vortex_expect("to_canonical failed");
486        result.into_bool()
487    }
488
489    fn to_primitive(&self) -> PrimitiveArray {
490        #[expect(deprecated)]
491        let result = self.to_canonical().vortex_expect("to_canonical failed");
492        result.into_primitive()
493    }
494
495    fn to_decimal(&self) -> DecimalArray {
496        #[expect(deprecated)]
497        let result = self.to_canonical().vortex_expect("to_canonical failed");
498        result.into_decimal()
499    }
500
501    fn to_struct(&self) -> StructArray {
502        #[expect(deprecated)]
503        let result = self.to_canonical().vortex_expect("to_canonical failed");
504        result.into_struct()
505    }
506
507    fn to_listview(&self) -> ListViewArray {
508        #[expect(deprecated)]
509        let result = self.to_canonical().vortex_expect("to_canonical failed");
510        result.into_listview()
511    }
512
513    fn to_fixed_size_list(&self) -> FixedSizeListArray {
514        #[expect(deprecated)]
515        let result = self.to_canonical().vortex_expect("to_canonical failed");
516        result.into_fixed_size_list()
517    }
518
519    fn to_varbinview(&self) -> VarBinViewArray {
520        #[expect(deprecated)]
521        let result = self.to_canonical().vortex_expect("to_canonical failed");
522        result.into_varbinview()
523    }
524
525    fn to_extension(&self) -> ExtensionArray {
526        #[expect(deprecated)]
527        let result = self.to_canonical().vortex_expect("to_canonical failed");
528        result.into_extension()
529    }
530}
531
532impl From<Canonical> for ArrayRef {
533    fn from(value: Canonical) -> Self {
534        match_each_canonical!(value, |arr| arr.into_array())
535    }
536}
537
538/// Execute into [`Canonical`] by running `execute_until` with the [`AnyCanonical`] matcher.
539///
540/// Unlike executing into [`crate::Columnar`], this will fully expand constant arrays into their
541/// canonical form. Callers should prefer to execute into `Columnar` if they are able to optimize
542/// their use for constant arrays.
543impl Executable for Canonical {
544    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
545        let result = array.execute_until::<AnyCanonical>(ctx)?;
546        Ok(result
547            .as_opt::<AnyCanonical>()
548            .map(Canonical::from)
549            .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
550    }
551}
552
553/// Recursively execute the array until it reaches canonical form along with its validity.
554///
555/// Callers should prefer to execute into `Columnar` instead of this specific target.
556/// This target is useful when preparing arrays for writing.
557pub struct CanonicalValidity(pub Canonical);
558
559impl Executable for CanonicalValidity {
560    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
561        match array.execute::<Canonical>(ctx)? {
562            n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
563            Canonical::Bool(b) => {
564                let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
565                let len = b.len();
566                let BoolDataParts { bits, meta } = b.into_data().into_parts(len);
567                Ok(CanonicalValidity(Canonical::Bool(
568                    BoolArray::try_new_from_handle(
569                        bits,
570                        meta.offset(),
571                        meta.len(),
572                        validity.execute(ctx)?,
573                    )?,
574                )))
575            }
576            Canonical::Primitive(p) => {
577                let PrimitiveDataParts {
578                    ptype,
579                    buffer,
580                    validity,
581                } = p.into_data_parts();
582                Ok(CanonicalValidity(Canonical::Primitive(unsafe {
583                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
584                })))
585            }
586            Canonical::Decimal(d) => {
587                let DecimalDataParts {
588                    decimal_dtype,
589                    values,
590                    values_type,
591                    validity,
592                } = d.into_data_parts();
593                Ok(CanonicalValidity(Canonical::Decimal(unsafe {
594                    DecimalArray::new_unchecked_handle(
595                        values,
596                        values_type,
597                        decimal_dtype,
598                        validity.execute(ctx)?,
599                    )
600                })))
601            }
602            Canonical::VarBinView(vbv) => {
603                let VarBinViewDataParts {
604                    dtype,
605                    buffers,
606                    views,
607                    validity,
608                } = vbv.into_data_parts();
609                Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
610                    VarBinViewArray::new_handle_unchecked(
611                        views,
612                        buffers,
613                        dtype,
614                        validity.execute(ctx)?,
615                    )
616                })))
617            }
618            Canonical::List(l) => {
619                let zctl = l.is_zero_copy_to_list();
620                let ListViewDataParts {
621                    elements,
622                    offsets,
623                    sizes,
624                    validity,
625                    ..
626                } = l.into_data_parts();
627                Ok(CanonicalValidity(Canonical::List(unsafe {
628                    ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
629                        .with_zero_copy_to_list(zctl)
630                })))
631            }
632            Canonical::FixedSizeList(fsl) => {
633                let list_size = fsl.list_size();
634                let len = fsl.len();
635                let parts = fsl.into_data_parts();
636                let elements = parts.elements;
637                let validity = parts.validity;
638                Ok(CanonicalValidity(Canonical::FixedSizeList(
639                    FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
640                )))
641            }
642            Canonical::Struct(st) => {
643                let len = st.len();
644                let StructDataParts {
645                    struct_fields,
646                    fields,
647                    validity,
648                } = st.into_data_parts();
649                Ok(CanonicalValidity(Canonical::Struct(unsafe {
650                    StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
651                })))
652            }
653            Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
654                ExtensionArray::new(
655                    ext.ext_dtype().clone(),
656                    ext.storage_array()
657                        .clone()
658                        .execute::<CanonicalValidity>(ctx)?
659                        .0
660                        .into_array(),
661                ),
662            ))),
663            Canonical::Variant(variant) => {
664                let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
665                let shredded = variant
666                    .shredded()
667                    .map(|shredded| {
668                        if shredded.is::<Variant>() {
669                            recursively_canonicalize_slots(shredded, ctx)
670                        } else {
671                            shredded
672                                .clone()
673                                .execute::<CanonicalValidity>(ctx)
674                                .map(|canonical| canonical.0.into_array())
675                        }
676                    })
677                    .transpose()?;
678                Ok(CanonicalValidity(Canonical::Variant(
679                    VariantArray::try_new(core_storage, shredded)?,
680                )))
681            }
682        }
683    }
684}
685
/// Execution target that recursively executes the array until all of its children are canonical.
///
/// This target is useful to guarantee that all operators are fully executed;
/// callers should prefer an execution target that's suitable for their use case instead of this one.
///
/// The resulting [`Canonical`] is exposed as the public tuple field.
pub struct RecursiveCanonical(pub Canonical);
691
692// TODO: Currently only used for Variant, in the future
693// can probably be used for more canonical types like Struct.
694fn recursively_canonicalize_slots(
695    array: &ArrayRef,
696    ctx: &mut ExecutionCtx,
697) -> VortexResult<ArrayRef> {
698    let slots = array
699        .slots()
700        .iter()
701        .map(|slot| {
702            slot.as_ref()
703                .map(|child| {
704                    child
705                        .clone()
706                        .execute::<RecursiveCanonical>(ctx)
707                        .map(|canonical| canonical.0.into_array())
708                })
709                .transpose()
710        })
711        .collect::<VortexResult<ArraySlots>>()?;
712    array.clone().with_slots(slots)
713}
714impl Executable for RecursiveCanonical {
715    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
716        match array.execute::<Canonical>(ctx)? {
717            n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
718            Canonical::Bool(b) => {
719                let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
720                let len = b.len();
721                let BoolDataParts { bits, meta } = b.into_data().into_parts(len);
722                Ok(RecursiveCanonical(Canonical::Bool(
723                    BoolArray::try_new_from_handle(
724                        bits,
725                        meta.offset(),
726                        meta.len(),
727                        validity.execute(ctx)?,
728                    )?,
729                )))
730            }
731            Canonical::Primitive(p) => {
732                let PrimitiveDataParts {
733                    ptype,
734                    buffer,
735                    validity,
736                } = p.into_data_parts();
737                Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
738                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
739                })))
740            }
741            Canonical::Decimal(d) => {
742                let DecimalDataParts {
743                    decimal_dtype,
744                    values,
745                    values_type,
746                    validity,
747                } = d.into_data_parts();
748                Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
749                    DecimalArray::new_unchecked_handle(
750                        values,
751                        values_type,
752                        decimal_dtype,
753                        validity.execute(ctx)?,
754                    )
755                })))
756            }
757            Canonical::VarBinView(vbv) => {
758                let VarBinViewDataParts {
759                    dtype,
760                    buffers,
761                    views,
762                    validity,
763                } = vbv.into_data_parts();
764                Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
765                    VarBinViewArray::new_handle_unchecked(
766                        views,
767                        buffers,
768                        dtype,
769                        validity.execute(ctx)?,
770                    )
771                })))
772            }
773            Canonical::List(l) => {
774                let zctl = l.is_zero_copy_to_list();
775                let ListViewDataParts {
776                    elements,
777                    offsets,
778                    sizes,
779                    validity,
780                    ..
781                } = l.into_data_parts();
782                Ok(RecursiveCanonical(Canonical::List(unsafe {
783                    ListViewArray::new_unchecked(
784                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
785                        offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
786                        sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
787                        validity.execute(ctx)?,
788                    )
789                    .with_zero_copy_to_list(zctl)
790                })))
791            }
792            Canonical::FixedSizeList(fsl) => {
793                let list_size = fsl.list_size();
794                let len = fsl.len();
795                let parts = fsl.into_data_parts();
796                let elements = parts.elements;
797                let validity = parts.validity;
798                Ok(RecursiveCanonical(Canonical::FixedSizeList(
799                    FixedSizeListArray::new(
800                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
801                        list_size,
802                        validity.execute(ctx)?,
803                        len,
804                    ),
805                )))
806            }
807            Canonical::Struct(st) => {
808                let len = st.len();
809                let StructDataParts {
810                    struct_fields,
811                    fields,
812                    validity,
813                } = st.into_data_parts();
814                let executed_fields = fields
815                    .iter()
816                    .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
817                    .collect::<VortexResult<Arc<[_]>>>()?;
818
819                Ok(RecursiveCanonical(Canonical::Struct(unsafe {
820                    StructArray::new_unchecked(
821                        executed_fields,
822                        struct_fields,
823                        len,
824                        validity.execute(ctx)?,
825                    )
826                })))
827            }
828            Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
829                ExtensionArray::new(
830                    ext.ext_dtype().clone(),
831                    ext.storage_array()
832                        .clone()
833                        .execute::<RecursiveCanonical>(ctx)?
834                        .0
835                        .into_array(),
836                ),
837            ))),
838            Canonical::Variant(variant) => {
839                let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
840                let shredded = variant
841                    .shredded()
842                    .map(|shredded| {
843                        if shredded.is::<Variant>() {
844                            recursively_canonicalize_slots(shredded, ctx)
845                        } else {
846                            shredded
847                                .clone()
848                                .execute::<RecursiveCanonical>(ctx)
849                                .map(|canonical| canonical.0.into_array())
850                        }
851                    })
852                    .transpose()?;
853                Ok(RecursiveCanonical(Canonical::Variant(
854                    VariantArray::try_new(core_storage, shredded)?,
855                )))
856            }
857        }
858    }
859}
860
861/// Execute a primitive typed array into a buffer of native values, assuming all values are valid.
862///
863/// # Errors
864///
865/// Returns a `VortexError` if the array is not all-valid (has any nulls).
866impl<T: NativePType> Executable for Buffer<T> {
867    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
868        let array = PrimitiveArray::execute(array, ctx)?;
869        vortex_ensure!(
870            matches!(
871                array.validity()?,
872                Validity::NonNullable | Validity::AllValid
873            ),
874            "Cannot execute to native buffer: array is not all-valid."
875        );
876        Ok(array.into_buffer())
877    }
878}
879
880/// Execute the array to canonical form and unwrap as a [`PrimitiveArray`].
881///
882/// This will panic if the array's dtype is not primitive.
883impl Executable for PrimitiveArray {
884    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
885        match array.try_downcast::<Primitive>() {
886            Ok(primitive) => Ok(primitive),
887            Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
888        }
889    }
890}
891
892/// Execute the array to canonical form and unwrap as a [`BoolArray`].
893///
894/// This will panic if the array's dtype is not bool.
895impl Executable for BoolArray {
896    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
897        match array.try_downcast::<Bool>() {
898            Ok(bool_array) => Ok(bool_array),
899            Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
900        }
901    }
902}
903
904/// Execute the array to a [`BitBuffer`], aka a non-nullable  [`BoolArray`].
905///
906/// This will panic if the array's dtype is not non-nullable bool.
907impl Executable for BitBuffer {
908    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
909        let bool = BoolArray::execute(array, ctx)?;
910        assert!(
911            !bool.dtype().is_nullable(),
912            "bit buffer execute only works with non-nullable bool arrays"
913        );
914        Ok(bool.into_bit_buffer())
915    }
916}
917
918/// Execute the array to canonical form and unwrap as a [`NullArray`].
919///
920/// This will panic if the array's dtype is not null.
921impl Executable for NullArray {
922    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
923        match array.try_downcast::<Null>() {
924            Ok(null_array) => Ok(null_array),
925            Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
926        }
927    }
928}
929
930/// Execute the array to canonical form and unwrap as a [`VarBinViewArray`].
931///
932/// This will panic if the array's dtype is not utf8 or binary.
933impl Executable for VarBinViewArray {
934    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
935        match array.try_downcast::<VarBinView>() {
936            Ok(varbinview) => Ok(varbinview),
937            Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
938        }
939    }
940}
941
942/// Execute the array to canonical form and unwrap as an [`ExtensionArray`].
943///
944/// This will panic if the array's dtype is not an extension type.
945impl Executable for ExtensionArray {
946    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
947        match array.try_downcast::<Extension>() {
948            Ok(ext_array) => Ok(ext_array),
949            Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
950        }
951    }
952}
953
954/// Execute the array to canonical form and unwrap as a [`DecimalArray`].
955///
956/// This will panic if the array's dtype is not decimal.
957impl Executable for DecimalArray {
958    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
959        match array.try_downcast::<Decimal>() {
960            Ok(decimal) => Ok(decimal),
961            Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
962        }
963    }
964}
965
966/// Execute the array to canonical form and unwrap as a [`ListViewArray`].
967///
968/// This will panic if the array's dtype is not list.
969impl Executable for ListViewArray {
970    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
971        match array.try_downcast::<ListView>() {
972            Ok(list) => Ok(list),
973            Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
974        }
975    }
976}
977
978/// Execute the array to canonical form and unwrap as a [`FixedSizeListArray`].
979///
980/// This will panic if the array's dtype is not fixed size list.
981impl Executable for FixedSizeListArray {
982    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
983        match array.try_downcast::<FixedSizeList>() {
984            Ok(fsl) => Ok(fsl),
985            Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
986        }
987    }
988}
989
990/// Execute the array to canonical form and unwrap as a [`StructArray`].
991///
992/// This will panic if the array's dtype is not struct.
993impl Executable for StructArray {
994    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
995        match array.try_downcast::<Struct>() {
996            Ok(struct_array) => Ok(struct_array),
997            Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
998        }
999    }
1000}
1001
1002/// Execute the array to canonical form and unwrap as a [`VariantArray`].
1003///
1004/// This will panic if the array's dtype is not variant.
1005impl Executable for VariantArray {
1006    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
1007        match array.try_downcast::<Variant>() {
1008            Ok(variant_array) => Ok(variant_array),
1009            Err(array) => match Canonical::execute(array, ctx)? {
1010                Canonical::Variant(variant_array) => Ok(variant_array),
1011                canonical => vortex_panic!("Cannot unwrap VariantArray from {:?}", canonical),
1012            },
1013        }
1014    }
1015}
1016
1017/// A view into a canonical array type.
1018///
1019/// Uses `ArrayView<V>` because these are obtained by
1020/// downcasting through the `Matcher` trait which returns `ArrayView<V>`.
1021#[derive(Debug, Clone, Copy)]
1022pub enum CanonicalView<'a> {
1023    Null(ArrayView<'a, Null>),
1024    Bool(ArrayView<'a, Bool>),
1025    Primitive(ArrayView<'a, Primitive>),
1026    Decimal(ArrayView<'a, Decimal>),
1027    VarBinView(ArrayView<'a, VarBinView>),
1028    List(ArrayView<'a, ListView>),
1029    FixedSizeList(ArrayView<'a, FixedSizeList>),
1030    Struct(ArrayView<'a, Struct>),
1031    Extension(ArrayView<'a, Extension>),
1032    Variant(ArrayView<'a, Variant>),
1033}
1034
1035impl From<CanonicalView<'_>> for Canonical {
1036    fn from(value: CanonicalView<'_>) -> Self {
1037        match value {
1038            CanonicalView::Null(a) => Canonical::Null(a.into_owned()),
1039            CanonicalView::Bool(a) => Canonical::Bool(a.into_owned()),
1040            CanonicalView::Primitive(a) => Canonical::Primitive(a.into_owned()),
1041            CanonicalView::Decimal(a) => Canonical::Decimal(a.into_owned()),
1042            CanonicalView::VarBinView(a) => Canonical::VarBinView(a.into_owned()),
1043            CanonicalView::List(a) => Canonical::List(a.into_owned()),
1044            CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.into_owned()),
1045            CanonicalView::Struct(a) => Canonical::Struct(a.into_owned()),
1046            CanonicalView::Extension(a) => Canonical::Extension(a.into_owned()),
1047            CanonicalView::Variant(a) => Canonical::Variant(a.into_owned()),
1048        }
1049    }
1050}
1051
1052impl CanonicalView<'_> {
1053    /// Convert to a type-erased [`ArrayRef`].
1054    pub fn to_array_ref(&self) -> ArrayRef {
1055        match self {
1056            CanonicalView::Null(a) => a.array().clone(),
1057            CanonicalView::Bool(a) => a.array().clone(),
1058            CanonicalView::Primitive(a) => a.array().clone(),
1059            CanonicalView::Decimal(a) => a.array().clone(),
1060            CanonicalView::VarBinView(a) => a.array().clone(),
1061            CanonicalView::List(a) => a.array().clone(),
1062            CanonicalView::FixedSizeList(a) => a.array().clone(),
1063            CanonicalView::Struct(a) => a.array().clone(),
1064            CanonicalView::Extension(a) => a.array().clone(),
1065            CanonicalView::Variant(a) => a.array().clone(),
1066        }
1067    }
1068}
1069
1070/// A matcher for any canonical array type.
1071pub struct AnyCanonical;
1072impl Matcher for AnyCanonical {
1073    type Match<'a> = CanonicalView<'a>;
1074
1075    #[inline]
1076    fn matches(array: &ArrayRef) -> bool {
1077        array.is::<Null>()
1078            || array.is::<Bool>()
1079            || array.is::<Primitive>()
1080            || array.is::<Decimal>()
1081            || array.is::<Struct>()
1082            || array.is::<ListView>()
1083            || array.is::<FixedSizeList>()
1084            || array.is::<VarBinView>()
1085            || array.is::<Variant>()
1086            || array.is::<Extension>()
1087    }
1088
1089    #[inline]
1090    fn try_match(array: &ArrayRef) -> Option<Self::Match<'_>> {
1091        if let Some(a) = array.as_opt::<Null>() {
1092            Some(CanonicalView::Null(a))
1093        } else if let Some(a) = array.as_opt::<Bool>() {
1094            Some(CanonicalView::Bool(a))
1095        } else if let Some(a) = array.as_opt::<Primitive>() {
1096            Some(CanonicalView::Primitive(a))
1097        } else if let Some(a) = array.as_opt::<Decimal>() {
1098            Some(CanonicalView::Decimal(a))
1099        } else if let Some(a) = array.as_opt::<Struct>() {
1100            Some(CanonicalView::Struct(a))
1101        } else if let Some(a) = array.as_opt::<ListView>() {
1102            Some(CanonicalView::List(a))
1103        } else if let Some(a) = array.as_opt::<FixedSizeList>() {
1104            Some(CanonicalView::FixedSizeList(a))
1105        } else if let Some(a) = array.as_opt::<VarBinView>() {
1106            Some(CanonicalView::VarBinView(a))
1107        } else if let Some(a) = array.as_opt::<Variant>() {
1108            Some(CanonicalView::Variant(a))
1109        } else {
1110            array.as_opt::<Extension>().map(CanonicalView::Extension)
1111        }
1112    }
1113}
1114
1115#[cfg(test)]
1116mod test {
1117    use std::sync::Arc;
1118    use std::sync::LazyLock;
1119
1120    use arrow_array::Array as ArrowArray;
1121    use arrow_array::ArrayRef as ArrowArrayRef;
1122    use arrow_array::ListArray as ArrowListArray;
1123    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
1124    use arrow_array::StringArray;
1125    use arrow_array::StringViewArray;
1126    use arrow_array::StructArray as ArrowStructArray;
1127    use arrow_array::cast::AsArray;
1128    use arrow_array::types::Int32Type;
1129    use arrow_array::types::Int64Type;
1130    use arrow_array::types::UInt64Type;
1131    use arrow_buffer::NullBufferBuilder;
1132    use arrow_buffer::OffsetBuffer;
1133    use arrow_schema::DataType;
1134    use arrow_schema::Field;
1135    use vortex_buffer::buffer;
1136    use vortex_error::VortexResult;
1137    use vortex_error::vortex_err;
1138    use vortex_session::VortexSession;
1139
1140    use crate::ArrayRef;
1141    use crate::Canonical;
1142    use crate::CanonicalValidity;
1143    use crate::IntoArray;
1144    use crate::VortexSessionExecute;
1145    use crate::arrays::Constant;
1146    use crate::arrays::ConstantArray;
1147    use crate::arrays::Primitive;
1148    use crate::arrays::Struct;
1149    use crate::arrays::Variant;
1150    use crate::arrays::VariantArray;
1151    use crate::arrays::struct_::StructArrayExt;
1152    use crate::arrays::variant::VariantArrayExt;
1153    use crate::arrow::ArrowSessionExt;
1154    use crate::arrow::FromArrowArray;
1155    use crate::canonical::StructArray;
1156    use crate::dtype::Nullability;
1157    use crate::scalar::Scalar;
1158    use crate::session::ArraySession;
1159
1160    /// A shared session for these canonical tests, used to create execution contexts.
1161    static SESSION: LazyLock<VortexSession> =
1162        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
1163
1164    fn variant_core_storage(len: usize) -> ArrayRef {
1165        ConstantArray::new(
1166            Scalar::variant(Scalar::primitive(1i32, Nullability::NonNullable)),
1167            len,
1168        )
1169        .into_array()
1170    }
1171
1172    #[test]
1173    fn canonical_validity_canonicalizes_variant_shredded_physical_slots() -> VortexResult<()> {
1174        let len = 2;
1175        let nested_shredded =
1176            StructArray::try_from_iter([("value", ConstantArray::new(10i32, len).into_array())])?;
1177        let inner_variant = VariantArray::try_new(
1178            variant_core_storage(len),
1179            Some(nested_shredded.into_array()),
1180        )?;
1181        let outer_variant =
1182            VariantArray::try_new(variant_core_storage(len), Some(inner_variant.into_array()))?;
1183
1184        let mut ctx = SESSION.create_execution_ctx();
1185        let Canonical::Variant(canonical) = outer_variant
1186            .into_array()
1187            .execute::<CanonicalValidity>(&mut ctx)?
1188            .0
1189        else {
1190            return Err(vortex_err!("expected canonical variant"));
1191        };
1192
1193        let nested_variant = canonical
1194            .shredded()
1195            .and_then(|shredded| shredded.as_opt::<Variant>())
1196            .ok_or_else(|| vortex_err!("expected nested variant shredded child"))?;
1197        let nested_struct = nested_variant
1198            .shredded()
1199            .and_then(|shredded| shredded.as_opt::<Struct>())
1200            .ok_or_else(|| vortex_err!("expected nested struct shredded child"))?;
1201        let value = nested_struct.unmasked_field_by_name("value")?;
1202
1203        assert!(value.is::<Primitive>());
1204        assert!(!value.is::<Constant>());
1205
1206        Ok(())
1207    }
1208
1209    #[test]
1210    fn test_canonicalize_nested_struct() {
1211        let mut ctx = SESSION.create_execution_ctx();
1212        // Create a struct array with multiple internal components.
1213        let nested_struct_array = StructArray::from_fields(&[
1214            ("a", buffer![1u64].into_array()),
1215            (
1216                "b",
1217                StructArray::from_fields(&[(
1218                    "inner_a",
1219                    // The nested struct contains a ConstantArray representing the primitive array
1220                    //   [100i64]
1221                    // ConstantArray is not a canonical type, so converting `into_arrow()` should
1222                    // map this to the nearest canonical type (PrimitiveArray).
1223                    ConstantArray::new(100i64, 1).into_array(),
1224                )])
1225                .unwrap()
1226                .into_array(),
1227            ),
1228        ])
1229        .unwrap();
1230
1231        let arrow_struct = SESSION
1232            .arrow()
1233            .execute_arrow(nested_struct_array.into_array(), None, &mut ctx)
1234            .unwrap()
1235            .as_any()
1236            .downcast_ref::<ArrowStructArray>()
1237            .cloned()
1238            .unwrap();
1239
1240        assert!(
1241            arrow_struct
1242                .column(0)
1243                .as_any()
1244                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
1245                .is_some()
1246        );
1247
1248        let inner_struct = Arc::clone(arrow_struct.column(1))
1249            .as_any()
1250            .downcast_ref::<ArrowStructArray>()
1251            .cloned()
1252            .unwrap();
1253
1254        let inner_a = inner_struct
1255            .column(0)
1256            .as_any()
1257            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
1258        assert!(inner_a.is_some());
1259
1260        assert_eq!(
1261            inner_a.cloned().unwrap(),
1262            ArrowPrimitiveArray::from_iter([100i64])
1263        );
1264    }
1265
1266    #[test]
1267    fn roundtrip_struct() {
1268        let mut ctx = SESSION.create_execution_ctx();
1269        let mut nulls = NullBufferBuilder::new(6);
1270        nulls.append_n_non_nulls(4);
1271        nulls.append_null();
1272        nulls.append_non_null();
1273        let names = Arc::new(StringViewArray::from_iter(vec![
1274            Some("Joseph"),
1275            None,
1276            Some("Angela"),
1277            Some("Mikhail"),
1278            None,
1279            None,
1280        ]));
1281        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
1282            Some(25),
1283            Some(31),
1284            None,
1285            Some(57),
1286            None,
1287            None,
1288        ]));
1289
1290        let arrow_struct = ArrowStructArray::new(
1291            vec![
1292                Arc::new(Field::new("name", DataType::Utf8View, true)),
1293                Arc::new(Field::new("age", DataType::Int32, true)),
1294            ]
1295            .into(),
1296            vec![names, ages],
1297            nulls.finish(),
1298        );
1299
1300        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();
1301        let vortex_struct = SESSION
1302            .arrow()
1303            .execute_arrow(vortex_struct, None, &mut ctx)
1304            .unwrap();
1305        assert_eq!(&arrow_struct, vortex_struct.as_struct());
1306    }
1307
1308    #[test]
1309    fn roundtrip_list() {
1310        let mut ctx = SESSION.create_execution_ctx();
1311        let names = Arc::new(StringArray::from_iter(vec![
1312            Some("Joseph"),
1313            Some("Angela"),
1314            Some("Mikhail"),
1315        ]));
1316
1317        let arrow_list = ArrowListArray::new(
1318            Arc::new(Field::new_list_field(DataType::Utf8, true)),
1319            OffsetBuffer::from_lengths(vec![0, 2, 1]),
1320            names,
1321            None,
1322        );
1323        let list_data_type = arrow_list.data_type();
1324        let list_field = Field::new(String::new(), list_data_type.clone(), true);
1325
1326        let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();
1327
1328        let rt_arrow_list = SESSION
1329            .arrow()
1330            .execute_arrow(vortex_list, Some(&list_field), &mut ctx)
1331            .unwrap();
1332
1333        assert_eq!(
1334            (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
1335            rt_arrow_list.as_ref()
1336        );
1337    }
1338}