Skip to main content

vortex_array/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Encodings that enable zero-copy sharing of data with Arrow.
5
6use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::ArrayRef;
16use crate::ArraySlots;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::array::ArrayView;
21use crate::array::child_to_validity;
22use crate::arrays::Bool;
23use crate::arrays::BoolArray;
24use crate::arrays::Decimal;
25use crate::arrays::DecimalArray;
26use crate::arrays::Extension;
27use crate::arrays::ExtensionArray;
28use crate::arrays::FixedSizeList;
29use crate::arrays::FixedSizeListArray;
30use crate::arrays::ListView;
31use crate::arrays::ListViewArray;
32use crate::arrays::Null;
33use crate::arrays::NullArray;
34use crate::arrays::Primitive;
35use crate::arrays::PrimitiveArray;
36use crate::arrays::Struct;
37use crate::arrays::StructArray;
38use crate::arrays::VarBinView;
39use crate::arrays::VarBinViewArray;
40use crate::arrays::Variant;
41use crate::arrays::VariantArray;
42use crate::arrays::bool::BoolDataParts;
43use crate::arrays::decimal::DecimalDataParts;
44use crate::arrays::extension::ExtensionArrayExt;
45use crate::arrays::fixed_size_list::FixedSizeListArrayExt;
46use crate::arrays::listview::ListViewDataParts;
47use crate::arrays::listview::ListViewRebuildMode;
48use crate::arrays::primitive::PrimitiveDataParts;
49use crate::arrays::struct_::StructDataParts;
50use crate::arrays::varbinview::VarBinViewDataParts;
51use crate::arrays::variant::VariantArrayExt;
52use crate::dtype::DType;
53use crate::dtype::NativePType;
54use crate::dtype::Nullability;
55use crate::dtype::PType;
56use crate::match_each_decimal_value_type;
57use crate::match_each_native_ptype;
58use crate::matcher::Matcher;
59use crate::validity::Validity;
60
61/// An enum capturing the default uncompressed encodings for each [Vortex type](DType).
62///
63/// Any array can be decoded into canonical form via the `to_canonical`
64/// trait method. This is the simplest encoding for a type, and will not be compressed but may
65/// contain compressed child arrays.
66///
67/// Canonical form is useful for doing type-specific compute where you need to know that all
68/// elements are laid out decompressed and contiguous in memory.
69///
70/// Each `Canonical` variant has a corresponding [`DType`] variant, with the notable exception of
71/// [`Canonical::VarBinView`], which is the canonical encoding for both [`DType::Utf8`] and
72/// [`DType::Binary`].
73///
74/// # Laziness
75///
76/// Canonical form is not recursive, so while a `StructArray` is the canonical format for any
77/// `Struct` type, individual column child arrays may still be compressed. This allows
78/// compute over Vortex arrays to push decoding as late as possible, and ideally many child arrays
79/// never need to be decoded into canonical form at all depending on the compute.
80///
81/// # Arrow interoperability
82///
83/// All of the Vortex canonical encodings have an equivalent Arrow encoding that can be built
84/// zero-copy, and the corresponding Arrow array types can also be built directly.
85///
86/// The full list of canonical types and their equivalent Arrow array types are:
87///
88/// * `NullArray`: [`arrow_array::NullArray`]
89/// * `BoolArray`: [`arrow_array::BooleanArray`]
90/// * `PrimitiveArray`: [`arrow_array::PrimitiveArray`]
91/// * `DecimalArray`: [`arrow_array::Decimal128Array`] and [`arrow_array::Decimal256Array`]
92/// * `VarBinViewArray`: [`arrow_array::GenericByteViewArray`]
93/// * `ListViewArray`: [`arrow_array::ListViewArray`]
94/// * `FixedSizeListArray`: [`arrow_array::FixedSizeListArray`]
95/// * `StructArray`: [`arrow_array::StructArray`]
96///
97/// Vortex uses a logical type system, unlike Arrow which uses physical encodings for its types.
98/// As an example, there are at least six valid physical encodings for a `Utf8` array. This can
99/// create ambiguity.
100/// Thus, if you receive an Arrow array, compress it using Vortex, and then
101/// decompress it later to pass to a compute kernel, there are multiple suitable Arrow array
102/// variants to hold the data.
103///
104/// To disambiguate, we choose a canonical physical encoding for every Vortex [`DType`], which
105/// will correspond to an arrow-rs [`arrow_schema::DataType`].
106///
107/// # Views support
108///
109/// Binary and String views, also known as "German strings" are a better encoding format for
110/// nearly all use-cases. Variable-length binary views are part of the Apache Arrow spec, and are
111/// fully supported by the Datafusion query engine. We use them as our canonical string encoding
112/// for all `Utf8` and `Binary` typed arrays in Vortex. They provide considerably faster filter
113/// execution than the core `StringArray` and `BinaryArray` types, at the expense of potentially
114/// needing [garbage collection][arrow_array::GenericByteViewArray::gc] to clear unreferenced items
115/// from memory.
116///
117/// # For Developers
118///
119/// If you add another variant to this enum, make sure to update `dyn Array::is_canonical`,
120/// and the fuzzer in `fuzz/fuzz_targets/array_ops.rs`.
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Canonical array for [`DType::Null`].
    Null(NullArray),
    /// Canonical array for [`DType::Bool`].
    Bool(BoolArray),
    /// Canonical array for [`DType::Primitive`].
    Primitive(PrimitiveArray),
    /// Canonical array for [`DType::Decimal`].
    Decimal(DecimalArray),
    /// Canonical array shared by [`DType::Utf8`] and [`DType::Binary`].
    VarBinView(VarBinViewArray),
    /// Canonical array for [`DType::List`]; note that the payload is a [`ListViewArray`].
    List(ListViewArray),
    /// Canonical array for [`DType::FixedSizeList`].
    FixedSizeList(FixedSizeListArray),
    /// Canonical array for [`DType::Struct`].
    Struct(StructArray),
    /// Canonical array for [`DType::Extension`], wrapping an array of the storage dtype.
    Extension(ExtensionArray),
    /// Canonical array for [`DType::Variant`]. [`Canonical::empty`] does not support this
    /// dtype and panics when asked for it.
    Variant(VariantArray),
}
134
/// Dispatch over every [`Canonical`] variant.
///
/// Binds the variant's inner array to `$ident` and evaluates `$eval` with it, so the same
/// expression is instantiated once per variant. Arms are listed in enum declaration order.
macro_rules! match_each_canonical {
    ($self:expr, | $ident:ident | $eval:expr) => {{
        match $self {
            Canonical::Null($ident) => $eval,
            Canonical::Bool($ident) => $eval,
            Canonical::Primitive($ident) => $eval,
            Canonical::Decimal($ident) => $eval,
            Canonical::VarBinView($ident) => $eval,
            Canonical::List($ident) => $eval,
            Canonical::FixedSizeList($ident) => $eval,
            Canonical::Struct($ident) => $eval,
            Canonical::Extension($ident) => $eval,
            Canonical::Variant($ident) => $eval,
        }
    }};
}
152
153impl Canonical {
154    /// Create an empty canonical array of the given dtype.
155    pub fn empty(dtype: &DType) -> Canonical {
156        match dtype {
157            DType::Null => Canonical::Null(NullArray::new(0)),
158            DType::Bool(n) => Canonical::Bool(unsafe {
159                BoolArray::new_unchecked(BitBuffer::empty(), Validity::from(n))
160            }),
161            DType::Primitive(ptype, n) => {
162                match_each_native_ptype!(ptype, |P| {
163                    Canonical::Primitive(unsafe {
164                        PrimitiveArray::new_unchecked(Buffer::<P>::empty(), Validity::from(n))
165                    })
166                })
167            }
168            DType::Decimal(decimal_type, n) => {
169                match_each_decimal_value_type!(
170                    DecimalType::smallest_decimal_value_type(decimal_type),
171                    |D| {
172                        Canonical::Decimal(unsafe {
173                            DecimalArray::new_unchecked::<D>(
174                                Buffer::empty(),
175                                *decimal_type,
176                                Validity::from(n),
177                            )
178                        })
179                    }
180                )
181            }
182            DType::Utf8(n) => Canonical::VarBinView(unsafe {
183                VarBinViewArray::new_unchecked(
184                    Buffer::empty(),
185                    Arc::new([]),
186                    dtype.clone(),
187                    Validity::from(n),
188                )
189            }),
190            DType::Binary(n) => Canonical::VarBinView(unsafe {
191                VarBinViewArray::new_unchecked(
192                    Buffer::empty(),
193                    Arc::new([]),
194                    dtype.clone(),
195                    Validity::from(n),
196                )
197            }),
198            DType::List(dtype, n) => Canonical::List(unsafe {
199                ListViewArray::new_unchecked(
200                    Canonical::empty(dtype).into_array(),
201                    Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
202                        .into_array(),
203                    Canonical::empty(&DType::Primitive(PType::U8, Nullability::NonNullable))
204                        .into_array(),
205                    Validity::from(n),
206                )
207                // An empty list view is trivially copyable to a list.
208                .with_zero_copy_to_list(true)
209            }),
210            DType::FixedSizeList(elem_dtype, list_size, null) => Canonical::FixedSizeList(unsafe {
211                FixedSizeListArray::new_unchecked(
212                    Canonical::empty(elem_dtype).into_array(),
213                    *list_size,
214                    Validity::from(null),
215                    0,
216                )
217            }),
218            DType::Struct(struct_dtype, n) => Canonical::Struct(unsafe {
219                StructArray::new_unchecked(
220                    struct_dtype
221                        .fields()
222                        .map(|f| Canonical::empty(&f).into_array())
223                        .collect::<Arc<[_]>>(),
224                    struct_dtype.clone(),
225                    0,
226                    Validity::from(n),
227                )
228            }),
229            DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
230            DType::Variant(_) => {
231                vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant")
232            }
233            DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new(
234                ext_dtype.clone(),
235                Canonical::empty(ext_dtype.storage_dtype()).into_array(),
236            )),
237        }
238    }
239
240    pub fn len(&self) -> usize {
241        match_each_canonical!(self, |arr| arr.len())
242    }
243
244    pub fn dtype(&self) -> &DType {
245        match_each_canonical!(self, |arr| arr.dtype())
246    }
247
248    pub fn is_empty(&self) -> bool {
249        match_each_canonical!(self, |arr| arr.is_empty())
250    }
251}
252
253impl Canonical {
254    /// Performs a (potentially expensive) compaction operation on the array before it is complete.
255    ///
256    /// This is mostly relevant for the variable-length types such as Utf8, Binary or List where
257    /// they can accumulate wasted space after slicing and taking operations.
258    ///
259    /// This operation is very expensive and can result in things like allocations, full-scans
260    /// and copy operations.
261    pub fn compact(&self) -> VortexResult<Canonical> {
262        match self {
263            Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
264            Canonical::List(array) => Ok(Canonical::List(
265                array.rebuild(ListViewRebuildMode::TrimElements)?,
266            )),
267            _ => Ok(self.clone()),
268        }
269    }
270}
271
272// Unwrap canonical type back down to specialized type.
273impl Canonical {
274    pub fn as_null(&self) -> &NullArray {
275        if let Canonical::Null(a) = self {
276            a
277        } else {
278            vortex_panic!("Cannot get NullArray from {:?}", &self)
279        }
280    }
281
282    pub fn into_null(self) -> NullArray {
283        if let Canonical::Null(a) = self {
284            a
285        } else {
286            vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
287        }
288    }
289
290    pub fn as_bool(&self) -> &BoolArray {
291        if let Canonical::Bool(a) = self {
292            a
293        } else {
294            vortex_panic!("Cannot get BoolArray from {:?}", &self)
295        }
296    }
297
298    pub fn into_bool(self) -> BoolArray {
299        if let Canonical::Bool(a) = self {
300            a
301        } else {
302            vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
303        }
304    }
305
306    pub fn as_primitive(&self) -> &PrimitiveArray {
307        if let Canonical::Primitive(a) = self {
308            a
309        } else {
310            vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
311        }
312    }
313
314    pub fn into_primitive(self) -> PrimitiveArray {
315        if let Canonical::Primitive(a) = self {
316            a
317        } else {
318            vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
319        }
320    }
321
322    pub fn as_decimal(&self) -> &DecimalArray {
323        if let Canonical::Decimal(a) = self {
324            a
325        } else {
326            vortex_panic!("Cannot get DecimalArray from {:?}", &self)
327        }
328    }
329
330    pub fn into_decimal(self) -> DecimalArray {
331        if let Canonical::Decimal(a) = self {
332            a
333        } else {
334            vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
335        }
336    }
337
338    pub fn as_varbinview(&self) -> &VarBinViewArray {
339        if let Canonical::VarBinView(a) = self {
340            a
341        } else {
342            vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
343        }
344    }
345
346    pub fn into_varbinview(self) -> VarBinViewArray {
347        if let Canonical::VarBinView(a) = self {
348            a
349        } else {
350            vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
351        }
352    }
353
354    pub fn as_listview(&self) -> &ListViewArray {
355        if let Canonical::List(a) = self {
356            a
357        } else {
358            vortex_panic!("Cannot get ListArray from {:?}", &self)
359        }
360    }
361
362    pub fn into_listview(self) -> ListViewArray {
363        if let Canonical::List(a) = self {
364            a
365        } else {
366            vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
367        }
368    }
369
370    pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
371        if let Canonical::FixedSizeList(a) = self {
372            a
373        } else {
374            vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
375        }
376    }
377
378    pub fn into_fixed_size_list(self) -> FixedSizeListArray {
379        if let Canonical::FixedSizeList(a) = self {
380            a
381        } else {
382            vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
383        }
384    }
385
386    pub fn as_struct(&self) -> &StructArray {
387        if let Canonical::Struct(a) = self {
388            a
389        } else {
390            vortex_panic!("Cannot get StructArray from {:?}", &self)
391        }
392    }
393
394    pub fn into_struct(self) -> StructArray {
395        if let Canonical::Struct(a) = self {
396            a
397        } else {
398            vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
399        }
400    }
401
402    pub fn as_extension(&self) -> &ExtensionArray {
403        if let Canonical::Extension(a) = self {
404            a
405        } else {
406            vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
407        }
408    }
409
410    pub fn into_extension(self) -> ExtensionArray {
411        if let Canonical::Extension(a) = self {
412            a
413        } else {
414            vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
415        }
416    }
417}
418
419impl IntoArray for Canonical {
420    fn into_array(self) -> ArrayRef {
421        match_each_canonical!(self, |arr| arr.into_array())
422    }
423}
424
/// Deprecated convenience trait for canonicalizing a borrowed array straight into one specific
/// canonical array type.
///
/// Each method names the canonical array type the caller expects; the deprecation notes point
/// at `array.execute::<T>(ctx)` as the replacement.
///
/// # Panics
///
/// The implementation for [`ArrayRef`] in this file panics if canonicalization fails or if the
/// canonical form is a different variant than the one requested.
#[deprecated(note = "use `array.execute::<T>(ctx)` instead")]
pub trait ToCanonical {
    /// Canonicalize into a [`NullArray`] if the target is [`Null`](DType::Null) typed.
    #[deprecated(note = "use `array.execute::<NullArray>(ctx)` instead")]
    fn to_null(&self) -> NullArray;

    /// Canonicalize into a [`BoolArray`] if the target is [`Bool`](DType::Bool) typed.
    #[deprecated(note = "use `array.execute::<BoolArray>(ctx)` instead")]
    fn to_bool(&self) -> BoolArray;

    /// Canonicalize into a [`PrimitiveArray`] if the target is [`Primitive`](DType::Primitive)
    /// typed.
    #[deprecated(note = "use `array.execute::<PrimitiveArray>(ctx)` instead")]
    fn to_primitive(&self) -> PrimitiveArray;

    /// Canonicalize into a [`DecimalArray`] if the target is [`Decimal`](DType::Decimal)
    /// typed.
    #[deprecated(note = "use `array.execute::<DecimalArray>(ctx)` instead")]
    fn to_decimal(&self) -> DecimalArray;

    /// Canonicalize into a [`StructArray`] if the target is [`Struct`](DType::Struct) typed.
    #[deprecated(note = "use `array.execute::<StructArray>(ctx)` instead")]
    fn to_struct(&self) -> StructArray;

    /// Canonicalize into a [`ListViewArray`] if the target is [`List`](DType::List) typed.
    #[deprecated(note = "use `array.execute::<ListViewArray>(ctx)` instead")]
    fn to_listview(&self) -> ListViewArray;

    /// Canonicalize into a [`FixedSizeListArray`] if the target is
    /// [`FixedSizeList`](DType::FixedSizeList) typed.
    #[deprecated(note = "use `array.execute::<FixedSizeListArray>(ctx)` instead")]
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Canonicalize into a [`VarBinViewArray`] if the target is [`Utf8`](DType::Utf8)
    /// or [`Binary`](DType::Binary) typed.
    #[deprecated(note = "use `array.execute::<VarBinViewArray>(ctx)` instead")]
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Canonicalize into an [`ExtensionArray`] if the array is [`Extension`](DType::Extension)
    /// typed.
    #[deprecated(note = "use `array.execute::<ExtensionArray>(ctx)` instead")]
    fn to_extension(&self) -> ExtensionArray;
}
473
474// Blanket impl for all Array encodings.
475#[expect(deprecated)]
476impl ToCanonical for ArrayRef {
477    fn to_null(&self) -> NullArray {
478        #[expect(deprecated)]
479        let result = self.to_canonical().vortex_expect("to_canonical failed");
480        result.into_null()
481    }
482
483    fn to_bool(&self) -> BoolArray {
484        #[expect(deprecated)]
485        let result = self.to_canonical().vortex_expect("to_canonical failed");
486        result.into_bool()
487    }
488
489    fn to_primitive(&self) -> PrimitiveArray {
490        #[expect(deprecated)]
491        let result = self.to_canonical().vortex_expect("to_canonical failed");
492        result.into_primitive()
493    }
494
495    fn to_decimal(&self) -> DecimalArray {
496        #[expect(deprecated)]
497        let result = self.to_canonical().vortex_expect("to_canonical failed");
498        result.into_decimal()
499    }
500
501    fn to_struct(&self) -> StructArray {
502        #[expect(deprecated)]
503        let result = self.to_canonical().vortex_expect("to_canonical failed");
504        result.into_struct()
505    }
506
507    fn to_listview(&self) -> ListViewArray {
508        #[expect(deprecated)]
509        let result = self.to_canonical().vortex_expect("to_canonical failed");
510        result.into_listview()
511    }
512
513    fn to_fixed_size_list(&self) -> FixedSizeListArray {
514        #[expect(deprecated)]
515        let result = self.to_canonical().vortex_expect("to_canonical failed");
516        result.into_fixed_size_list()
517    }
518
519    fn to_varbinview(&self) -> VarBinViewArray {
520        #[expect(deprecated)]
521        let result = self.to_canonical().vortex_expect("to_canonical failed");
522        result.into_varbinview()
523    }
524
525    fn to_extension(&self) -> ExtensionArray {
526        #[expect(deprecated)]
527        let result = self.to_canonical().vortex_expect("to_canonical failed");
528        result.into_extension()
529    }
530}
531
532impl From<Canonical> for ArrayRef {
533    fn from(value: Canonical) -> Self {
534        match_each_canonical!(value, |arr| arr.into_array())
535    }
536}
537
538/// Execute into [`Canonical`] by running `execute_until` with the [`AnyCanonical`] matcher.
539///
540/// Unlike executing into [`crate::Columnar`], this will fully expand constant arrays into their
541/// canonical form. Callers should prefer to execute into `Columnar` if they are able to optimize
542/// their use for constant arrays.
543impl Executable for Canonical {
544    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
545        let result = array.execute_until::<AnyCanonical>(ctx)?;
546        Ok(result
547            .as_opt::<AnyCanonical>()
548            .map(Canonical::from)
549            .vortex_expect("execute_until::<AnyCanonical> must return a canonical array"))
550    }
551}
552
/// Execution target that yields a [`Canonical`] array whose validity has also been executed.
///
/// The `Executable` impl in this file first executes the array into [`Canonical`], then
/// executes the validity of the top-level array and rebuilds it from its parts. Children are
/// left as they are, with two exceptions: extension storage is itself executed into
/// `CanonicalValidity`, and variant storage is recursively canonicalized.
///
/// Callers should prefer to execute into `Columnar` instead of this specific target.
/// This target is useful when preparing arrays for writing.
pub struct CanonicalValidity(pub Canonical);
558
559impl Executable for CanonicalValidity {
560    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
561        match array.execute::<Canonical>(ctx)? {
562            n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
563            Canonical::Bool(b) => {
564                let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
565                let len = b.len();
566                let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
567                Ok(CanonicalValidity(Canonical::Bool(
568                    BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
569                )))
570            }
571            Canonical::Primitive(p) => {
572                let PrimitiveDataParts {
573                    ptype,
574                    buffer,
575                    validity,
576                } = p.into_data_parts();
577                Ok(CanonicalValidity(Canonical::Primitive(unsafe {
578                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
579                })))
580            }
581            Canonical::Decimal(d) => {
582                let DecimalDataParts {
583                    decimal_dtype,
584                    values,
585                    values_type,
586                    validity,
587                } = d.into_data_parts();
588                Ok(CanonicalValidity(Canonical::Decimal(unsafe {
589                    DecimalArray::new_unchecked_handle(
590                        values,
591                        values_type,
592                        decimal_dtype,
593                        validity.execute(ctx)?,
594                    )
595                })))
596            }
597            Canonical::VarBinView(vbv) => {
598                let VarBinViewDataParts {
599                    dtype,
600                    buffers,
601                    views,
602                    validity,
603                } = vbv.into_data_parts();
604                Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
605                    VarBinViewArray::new_handle_unchecked(
606                        views,
607                        buffers,
608                        dtype,
609                        validity.execute(ctx)?,
610                    )
611                })))
612            }
613            Canonical::List(l) => {
614                let zctl = l.is_zero_copy_to_list();
615                let ListViewDataParts {
616                    elements,
617                    offsets,
618                    sizes,
619                    validity,
620                    ..
621                } = l.into_data_parts();
622                Ok(CanonicalValidity(Canonical::List(unsafe {
623                    ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
624                        .with_zero_copy_to_list(zctl)
625                })))
626            }
627            Canonical::FixedSizeList(fsl) => {
628                let list_size = fsl.list_size();
629                let len = fsl.len();
630                let parts = fsl.into_data_parts();
631                let elements = parts.elements;
632                let validity = parts.validity;
633                Ok(CanonicalValidity(Canonical::FixedSizeList(
634                    FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
635                )))
636            }
637            Canonical::Struct(st) => {
638                let len = st.len();
639                let StructDataParts {
640                    struct_fields,
641                    fields,
642                    validity,
643                } = st.into_data_parts();
644                Ok(CanonicalValidity(Canonical::Struct(unsafe {
645                    StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
646                })))
647            }
648            Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
649                ExtensionArray::new(
650                    ext.ext_dtype().clone(),
651                    ext.storage_array()
652                        .clone()
653                        .execute::<CanonicalValidity>(ctx)?
654                        .0
655                        .into_array(),
656                ),
657            ))),
658            Canonical::Variant(variant) => {
659                let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
660                let shredded = variant
661                    .shredded()
662                    .map(|shredded| {
663                        if shredded.is::<Variant>() {
664                            recursively_canonicalize_slots(shredded, ctx)
665                        } else {
666                            shredded
667                                .clone()
668                                .execute::<CanonicalValidity>(ctx)
669                                .map(|canonical| canonical.0.into_array())
670                        }
671                    })
672                    .transpose()?;
673                Ok(CanonicalValidity(Canonical::Variant(
674                    VariantArray::try_new(core_storage, shredded)?,
675                )))
676            }
677        }
678    }
679}
680
/// Execution target that recursively executes an array until all of its children are canonical.
///
/// In the `Executable` impl in this file, the array is first executed into [`Canonical`].
/// Its validity is then executed, and nested children (list elements, offsets and sizes, struct
/// fields, extension storage) are themselves executed into `RecursiveCanonical`.
///
/// This is useful to guarantee that all operators are fully executed. Callers should prefer an
/// execution target that's suitable for their use case instead of this one.
pub struct RecursiveCanonical(pub Canonical);
686
687// TODO: Currently only used for Variant, in the future
688// can probably be used for more canonical types like Struct.
689fn recursively_canonicalize_slots(
690    array: &ArrayRef,
691    ctx: &mut ExecutionCtx,
692) -> VortexResult<ArrayRef> {
693    let slots = array
694        .slots()
695        .iter()
696        .map(|slot| {
697            slot.as_ref()
698                .map(|child| {
699                    child
700                        .clone()
701                        .execute::<RecursiveCanonical>(ctx)
702                        .map(|canonical| canonical.0.into_array())
703                })
704                .transpose()
705        })
706        .collect::<VortexResult<ArraySlots>>()?;
707    array.clone().with_slots(slots)
708}
709impl Executable for RecursiveCanonical {
710    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
711        match array.execute::<Canonical>(ctx)? {
712            n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
713            Canonical::Bool(b) => {
714                let validity = child_to_validity(b.slots()[0].as_ref(), b.dtype().nullability());
715                let len = b.len();
716                let BoolDataParts { bits, offset, len } = b.into_data().into_parts(len);
717                Ok(RecursiveCanonical(Canonical::Bool(
718                    BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
719                )))
720            }
721            Canonical::Primitive(p) => {
722                let PrimitiveDataParts {
723                    ptype,
724                    buffer,
725                    validity,
726                } = p.into_data_parts();
727                Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
728                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
729                })))
730            }
731            Canonical::Decimal(d) => {
732                let DecimalDataParts {
733                    decimal_dtype,
734                    values,
735                    values_type,
736                    validity,
737                } = d.into_data_parts();
738                Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
739                    DecimalArray::new_unchecked_handle(
740                        values,
741                        values_type,
742                        decimal_dtype,
743                        validity.execute(ctx)?,
744                    )
745                })))
746            }
747            Canonical::VarBinView(vbv) => {
748                let VarBinViewDataParts {
749                    dtype,
750                    buffers,
751                    views,
752                    validity,
753                } = vbv.into_data_parts();
754                Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
755                    VarBinViewArray::new_handle_unchecked(
756                        views,
757                        buffers,
758                        dtype,
759                        validity.execute(ctx)?,
760                    )
761                })))
762            }
763            Canonical::List(l) => {
764                let zctl = l.is_zero_copy_to_list();
765                let ListViewDataParts {
766                    elements,
767                    offsets,
768                    sizes,
769                    validity,
770                    ..
771                } = l.into_data_parts();
772                Ok(RecursiveCanonical(Canonical::List(unsafe {
773                    ListViewArray::new_unchecked(
774                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
775                        offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
776                        sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
777                        validity.execute(ctx)?,
778                    )
779                    .with_zero_copy_to_list(zctl)
780                })))
781            }
782            Canonical::FixedSizeList(fsl) => {
783                let list_size = fsl.list_size();
784                let len = fsl.len();
785                let parts = fsl.into_data_parts();
786                let elements = parts.elements;
787                let validity = parts.validity;
788                Ok(RecursiveCanonical(Canonical::FixedSizeList(
789                    FixedSizeListArray::new(
790                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
791                        list_size,
792                        validity.execute(ctx)?,
793                        len,
794                    ),
795                )))
796            }
797            Canonical::Struct(st) => {
798                let len = st.len();
799                let StructDataParts {
800                    struct_fields,
801                    fields,
802                    validity,
803                } = st.into_data_parts();
804                let executed_fields = fields
805                    .iter()
806                    .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
807                    .collect::<VortexResult<Arc<[_]>>>()?;
808
809                Ok(RecursiveCanonical(Canonical::Struct(unsafe {
810                    StructArray::new_unchecked(
811                        executed_fields,
812                        struct_fields,
813                        len,
814                        validity.execute(ctx)?,
815                    )
816                })))
817            }
818            Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
819                ExtensionArray::new(
820                    ext.ext_dtype().clone(),
821                    ext.storage_array()
822                        .clone()
823                        .execute::<RecursiveCanonical>(ctx)?
824                        .0
825                        .into_array(),
826                ),
827            ))),
828            Canonical::Variant(variant) => {
829                let core_storage = recursively_canonicalize_slots(variant.core_storage(), ctx)?;
830                let shredded = variant
831                    .shredded()
832                    .map(|shredded| {
833                        if shredded.is::<Variant>() {
834                            recursively_canonicalize_slots(shredded, ctx)
835                        } else {
836                            shredded
837                                .clone()
838                                .execute::<RecursiveCanonical>(ctx)
839                                .map(|canonical| canonical.0.into_array())
840                        }
841                    })
842                    .transpose()?;
843                Ok(RecursiveCanonical(Canonical::Variant(
844                    VariantArray::try_new(core_storage, shredded)?,
845                )))
846            }
847        }
848    }
849}
850
851/// Execute a primitive typed array into a buffer of native values, assuming all values are valid.
852///
853/// # Errors
854///
855/// Returns a `VortexError` if the array is not all-valid (has any nulls).
856impl<T: NativePType> Executable for Buffer<T> {
857    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
858        let array = PrimitiveArray::execute(array, ctx)?;
859        vortex_ensure!(
860            matches!(
861                array.validity()?,
862                Validity::NonNullable | Validity::AllValid
863            ),
864            "Cannot execute to native buffer: array is not all-valid."
865        );
866        Ok(array.into_buffer())
867    }
868}
869
870/// Execute the array to canonical form and unwrap as a [`PrimitiveArray`].
871///
872/// This will panic if the array's dtype is not primitive.
873impl Executable for PrimitiveArray {
874    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
875        match array.try_downcast::<Primitive>() {
876            Ok(primitive) => Ok(primitive),
877            Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
878        }
879    }
880}
881
882/// Execute the array to canonical form and unwrap as a [`BoolArray`].
883///
884/// This will panic if the array's dtype is not bool.
885impl Executable for BoolArray {
886    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
887        match array.try_downcast::<Bool>() {
888            Ok(bool_array) => Ok(bool_array),
889            Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
890        }
891    }
892}
893
894/// Execute the array to a [`BitBuffer`], aka a non-nullable  [`BoolArray`].
895///
896/// This will panic if the array's dtype is not non-nullable bool.
897impl Executable for BitBuffer {
898    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
899        let bool = BoolArray::execute(array, ctx)?;
900        assert!(
901            !bool.dtype().is_nullable(),
902            "bit buffer execute only works with non-nullable bool arrays"
903        );
904        Ok(bool.into_bit_buffer())
905    }
906}
907
908/// Execute the array to canonical form and unwrap as a [`NullArray`].
909///
910/// This will panic if the array's dtype is not null.
911impl Executable for NullArray {
912    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
913        match array.try_downcast::<Null>() {
914            Ok(null_array) => Ok(null_array),
915            Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
916        }
917    }
918}
919
920/// Execute the array to canonical form and unwrap as a [`VarBinViewArray`].
921///
922/// This will panic if the array's dtype is not utf8 or binary.
923impl Executable for VarBinViewArray {
924    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
925        match array.try_downcast::<VarBinView>() {
926            Ok(varbinview) => Ok(varbinview),
927            Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
928        }
929    }
930}
931
932/// Execute the array to canonical form and unwrap as an [`ExtensionArray`].
933///
934/// This will panic if the array's dtype is not an extension type.
935impl Executable for ExtensionArray {
936    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
937        match array.try_downcast::<Extension>() {
938            Ok(ext_array) => Ok(ext_array),
939            Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
940        }
941    }
942}
943
944/// Execute the array to canonical form and unwrap as a [`DecimalArray`].
945///
946/// This will panic if the array's dtype is not decimal.
947impl Executable for DecimalArray {
948    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
949        match array.try_downcast::<Decimal>() {
950            Ok(decimal) => Ok(decimal),
951            Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
952        }
953    }
954}
955
956/// Execute the array to canonical form and unwrap as a [`ListViewArray`].
957///
958/// This will panic if the array's dtype is not list.
959impl Executable for ListViewArray {
960    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
961        match array.try_downcast::<ListView>() {
962            Ok(list) => Ok(list),
963            Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
964        }
965    }
966}
967
968/// Execute the array to canonical form and unwrap as a [`FixedSizeListArray`].
969///
970/// This will panic if the array's dtype is not fixed size list.
971impl Executable for FixedSizeListArray {
972    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
973        match array.try_downcast::<FixedSizeList>() {
974            Ok(fsl) => Ok(fsl),
975            Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
976        }
977    }
978}
979
980/// Execute the array to canonical form and unwrap as a [`StructArray`].
981///
982/// This will panic if the array's dtype is not struct.
983impl Executable for StructArray {
984    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
985        match array.try_downcast::<Struct>() {
986            Ok(struct_array) => Ok(struct_array),
987            Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
988        }
989    }
990}
991
992/// Execute the array to canonical form and unwrap as a [`VariantArray`].
993///
994/// This will panic if the array's dtype is not variant.
995impl Executable for VariantArray {
996    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
997        match array.try_downcast::<Variant>() {
998            Ok(variant_array) => Ok(variant_array),
999            Err(array) => match Canonical::execute(array, ctx)? {
1000                Canonical::Variant(variant_array) => Ok(variant_array),
1001                canonical => vortex_panic!("Cannot unwrap VariantArray from {:?}", canonical),
1002            },
1003        }
1004    }
1005}
1006
1007/// A view into a canonical array type.
1008///
1009/// Uses `ArrayView<V>` because these are obtained by
1010/// downcasting through the `Matcher` trait which returns `ArrayView<V>`.
1011#[derive(Debug, Clone, Copy)]
1012pub enum CanonicalView<'a> {
1013    Null(ArrayView<'a, Null>),
1014    Bool(ArrayView<'a, Bool>),
1015    Primitive(ArrayView<'a, Primitive>),
1016    Decimal(ArrayView<'a, Decimal>),
1017    VarBinView(ArrayView<'a, VarBinView>),
1018    List(ArrayView<'a, ListView>),
1019    FixedSizeList(ArrayView<'a, FixedSizeList>),
1020    Struct(ArrayView<'a, Struct>),
1021    Extension(ArrayView<'a, Extension>),
1022    Variant(ArrayView<'a, Variant>),
1023}
1024
1025impl From<CanonicalView<'_>> for Canonical {
1026    fn from(value: CanonicalView<'_>) -> Self {
1027        match value {
1028            CanonicalView::Null(a) => Canonical::Null(a.into_owned()),
1029            CanonicalView::Bool(a) => Canonical::Bool(a.into_owned()),
1030            CanonicalView::Primitive(a) => Canonical::Primitive(a.into_owned()),
1031            CanonicalView::Decimal(a) => Canonical::Decimal(a.into_owned()),
1032            CanonicalView::VarBinView(a) => Canonical::VarBinView(a.into_owned()),
1033            CanonicalView::List(a) => Canonical::List(a.into_owned()),
1034            CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.into_owned()),
1035            CanonicalView::Struct(a) => Canonical::Struct(a.into_owned()),
1036            CanonicalView::Extension(a) => Canonical::Extension(a.into_owned()),
1037            CanonicalView::Variant(a) => Canonical::Variant(a.into_owned()),
1038        }
1039    }
1040}
1041
1042impl CanonicalView<'_> {
1043    /// Convert to a type-erased [`ArrayRef`].
1044    pub fn to_array_ref(&self) -> ArrayRef {
1045        match self {
1046            CanonicalView::Null(a) => a.array().clone(),
1047            CanonicalView::Bool(a) => a.array().clone(),
1048            CanonicalView::Primitive(a) => a.array().clone(),
1049            CanonicalView::Decimal(a) => a.array().clone(),
1050            CanonicalView::VarBinView(a) => a.array().clone(),
1051            CanonicalView::List(a) => a.array().clone(),
1052            CanonicalView::FixedSizeList(a) => a.array().clone(),
1053            CanonicalView::Struct(a) => a.array().clone(),
1054            CanonicalView::Extension(a) => a.array().clone(),
1055            CanonicalView::Variant(a) => a.array().clone(),
1056        }
1057    }
1058}
1059
1060/// A matcher for any canonical array type.
1061pub struct AnyCanonical;
1062impl Matcher for AnyCanonical {
1063    type Match<'a> = CanonicalView<'a>;
1064
1065    fn matches(array: &ArrayRef) -> bool {
1066        array.is::<Null>()
1067            || array.is::<Bool>()
1068            || array.is::<Primitive>()
1069            || array.is::<Decimal>()
1070            || array.is::<Struct>()
1071            || array.is::<ListView>()
1072            || array.is::<FixedSizeList>()
1073            || array.is::<VarBinView>()
1074            || array.is::<Variant>()
1075            || array.is::<Extension>()
1076    }
1077
1078    fn try_match(array: &ArrayRef) -> Option<Self::Match<'_>> {
1079        if let Some(a) = array.as_opt::<Null>() {
1080            Some(CanonicalView::Null(a))
1081        } else if let Some(a) = array.as_opt::<Bool>() {
1082            Some(CanonicalView::Bool(a))
1083        } else if let Some(a) = array.as_opt::<Primitive>() {
1084            Some(CanonicalView::Primitive(a))
1085        } else if let Some(a) = array.as_opt::<Decimal>() {
1086            Some(CanonicalView::Decimal(a))
1087        } else if let Some(a) = array.as_opt::<Struct>() {
1088            Some(CanonicalView::Struct(a))
1089        } else if let Some(a) = array.as_opt::<ListView>() {
1090            Some(CanonicalView::List(a))
1091        } else if let Some(a) = array.as_opt::<FixedSizeList>() {
1092            Some(CanonicalView::FixedSizeList(a))
1093        } else if let Some(a) = array.as_opt::<VarBinView>() {
1094            Some(CanonicalView::VarBinView(a))
1095        } else if let Some(a) = array.as_opt::<Variant>() {
1096            Some(CanonicalView::Variant(a))
1097        } else {
1098            array.as_opt::<Extension>().map(CanonicalView::Extension)
1099        }
1100    }
1101}
1102
1103#[cfg(test)]
1104mod test {
1105    use std::sync::Arc;
1106
1107    use arrow_array::Array as ArrowArray;
1108    use arrow_array::ArrayRef as ArrowArrayRef;
1109    use arrow_array::ListArray as ArrowListArray;
1110    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
1111    use arrow_array::StringArray;
1112    use arrow_array::StringViewArray;
1113    use arrow_array::StructArray as ArrowStructArray;
1114    use arrow_array::cast::AsArray;
1115    use arrow_array::types::Int32Type;
1116    use arrow_array::types::Int64Type;
1117    use arrow_array::types::UInt64Type;
1118    use arrow_buffer::NullBufferBuilder;
1119    use arrow_buffer::OffsetBuffer;
1120    use arrow_schema::DataType;
1121    use arrow_schema::Field;
1122    use vortex_buffer::buffer;
1123    use vortex_error::VortexResult;
1124    use vortex_error::vortex_err;
1125
1126    use crate::ArrayRef;
1127    use crate::Canonical;
1128    use crate::CanonicalValidity;
1129    use crate::IntoArray;
1130    use crate::LEGACY_SESSION;
1131    use crate::VortexSessionExecute;
1132    use crate::arrays::Constant;
1133    use crate::arrays::ConstantArray;
1134    use crate::arrays::Primitive;
1135    use crate::arrays::Struct;
1136    use crate::arrays::Variant;
1137    use crate::arrays::VariantArray;
1138    use crate::arrays::struct_::StructArrayExt;
1139    use crate::arrays::variant::VariantArrayExt;
1140    use crate::arrow::ArrowSessionExt;
1141    use crate::arrow::FromArrowArray;
1142    use crate::canonical::StructArray;
1143    use crate::dtype::Nullability;
1144    use crate::scalar::Scalar;
1145
1146    fn variant_core_storage(len: usize) -> ArrayRef {
1147        ConstantArray::new(
1148            Scalar::variant(Scalar::primitive(1i32, Nullability::NonNullable)),
1149            len,
1150        )
1151        .into_array()
1152    }
1153
1154    #[test]
1155    fn canonical_validity_canonicalizes_variant_shredded_physical_slots() -> VortexResult<()> {
1156        let len = 2;
1157        let nested_shredded =
1158            StructArray::try_from_iter([("value", ConstantArray::new(10i32, len).into_array())])?;
1159        let inner_variant = VariantArray::try_new(
1160            variant_core_storage(len),
1161            Some(nested_shredded.into_array()),
1162        )?;
1163        let outer_variant =
1164            VariantArray::try_new(variant_core_storage(len), Some(inner_variant.into_array()))?;
1165
1166        let mut ctx = LEGACY_SESSION.create_execution_ctx();
1167        let Canonical::Variant(canonical) = outer_variant
1168            .into_array()
1169            .execute::<CanonicalValidity>(&mut ctx)?
1170            .0
1171        else {
1172            return Err(vortex_err!("expected canonical variant"));
1173        };
1174
1175        let nested_variant = canonical
1176            .shredded()
1177            .and_then(|shredded| shredded.as_opt::<Variant>())
1178            .ok_or_else(|| vortex_err!("expected nested variant shredded child"))?;
1179        let nested_struct = nested_variant
1180            .shredded()
1181            .and_then(|shredded| shredded.as_opt::<Struct>())
1182            .ok_or_else(|| vortex_err!("expected nested struct shredded child"))?;
1183        let value = nested_struct.unmasked_field_by_name("value")?;
1184
1185        assert!(value.is::<Primitive>());
1186        assert!(!value.is::<Constant>());
1187
1188        Ok(())
1189    }
1190
1191    #[test]
1192    fn test_canonicalize_nested_struct() {
1193        let mut ctx = LEGACY_SESSION.create_execution_ctx();
1194        // Create a struct array with multiple internal components.
1195        let nested_struct_array = StructArray::from_fields(&[
1196            ("a", buffer![1u64].into_array()),
1197            (
1198                "b",
1199                StructArray::from_fields(&[(
1200                    "inner_a",
1201                    // The nested struct contains a ConstantArray representing the primitive array
1202                    //   [100i64]
1203                    // ConstantArray is not a canonical type, so converting `into_arrow()` should
1204                    // map this to the nearest canonical type (PrimitiveArray).
1205                    ConstantArray::new(100i64, 1).into_array(),
1206                )])
1207                .unwrap()
1208                .into_array(),
1209            ),
1210        ])
1211        .unwrap();
1212
1213        let arrow_struct = LEGACY_SESSION
1214            .arrow()
1215            .execute_arrow(nested_struct_array.into_array(), None, &mut ctx)
1216            .unwrap()
1217            .as_any()
1218            .downcast_ref::<ArrowStructArray>()
1219            .cloned()
1220            .unwrap();
1221
1222        assert!(
1223            arrow_struct
1224                .column(0)
1225                .as_any()
1226                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
1227                .is_some()
1228        );
1229
1230        let inner_struct = Arc::clone(arrow_struct.column(1))
1231            .as_any()
1232            .downcast_ref::<ArrowStructArray>()
1233            .cloned()
1234            .unwrap();
1235
1236        let inner_a = inner_struct
1237            .column(0)
1238            .as_any()
1239            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
1240        assert!(inner_a.is_some());
1241
1242        assert_eq!(
1243            inner_a.cloned().unwrap(),
1244            ArrowPrimitiveArray::from_iter([100i64])
1245        );
1246    }
1247
1248    #[test]
1249    fn roundtrip_struct() {
1250        let mut ctx = LEGACY_SESSION.create_execution_ctx();
1251        let mut nulls = NullBufferBuilder::new(6);
1252        nulls.append_n_non_nulls(4);
1253        nulls.append_null();
1254        nulls.append_non_null();
1255        let names = Arc::new(StringViewArray::from_iter(vec![
1256            Some("Joseph"),
1257            None,
1258            Some("Angela"),
1259            Some("Mikhail"),
1260            None,
1261            None,
1262        ]));
1263        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
1264            Some(25),
1265            Some(31),
1266            None,
1267            Some(57),
1268            None,
1269            None,
1270        ]));
1271
1272        let arrow_struct = ArrowStructArray::new(
1273            vec![
1274                Arc::new(Field::new("name", DataType::Utf8View, true)),
1275                Arc::new(Field::new("age", DataType::Int32, true)),
1276            ]
1277            .into(),
1278            vec![names, ages],
1279            nulls.finish(),
1280        );
1281
1282        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();
1283        let vortex_struct = LEGACY_SESSION
1284            .arrow()
1285            .execute_arrow(vortex_struct, None, &mut ctx)
1286            .unwrap();
1287        assert_eq!(&arrow_struct, vortex_struct.as_struct());
1288    }
1289
1290    #[test]
1291    fn roundtrip_list() {
1292        let mut ctx = LEGACY_SESSION.create_execution_ctx();
1293        let names = Arc::new(StringArray::from_iter(vec![
1294            Some("Joseph"),
1295            Some("Angela"),
1296            Some("Mikhail"),
1297        ]));
1298
1299        let arrow_list = ArrowListArray::new(
1300            Arc::new(Field::new_list_field(DataType::Utf8, true)),
1301            OffsetBuffer::from_lengths(vec![0, 2, 1]),
1302            names,
1303            None,
1304        );
1305        let list_data_type = arrow_list.data_type();
1306        let list_field = Field::new(String::new(), list_data_type.clone(), true);
1307
1308        let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();
1309
1310        let rt_arrow_list = LEGACY_SESSION
1311            .arrow()
1312            .execute_arrow(vortex_list, Some(&list_field), &mut ctx)
1313            .unwrap();
1314
1315        assert_eq!(
1316            (Arc::new(arrow_list.clone()) as ArrowArrayRef).as_ref(),
1317            rt_arrow_list.as_ref()
1318        );
1319    }
1320}