Skip to main content

vortex_array/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Encodings that enable zero-copy sharing of data with Arrow.
5
6use std::sync::Arc;
7
8use vortex_buffer::BitBuffer;
9use vortex_buffer::Buffer;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_panic;
14
15use crate::Array;
16use crate::ArrayRef;
17use crate::Columnar;
18use crate::Executable;
19use crate::ExecutionCtx;
20use crate::IntoArray;
21use crate::arrays::BoolArray;
22use crate::arrays::BoolArrayParts;
23use crate::arrays::BoolVTable;
24use crate::arrays::DecimalArray;
25use crate::arrays::DecimalArrayParts;
26use crate::arrays::DecimalVTable;
27use crate::arrays::ExtensionArray;
28use crate::arrays::ExtensionVTable;
29use crate::arrays::FixedSizeListArray;
30use crate::arrays::FixedSizeListVTable;
31use crate::arrays::ListViewArray;
32use crate::arrays::ListViewArrayParts;
33use crate::arrays::ListViewRebuildMode;
34use crate::arrays::ListViewVTable;
35use crate::arrays::NullArray;
36use crate::arrays::NullVTable;
37use crate::arrays::PrimitiveArray;
38use crate::arrays::PrimitiveArrayParts;
39use crate::arrays::PrimitiveVTable;
40use crate::arrays::StructArray;
41use crate::arrays::StructArrayParts;
42use crate::arrays::StructVTable;
43use crate::arrays::VarBinViewArray;
44use crate::arrays::VarBinViewArrayParts;
45use crate::arrays::VarBinViewVTable;
46use crate::arrays::constant_canonicalize;
47use crate::builders::builder_with_capacity;
48use crate::dtype::DType;
49use crate::dtype::NativePType;
50use crate::matcher::Matcher;
51
52/// An enum capturing the default uncompressed encodings for each [Vortex type](DType).
53///
54/// Any array can be decoded into canonical form via the [`to_canonical`](Array::to_canonical)
55/// trait method. This is the simplest encoding for a type, and will not be compressed but may
56/// contain compressed child arrays.
57///
58/// Canonical form is useful for doing type-specific compute where you need to know that all
59/// elements are laid out decompressed and contiguous in memory.
60///
61/// Each `Canonical` variant has a corresponding [`DType`] variant, with the notable exception of
62/// [`Canonical::VarBinView`], which is the canonical encoding for both [`DType::Utf8`] and
63/// [`DType::Binary`].
64///
65/// # Laziness
66///
67/// Canonical form is not recursive, so while a `StructArray` is the canonical format for any
68/// `Struct` type, individual column child arrays may still be compressed. This allows
69/// compute over Vortex arrays to push decoding as late as possible, and ideally many child arrays
70/// never need to be decoded into canonical form at all depending on the compute.
71///
72/// # Arrow interoperability
73///
74/// All of the Vortex canonical encodings have an equivalent Arrow encoding that can be built
75/// zero-copy, and the corresponding Arrow array types can also be built directly.
76///
77/// The full list of canonical types and their equivalent Arrow array types are:
78///
79/// * `NullArray`: [`arrow_array::NullArray`]
80/// * `BoolArray`: [`arrow_array::BooleanArray`]
81/// * `PrimitiveArray`: [`arrow_array::PrimitiveArray`]
82/// * `DecimalArray`: [`arrow_array::Decimal128Array`] and [`arrow_array::Decimal256Array`]
83/// * `VarBinViewArray`: [`arrow_array::GenericByteViewArray`]
84/// * `ListViewArray`: [`arrow_array::ListViewArray`]
85/// * `FixedSizeListArray`: [`arrow_array::FixedSizeListArray`]
86/// * `StructArray`: [`arrow_array::StructArray`]
87///
88/// Vortex uses a logical type system, unlike Arrow which uses physical encodings for its types.
89/// As an example, there are at least six valid physical encodings for a `Utf8` array. This can
90/// create ambiguity.
91/// Thus, if you receive an Arrow array, compress it using Vortex, and then
92/// decompress it later to pass to a compute kernel, there are multiple suitable Arrow array
93/// variants to hold the data.
94///
95/// To disambiguate, we choose a canonical physical encoding for every Vortex [`DType`], which
96/// will correspond to an arrow-rs [`arrow_schema::DataType`].
97///
98/// # Views support
99///
100/// Binary and String views, also known as "German strings" are a better encoding format for
101/// nearly all use-cases. Variable-length binary views are part of the Apache Arrow spec, and are
102/// fully supported by the Datafusion query engine. We use them as our canonical string encoding
103/// for all `Utf8` and `Binary` typed arrays in Vortex. They provide considerably faster filter
104/// execution than the core `StringArray` and `BinaryArray` types, at the expense of potentially
105/// needing [garbage collection][arrow_array::GenericByteViewArray::gc] to clear unreferenced items
106/// from memory.
107///
108/// # For Developers
109///
110/// If you add another variant to this enum, make sure to update `dyn Array::is_canonical`,
111/// and the fuzzer in `fuzz/fuzz_targets/array_ops.rs`.
#[derive(Debug, Clone)]
pub enum Canonical {
    /// Canonical array for [`DType::Null`].
    Null(NullArray),
    /// Canonical array for [`DType::Bool`].
    Bool(BoolArray),
    /// Canonical array for [`DType::Primitive`].
    Primitive(PrimitiveArray),
    /// Canonical array for [`DType::Decimal`].
    Decimal(DecimalArray),
    /// Canonical array shared by [`DType::Utf8`] and [`DType::Binary`].
    VarBinView(VarBinViewArray),
    /// Canonical array for [`DType::List`], held as a [`ListViewArray`].
    List(ListViewArray),
    /// Canonical array for [`DType::FixedSizeList`].
    FixedSizeList(FixedSizeListArray),
    /// Canonical array for [`DType::Struct`].
    Struct(StructArray),
    /// Canonical array for [`DType::Extension`].
    Extension(ExtensionArray),
}
124
/// Dispatch over every [`Canonical`] variant, binding the wrapped array to the supplied
/// identifier and evaluating the same expression for whichever variant is present.
macro_rules! match_each_canonical {
    ($canonical:expr, | $array:ident | $body:expr) => {{
        match $canonical {
            Canonical::Null($array) => $body,
            Canonical::Bool($array) => $body,
            Canonical::Primitive($array) => $body,
            Canonical::Decimal($array) => $body,
            Canonical::VarBinView($array) => $body,
            Canonical::List($array) => $body,
            Canonical::FixedSizeList($array) => $body,
            Canonical::Struct($array) => $body,
            Canonical::Extension($array) => $body,
        }
    }};
}
141
142impl Canonical {
143    // TODO(connor): This can probably be specialized for each of the canonical arrays.
144    /// Create an empty canonical array of the given dtype.
145    pub fn empty(dtype: &DType) -> Canonical {
146        builder_with_capacity(dtype, 0).finish_into_canonical()
147    }
148
149    pub fn len(&self) -> usize {
150        match_each_canonical!(self, |arr| arr.len())
151    }
152
153    pub fn dtype(&self) -> &DType {
154        match_each_canonical!(self, |arr| arr.dtype())
155    }
156
157    pub fn is_empty(&self) -> bool {
158        match_each_canonical!(self, |arr| arr.is_empty())
159    }
160}
161
162impl Canonical {
163    /// Performs a (potentially expensive) compaction operation on the array before it is complete.
164    ///
165    /// This is mostly relevant for the variable-length types such as Utf8, Binary or List where
166    /// they can accumulate wasted space after slicing and taking operations.
167    ///
168    /// This operation is very expensive and can result in things like allocations, full-scans
169    /// and copy operations.
170    pub fn compact(&self) -> VortexResult<Canonical> {
171        match self {
172            Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
173            Canonical::List(array) => Ok(Canonical::List(
174                array.rebuild(ListViewRebuildMode::TrimElements)?,
175            )),
176            _ => Ok(self.clone()),
177        }
178    }
179}
180
181// Unwrap canonical type back down to specialized type.
182impl Canonical {
183    pub fn as_null(&self) -> &NullArray {
184        if let Canonical::Null(a) = self {
185            a
186        } else {
187            vortex_panic!("Cannot get NullArray from {:?}", &self)
188        }
189    }
190
191    pub fn into_null(self) -> NullArray {
192        if let Canonical::Null(a) = self {
193            a
194        } else {
195            vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
196        }
197    }
198
199    pub fn as_bool(&self) -> &BoolArray {
200        if let Canonical::Bool(a) = self {
201            a
202        } else {
203            vortex_panic!("Cannot get BoolArray from {:?}", &self)
204        }
205    }
206
207    pub fn into_bool(self) -> BoolArray {
208        if let Canonical::Bool(a) = self {
209            a
210        } else {
211            vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
212        }
213    }
214
215    pub fn as_primitive(&self) -> &PrimitiveArray {
216        if let Canonical::Primitive(a) = self {
217            a
218        } else {
219            vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
220        }
221    }
222
223    pub fn into_primitive(self) -> PrimitiveArray {
224        if let Canonical::Primitive(a) = self {
225            a
226        } else {
227            vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
228        }
229    }
230
231    pub fn as_decimal(&self) -> &DecimalArray {
232        if let Canonical::Decimal(a) = self {
233            a
234        } else {
235            vortex_panic!("Cannot get DecimalArray from {:?}", &self)
236        }
237    }
238
239    pub fn into_decimal(self) -> DecimalArray {
240        if let Canonical::Decimal(a) = self {
241            a
242        } else {
243            vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
244        }
245    }
246
247    pub fn as_varbinview(&self) -> &VarBinViewArray {
248        if let Canonical::VarBinView(a) = self {
249            a
250        } else {
251            vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
252        }
253    }
254
255    pub fn into_varbinview(self) -> VarBinViewArray {
256        if let Canonical::VarBinView(a) = self {
257            a
258        } else {
259            vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
260        }
261    }
262
263    pub fn as_listview(&self) -> &ListViewArray {
264        if let Canonical::List(a) = self {
265            a
266        } else {
267            vortex_panic!("Cannot get ListArray from {:?}", &self)
268        }
269    }
270
271    pub fn into_listview(self) -> ListViewArray {
272        if let Canonical::List(a) = self {
273            a
274        } else {
275            vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
276        }
277    }
278
279    pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
280        if let Canonical::FixedSizeList(a) = self {
281            a
282        } else {
283            vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
284        }
285    }
286
287    pub fn into_fixed_size_list(self) -> FixedSizeListArray {
288        if let Canonical::FixedSizeList(a) = self {
289            a
290        } else {
291            vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
292        }
293    }
294
295    pub fn as_struct(&self) -> &StructArray {
296        if let Canonical::Struct(a) = self {
297            a
298        } else {
299            vortex_panic!("Cannot get StructArray from {:?}", &self)
300        }
301    }
302
303    pub fn into_struct(self) -> StructArray {
304        if let Canonical::Struct(a) = self {
305            a
306        } else {
307            vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
308        }
309    }
310
311    pub fn as_extension(&self) -> &ExtensionArray {
312        if let Canonical::Extension(a) = self {
313            a
314        } else {
315            vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
316        }
317    }
318
319    pub fn into_extension(self) -> ExtensionArray {
320        if let Canonical::Extension(a) = self {
321            a
322        } else {
323            vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
324        }
325    }
326}
327
328impl AsRef<dyn Array> for Canonical {
329    fn as_ref(&self) -> &(dyn Array + 'static) {
330        match_each_canonical!(self, |arr| arr.as_ref())
331    }
332}
333
334impl IntoArray for Canonical {
335    fn into_array(self) -> ArrayRef {
336        match_each_canonical!(self, |arr| arr.into_array())
337    }
338}
339
/// Convenience trait for canonicalizing a borrowed array directly into one specific canonical
/// array type.
///
/// # Canonicalization
///
/// This trait has a blanket implementation for all types implementing [`Array`].
///
/// # Panics
///
/// The blanket implementation unwraps the result of `to_canonical` with `vortex_expect` and
/// then calls the matching `Canonical::into_*` accessor, which panics when the canonical form
/// is a different variant than the one requested.
pub trait ToCanonical {
    /// Canonicalize into a [`NullArray`] if the target is [`Null`](DType::Null) typed.
    fn to_null(&self) -> NullArray;

    /// Canonicalize into a [`BoolArray`] if the target is [`Bool`](DType::Bool) typed.
    fn to_bool(&self) -> BoolArray;

    /// Canonicalize into a [`PrimitiveArray`] if the target is [`Primitive`](DType::Primitive)
    /// typed.
    fn to_primitive(&self) -> PrimitiveArray;

    /// Canonicalize into a [`DecimalArray`] if the target is [`Decimal`](DType::Decimal)
    /// typed.
    fn to_decimal(&self) -> DecimalArray;

    /// Canonicalize into a [`StructArray`] if the target is [`Struct`](DType::Struct) typed.
    fn to_struct(&self) -> StructArray;

    /// Canonicalize into a [`ListViewArray`] if the target is [`List`](DType::List) typed.
    fn to_listview(&self) -> ListViewArray;

    /// Canonicalize into a [`FixedSizeListArray`] if the target is
    /// [`FixedSizeList`](DType::FixedSizeList) typed.
    fn to_fixed_size_list(&self) -> FixedSizeListArray;

    /// Canonicalize into a [`VarBinViewArray`] if the target is [`Utf8`](DType::Utf8)
    /// or [`Binary`](DType::Binary) typed.
    fn to_varbinview(&self) -> VarBinViewArray;

    /// Canonicalize into an [`ExtensionArray`] if the array is [`Extension`](DType::Extension)
    /// typed.
    fn to_extension(&self) -> ExtensionArray;
}
378
379// Blanket impl for all Array encodings.
380impl<A: Array + ?Sized> ToCanonical for A {
381    fn to_null(&self) -> NullArray {
382        self.to_canonical()
383            .vortex_expect("to_canonical failed")
384            .into_null()
385    }
386
387    fn to_bool(&self) -> BoolArray {
388        self.to_canonical()
389            .vortex_expect("to_canonical failed")
390            .into_bool()
391    }
392
393    fn to_primitive(&self) -> PrimitiveArray {
394        self.to_canonical()
395            .vortex_expect("to_canonical failed")
396            .into_primitive()
397    }
398
399    fn to_decimal(&self) -> DecimalArray {
400        self.to_canonical()
401            .vortex_expect("to_canonical failed")
402            .into_decimal()
403    }
404
405    fn to_struct(&self) -> StructArray {
406        self.to_canonical()
407            .vortex_expect("to_canonical failed")
408            .into_struct()
409    }
410
411    fn to_listview(&self) -> ListViewArray {
412        self.to_canonical()
413            .vortex_expect("to_canonical failed")
414            .into_listview()
415    }
416
417    fn to_fixed_size_list(&self) -> FixedSizeListArray {
418        self.to_canonical()
419            .vortex_expect("to_canonical failed")
420            .into_fixed_size_list()
421    }
422
423    fn to_varbinview(&self) -> VarBinViewArray {
424        self.to_canonical()
425            .vortex_expect("to_canonical failed")
426            .into_varbinview()
427    }
428
429    fn to_extension(&self) -> ExtensionArray {
430        self.to_canonical()
431            .vortex_expect("to_canonical failed")
432            .into_extension()
433    }
434}
435
436impl From<Canonical> for ArrayRef {
437    fn from(value: Canonical) -> Self {
438        match_each_canonical!(value, |arr| arr.into_array())
439    }
440}
441
442/// Recursively execute the array until it reaches canonical form.
443///
444/// Callers should prefer to execute into `Columnar` if they are able to optimize their use for
445/// constant arrays.
446impl Executable for Canonical {
447    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
448        if let Some(canonical) = array.as_opt::<AnyCanonical>() {
449            return Ok(canonical.into());
450        }
451
452        // Invoke execute directly to avoid logging the call in the execution context.
453        Ok(match Columnar::execute(array.clone(), ctx)? {
454            Columnar::Canonical(c) => c,
455            Columnar::Constant(s) => {
456                let canonical = constant_canonicalize(&s)?;
457                canonical
458                    .as_ref()
459                    .statistics()
460                    .inherit_from(array.statistics());
461                canonical
462            }
463        })
464    }
465}
466
467/// Recursively execute the array until it reaches canonical form along with its validity.
468///
469/// Callers should prefer to execute into `Columnar` instead of this specific target.
470/// This target is useful when preparing arrays for writing.
471pub struct CanonicalValidity(pub Canonical);
472
473impl Executable for CanonicalValidity {
474    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
475        match array.execute::<Canonical>(ctx)? {
476            n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
477            Canonical::Bool(b) => {
478                let BoolArrayParts {
479                    bits,
480                    offset,
481                    len,
482                    validity,
483                } = b.into_parts();
484                Ok(CanonicalValidity(Canonical::Bool(
485                    BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
486                )))
487            }
488            Canonical::Primitive(p) => {
489                let PrimitiveArrayParts {
490                    ptype,
491                    buffer,
492                    validity,
493                } = p.into_parts();
494                Ok(CanonicalValidity(Canonical::Primitive(unsafe {
495                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
496                })))
497            }
498            Canonical::Decimal(d) => {
499                let DecimalArrayParts {
500                    decimal_dtype,
501                    values,
502                    values_type,
503                    validity,
504                } = d.into_parts();
505                Ok(CanonicalValidity(Canonical::Decimal(unsafe {
506                    DecimalArray::new_unchecked_handle(
507                        values,
508                        values_type,
509                        decimal_dtype,
510                        validity.execute(ctx)?,
511                    )
512                })))
513            }
514            Canonical::VarBinView(vbv) => {
515                let VarBinViewArrayParts {
516                    dtype,
517                    buffers,
518                    views,
519                    validity,
520                } = vbv.into_parts();
521                Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
522                    VarBinViewArray::new_handle_unchecked(
523                        views,
524                        buffers,
525                        dtype,
526                        validity.execute(ctx)?,
527                    )
528                })))
529            }
530            Canonical::List(l) => {
531                let ListViewArrayParts {
532                    elements,
533                    offsets,
534                    sizes,
535                    validity,
536                    ..
537                } = l.into_parts();
538                Ok(CanonicalValidity(Canonical::List(unsafe {
539                    ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
540                })))
541            }
542            Canonical::FixedSizeList(fsl) => {
543                let list_size = fsl.list_size();
544                let len = fsl.len();
545                let (elements, validity, _) = fsl.into_parts();
546                Ok(CanonicalValidity(Canonical::FixedSizeList(
547                    FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
548                )))
549            }
550            Canonical::Struct(st) => {
551                let len = st.len();
552                let StructArrayParts {
553                    struct_fields,
554                    fields,
555                    validity,
556                } = st.into_parts();
557                Ok(CanonicalValidity(Canonical::Struct(unsafe {
558                    StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
559                })))
560            }
561            Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
562                ExtensionArray::new(
563                    ext.ext_dtype().clone(),
564                    ext.storage()
565                        .clone()
566                        .execute::<CanonicalValidity>(ctx)?
567                        .0
568                        .into_array(),
569                ),
570            ))),
571        }
572    }
573}
574
575/// Recursively execute the array until all of its children are canonical.
576///
577/// This method is useful to guarantee that all operators are fully executed,
578/// callers should prefer an execution target that's suitable for their use case instead of this one.
579pub struct RecursiveCanonical(pub Canonical);
580
581impl Executable for RecursiveCanonical {
582    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
583        match array.execute::<Canonical>(ctx)? {
584            n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
585            Canonical::Bool(b) => {
586                let BoolArrayParts {
587                    bits,
588                    offset,
589                    len,
590                    validity,
591                } = b.into_parts();
592                Ok(RecursiveCanonical(Canonical::Bool(
593                    BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
594                )))
595            }
596            Canonical::Primitive(p) => {
597                let PrimitiveArrayParts {
598                    ptype,
599                    buffer,
600                    validity,
601                } = p.into_parts();
602                Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
603                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
604                })))
605            }
606            Canonical::Decimal(d) => {
607                let DecimalArrayParts {
608                    decimal_dtype,
609                    values,
610                    values_type,
611                    validity,
612                } = d.into_parts();
613                Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
614                    DecimalArray::new_unchecked_handle(
615                        values,
616                        values_type,
617                        decimal_dtype,
618                        validity.execute(ctx)?,
619                    )
620                })))
621            }
622            Canonical::VarBinView(vbv) => {
623                let VarBinViewArrayParts {
624                    dtype,
625                    buffers,
626                    views,
627                    validity,
628                } = vbv.into_parts();
629                Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
630                    VarBinViewArray::new_handle_unchecked(
631                        views,
632                        buffers,
633                        dtype,
634                        validity.execute(ctx)?,
635                    )
636                })))
637            }
638            Canonical::List(l) => {
639                let ListViewArrayParts {
640                    elements,
641                    offsets,
642                    sizes,
643                    validity,
644                    ..
645                } = l.into_parts();
646                Ok(RecursiveCanonical(Canonical::List(unsafe {
647                    ListViewArray::new_unchecked(
648                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
649                        offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
650                        sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
651                        validity.execute(ctx)?,
652                    )
653                })))
654            }
655            Canonical::FixedSizeList(fsl) => {
656                let list_size = fsl.list_size();
657                let len = fsl.len();
658                let (elements, validity, _) = fsl.into_parts();
659                Ok(RecursiveCanonical(Canonical::FixedSizeList(
660                    FixedSizeListArray::new(
661                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
662                        list_size,
663                        validity.execute(ctx)?,
664                        len,
665                    ),
666                )))
667            }
668            Canonical::Struct(st) => {
669                let len = st.len();
670                let StructArrayParts {
671                    struct_fields,
672                    fields,
673                    validity,
674                } = st.into_parts();
675                let executed_fields = fields
676                    .iter()
677                    .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
678                    .collect::<VortexResult<Arc<[_]>>>()?;
679
680                Ok(RecursiveCanonical(Canonical::Struct(unsafe {
681                    StructArray::new_unchecked(
682                        executed_fields,
683                        struct_fields,
684                        len,
685                        validity.execute(ctx)?,
686                    )
687                })))
688            }
689            Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
690                ExtensionArray::new(
691                    ext.ext_dtype().clone(),
692                    ext.storage()
693                        .clone()
694                        .execute::<RecursiveCanonical>(ctx)?
695                        .0
696                        .into_array(),
697                ),
698            ))),
699        }
700    }
701}
702
703/// Execute a primitive typed array into a buffer of native values, assuming all values are valid.
704///
705/// # Errors
706///
707/// Returns a `VortexError` if the array is not all-valid (has any nulls).
708impl<T: NativePType> Executable for Buffer<T> {
709    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
710        let array = PrimitiveArray::execute(array, ctx)?;
711        vortex_ensure!(
712            array.all_valid()?,
713            "Cannot execute to native buffer: array is not all-valid."
714        );
715        Ok(array.into_buffer())
716    }
717}
718
719/// Execute the array to canonical form and unwrap as a [`PrimitiveArray`].
720///
721/// This will panic if the array's dtype is not primitive.
722impl Executable for PrimitiveArray {
723    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
724        match array.try_into::<PrimitiveVTable>() {
725            Ok(primitive) => Ok(primitive),
726            Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
727        }
728    }
729}
730
731/// Execute the array to canonical form and unwrap as a [`BoolArray`].
732///
733/// This will panic if the array's dtype is not bool.
734impl Executable for BoolArray {
735    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
736        match array.try_into::<BoolVTable>() {
737            Ok(bool_array) => Ok(bool_array),
738            Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
739        }
740    }
741}
742
743/// Execute the array to a [`BitBuffer`], aka a non-nullable  [`BoolArray`].
744///
745/// This will panic if the array's dtype is not non-nullable bool.
746impl Executable for BitBuffer {
747    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
748        let bool = BoolArray::execute(array, ctx)?;
749        assert!(
750            !bool.dtype().is_nullable(),
751            "bit buffer execute only works with non-nullable bool arrays"
752        );
753        Ok(bool.into_bit_buffer())
754    }
755}
756
757/// Execute the array to canonical form and unwrap as a [`NullArray`].
758///
759/// This will panic if the array's dtype is not null.
760impl Executable for NullArray {
761    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
762        match array.try_into::<NullVTable>() {
763            Ok(null_array) => Ok(null_array),
764            Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
765        }
766    }
767}
768
769/// Execute the array to canonical form and unwrap as a [`VarBinViewArray`].
770///
771/// This will panic if the array's dtype is not utf8 or binary.
772impl Executable for VarBinViewArray {
773    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
774        match array.try_into::<VarBinViewVTable>() {
775            Ok(varbinview) => Ok(varbinview),
776            Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
777        }
778    }
779}
780
781/// Execute the array to canonical form and unwrap as an [`ExtensionArray`].
782///
783/// This will panic if the array's dtype is not an extension type.
784impl Executable for ExtensionArray {
785    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
786        match array.try_into::<ExtensionVTable>() {
787            Ok(ext_array) => Ok(ext_array),
788            Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
789        }
790    }
791}
792
793/// Execute the array to canonical form and unwrap as a [`DecimalArray`].
794///
795/// This will panic if the array's dtype is not decimal.
796impl Executable for DecimalArray {
797    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
798        match array.try_into::<DecimalVTable>() {
799            Ok(decimal) => Ok(decimal),
800            Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
801        }
802    }
803}
804
805/// Execute the array to canonical form and unwrap as a [`ListViewArray`].
806///
807/// This will panic if the array's dtype is not list.
808impl Executable for ListViewArray {
809    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
810        match array.try_into::<ListViewVTable>() {
811            Ok(list) => Ok(list),
812            Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
813        }
814    }
815}
816
817/// Execute the array to canonical form and unwrap as a [`FixedSizeListArray`].
818///
819/// This will panic if the array's dtype is not fixed size list.
820impl Executable for FixedSizeListArray {
821    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
822        match array.try_into::<FixedSizeListVTable>() {
823            Ok(fsl) => Ok(fsl),
824            Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
825        }
826    }
827}
828
829/// Execute the array to canonical form and unwrap as a [`StructArray`].
830///
831/// This will panic if the array's dtype is not struct.
832impl Executable for StructArray {
833    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
834        match array.try_into::<StructVTable>() {
835            Ok(struct_array) => Ok(struct_array),
836            Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
837        }
838    }
839}
840
/// A view into a canonical array type.
///
/// Every variant borrows one concrete canonical array for the lifetime `'a`; the variant names
/// mirror those of [`Canonical`]. Converting a view into an owned [`Canonical`] clones the
/// referenced array.
#[derive(Debug, Clone)]
pub enum CanonicalView<'a> {
    /// Borrowed [`NullArray`].
    Null(&'a NullArray),
    /// Borrowed [`BoolArray`].
    Bool(&'a BoolArray),
    /// Borrowed [`PrimitiveArray`].
    Primitive(&'a PrimitiveArray),
    /// Borrowed [`DecimalArray`].
    Decimal(&'a DecimalArray),
    /// Borrowed [`VarBinViewArray`].
    VarBinView(&'a VarBinViewArray),
    /// Borrowed [`ListViewArray`] (note the variant is named `List`, not `ListView`).
    List(&'a ListViewArray),
    /// Borrowed [`FixedSizeListArray`].
    FixedSizeList(&'a FixedSizeListArray),
    /// Borrowed [`StructArray`].
    Struct(&'a StructArray),
    /// Borrowed [`ExtensionArray`].
    Extension(&'a ExtensionArray),
}
854
855impl From<CanonicalView<'_>> for Canonical {
856    fn from(value: CanonicalView<'_>) -> Self {
857        match value {
858            CanonicalView::Null(a) => Canonical::Null(a.clone()),
859            CanonicalView::Bool(a) => Canonical::Bool(a.clone()),
860            CanonicalView::Primitive(a) => Canonical::Primitive(a.clone()),
861            CanonicalView::Decimal(a) => Canonical::Decimal(a.clone()),
862            CanonicalView::VarBinView(a) => Canonical::VarBinView(a.clone()),
863            CanonicalView::List(a) => Canonical::List(a.clone()),
864            CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.clone()),
865            CanonicalView::Struct(a) => Canonical::Struct(a.clone()),
866            CanonicalView::Extension(a) => Canonical::Extension(a.clone()),
867        }
868    }
869}
870
871impl AsRef<dyn Array> for CanonicalView<'_> {
872    fn as_ref(&self) -> &dyn Array {
873        match self {
874            CanonicalView::Null(a) => a.as_ref(),
875            CanonicalView::Bool(a) => a.as_ref(),
876            CanonicalView::Primitive(a) => a.as_ref(),
877            CanonicalView::Decimal(a) => a.as_ref(),
878            CanonicalView::VarBinView(a) => a.as_ref(),
879            CanonicalView::List(a) => a.as_ref(),
880            CanonicalView::FixedSizeList(a) => a.as_ref(),
881            CanonicalView::Struct(a) => a.as_ref(),
882            CanonicalView::Extension(a) => a.as_ref(),
883        }
884    }
885}
886
887/// A matcher for any canonical array type.
888pub struct AnyCanonical;
889impl Matcher for AnyCanonical {
890    type Match<'a> = CanonicalView<'a>;
891
892    fn matches(array: &dyn Array) -> bool {
893        array.is::<NullVTable>()
894            || array.is::<BoolVTable>()
895            || array.is::<PrimitiveVTable>()
896            || array.is::<DecimalVTable>()
897            || array.is::<StructVTable>()
898            || array.is::<ListViewVTable>()
899            || array.is::<FixedSizeListVTable>()
900            || array.is::<VarBinViewVTable>()
901            || array.is::<ExtensionVTable>()
902    }
903
904    fn try_match<'a>(array: &'a dyn Array) -> Option<Self::Match<'a>> {
905        if let Some(a) = array.as_opt::<NullVTable>() {
906            Some(CanonicalView::Null(a))
907        } else if let Some(a) = array.as_opt::<BoolVTable>() {
908            Some(CanonicalView::Bool(a))
909        } else if let Some(a) = array.as_opt::<PrimitiveVTable>() {
910            Some(CanonicalView::Primitive(a))
911        } else if let Some(a) = array.as_opt::<DecimalVTable>() {
912            Some(CanonicalView::Decimal(a))
913        } else if let Some(a) = array.as_opt::<StructVTable>() {
914            Some(CanonicalView::Struct(a))
915        } else if let Some(a) = array.as_opt::<ListViewVTable>() {
916            Some(CanonicalView::List(a))
917        } else if let Some(a) = array.as_opt::<FixedSizeListVTable>() {
918            Some(CanonicalView::FixedSizeList(a))
919        } else if let Some(a) = array.as_opt::<VarBinViewVTable>() {
920            Some(CanonicalView::VarBinView(a))
921        } else {
922            array
923                .as_opt::<ExtensionVTable>()
924                .map(CanonicalView::Extension)
925        }
926    }
927}
928
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::Array as ArrowArray;
    use arrow_array::ArrayRef as ArrowArrayRef;
    use arrow_array::ListArray as ArrowListArray;
    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
    use arrow_array::StringArray;
    use arrow_array::StringViewArray;
    use arrow_array::StructArray as ArrowStructArray;
    use arrow_array::cast::AsArray;
    use arrow_array::types::Int32Type;
    use arrow_array::types::Int64Type;
    use arrow_array::types::UInt64Type;
    use arrow_buffer::NullBufferBuilder;
    use arrow_buffer::OffsetBuffer;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;

    use crate::ArrayRef;
    use crate::IntoArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::StructArray;
    use crate::arrow::FromArrowArray;
    use crate::arrow::IntoArrowArray;

    /// A non-canonical child nested inside a struct must surface as a plain Arrow primitive array.
    #[test]
    fn test_canonicalize_nested_struct() {
        // The inner struct's only field is a ConstantArray standing in for the primitive array
        // [100i64]. ConstantArray is not a canonical type, so the Arrow conversion is expected to
        // map it to the nearest canonical type (PrimitiveArray).
        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap()
        .into_array();

        // The outer struct pairs an already-canonical column with the nested struct.
        let outer = StructArray::from_fields(&[("a", buffer![1u64].into_array()), ("b", inner)])
            .unwrap();

        let arrow_ref = outer.into_array().into_arrow_preferred().unwrap();
        let arrow_struct = arrow_ref
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Column "a" stays a u64 primitive array.
        let first_column = arrow_struct.column(0);
        assert!(
            first_column
                .as_any()
                .downcast_ref::<ArrowPrimitiveArray<UInt64Type>>()
                .is_some()
        );

        // Column "b" is the nested struct.
        let second_column = arrow_struct.column(1).clone();
        let inner_struct = second_column
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Its single field must have been materialized as an i64 primitive array.
        let inner_values = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_values.is_some());

        assert_eq!(
            inner_values.cloned().unwrap(),
            ArrowPrimitiveArray::from_iter([100i64])
        );
    }

    /// Arrow struct -> Vortex -> Arrow must reproduce the original struct, nulls included.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0..4 valid, row 4 null, row 5 valid.
        let mut validity = NullBufferBuilder::new(6);
        validity.append_n_non_nulls(4);
        validity.append_null();
        validity.append_non_null();

        let names: ArrowArrayRef = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages: ArrowArrayRef = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct = ArrowStructArray::new(fields.into(), vec![names, ages], validity.finish());

        let vortex_struct = ArrayRef::from_arrow(&arrow_struct, true).unwrap();
        let roundtripped = vortex_struct.into_arrow_preferred().unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// Arrow list -> Vortex -> Arrow (with the original data type) must reproduce the list.
    #[test]
    fn roundtrip_list() {
        let values: ArrowArrayRef = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        // Three lists with lengths 0, 2 and 1 over the three values above.
        let arrow_list = ArrowListArray::new(
            Arc::new(Field::new_list_field(DataType::Utf8, true)),
            OffsetBuffer::from_lengths(vec![0, 2, 1]),
            values,
            None,
        );
        let list_data_type = arrow_list.data_type();

        let vortex_list = ArrayRef::from_arrow(&arrow_list, true).unwrap();
        let roundtripped = vortex_list.into_arrow(list_data_type).unwrap();

        // Compare through `dyn Array` so both sides have the same static type.
        let expected: ArrowArrayRef = Arc::new(arrow_list.clone());
        assert_eq!(expected.as_ref(), roundtripped.as_ref());
    }
}