Skip to main content

vortex_array/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Encodings that enable zero-copy sharing of data with Arrow.
5
6use std::sync::Arc;
7
8use vortex_buffer::Buffer;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_error::vortex_ensure;
12use vortex_error::vortex_panic;
13
14use crate::Array;
15use crate::ArrayRef;
16use crate::Columnar;
17use crate::Executable;
18use crate::ExecutionCtx;
19use crate::IntoArray;
20use crate::arrays::BoolArray;
21use crate::arrays::BoolArrayParts;
22use crate::arrays::BoolVTable;
23use crate::arrays::DecimalArray;
24use crate::arrays::DecimalArrayParts;
25use crate::arrays::DecimalVTable;
26use crate::arrays::ExtensionArray;
27use crate::arrays::ExtensionVTable;
28use crate::arrays::FixedSizeListArray;
29use crate::arrays::FixedSizeListVTable;
30use crate::arrays::ListViewArray;
31use crate::arrays::ListViewArrayParts;
32use crate::arrays::ListViewRebuildMode;
33use crate::arrays::ListViewVTable;
34use crate::arrays::NullArray;
35use crate::arrays::NullVTable;
36use crate::arrays::PrimitiveArray;
37use crate::arrays::PrimitiveArrayParts;
38use crate::arrays::PrimitiveVTable;
39use crate::arrays::StructArray;
40use crate::arrays::StructArrayParts;
41use crate::arrays::StructVTable;
42use crate::arrays::VarBinViewArray;
43use crate::arrays::VarBinViewArrayParts;
44use crate::arrays::VarBinViewVTable;
45use crate::arrays::constant_canonicalize;
46use crate::builders::builder_with_capacity;
47use crate::dtype::DType;
48use crate::dtype::NativePType;
49use crate::matcher::Matcher;
50
51/// An enum capturing the default uncompressed encodings for each [Vortex type](DType).
52///
53/// Any array can be decoded into canonical form via the [`to_canonical`](Array::to_canonical)
54/// trait method. This is the simplest encoding for a type, and will not be compressed but may
55/// contain compressed child arrays.
56///
57/// Canonical form is useful for doing type-specific compute where you need to know that all
58/// elements are laid out decompressed and contiguous in memory.
59///
60/// Each `Canonical` variant has a corresponding [`DType`] variant, with the notable exception of
61/// [`Canonical::VarBinView`], which is the canonical encoding for both [`DType::Utf8`] and
62/// [`DType::Binary`].
63///
64/// # Laziness
65///
66/// Canonical form is not recursive, so while a `StructArray` is the canonical format for any
67/// `Struct` type, individual column child arrays may still be compressed. This allows
68/// compute over Vortex arrays to push decoding as late as possible, and ideally many child arrays
69/// never need to be decoded into canonical form at all depending on the compute.
70///
71/// # Arrow interoperability
72///
73/// All of the Vortex canonical encodings have an equivalent Arrow encoding that can be built
74/// zero-copy, and the corresponding Arrow array types can also be built directly.
75///
76/// The full list of canonical types and their equivalent Arrow array types are:
77///
78/// * `NullArray`: [`arrow_array::NullArray`]
79/// * `BoolArray`: [`arrow_array::BooleanArray`]
80/// * `PrimitiveArray`: [`arrow_array::PrimitiveArray`]
81/// * `DecimalArray`: [`arrow_array::Decimal128Array`] and [`arrow_array::Decimal256Array`]
82/// * `VarBinViewArray`: [`arrow_array::GenericByteViewArray`]
83/// * `ListViewArray`: [`arrow_array::ListViewArray`]
84/// * `FixedSizeListArray`: [`arrow_array::FixedSizeListArray`]
85/// * `StructArray`: [`arrow_array::StructArray`]
86///
87/// Vortex uses a logical type system, unlike Arrow which uses physical encodings for its types.
88/// As an example, there are at least six valid physical encodings for a `Utf8` array. This can
89/// create ambiguity.
90/// Thus, if you receive an Arrow array, compress it using Vortex, and then
91/// decompress it later to pass to a compute kernel, there are multiple suitable Arrow array
92/// variants to hold the data.
93///
94/// To disambiguate, we choose a canonical physical encoding for every Vortex [`DType`], which
95/// will correspond to an arrow-rs [`arrow_schema::DataType`].
96///
97/// # Views support
98///
99/// Binary and String views, also known as "German strings" are a better encoding format for
100/// nearly all use-cases. Variable-length binary views are part of the Apache Arrow spec, and are
101/// fully supported by the Datafusion query engine. We use them as our canonical string encoding
102/// for all `Utf8` and `Binary` typed arrays in Vortex. They provide considerably faster filter
103/// execution than the core `StringArray` and `BinaryArray` types, at the expense of potentially
104/// needing [garbage collection][arrow_array::GenericByteViewArray::gc] to clear unreferenced items
105/// from memory.
106///
107/// # For Developers
108///
109/// If you add another variant to this enum, make sure to update `dyn Array::is_canonical`,
110/// and the fuzzer in `fuzz/fuzz_targets/array_ops.rs`.
111#[derive(Debug, Clone)]
112pub enum Canonical {
113    Null(NullArray),
114    Bool(BoolArray),
115    Primitive(PrimitiveArray),
116    Decimal(DecimalArray),
117    VarBinView(VarBinViewArray),
118    List(ListViewArray),
119    FixedSizeList(FixedSizeListArray),
120    Struct(StructArray),
121    Extension(ExtensionArray),
122}
123
/// Expands to a `match` over every [`Canonical`] variant.
///
/// The array wrapped by the matched variant is bound to the given identifier, and the same
/// expression is evaluated in every arm.
macro_rules! match_each_canonical {
    ($canonical:expr, | $inner:ident | $body:expr) => {{
        match $canonical {
            Canonical::Null($inner) => $body,
            Canonical::Bool($inner) => $body,
            Canonical::Primitive($inner) => $body,
            Canonical::Decimal($inner) => $body,
            Canonical::VarBinView($inner) => $body,
            Canonical::List($inner) => $body,
            Canonical::FixedSizeList($inner) => $body,
            Canonical::Struct($inner) => $body,
            Canonical::Extension($inner) => $body,
        }
    }};
}
140
141impl Canonical {
142    // TODO(connor): This can probably be specialized for each of the canonical arrays.
143    /// Create an empty canonical array of the given dtype.
144    pub fn empty(dtype: &DType) -> Canonical {
145        builder_with_capacity(dtype, 0).finish_into_canonical()
146    }
147
148    pub fn len(&self) -> usize {
149        match_each_canonical!(self, |arr| arr.len())
150    }
151
152    pub fn dtype(&self) -> &DType {
153        match_each_canonical!(self, |arr| arr.dtype())
154    }
155
156    pub fn is_empty(&self) -> bool {
157        match_each_canonical!(self, |arr| arr.is_empty())
158    }
159}
160
161impl Canonical {
162    /// Performs a (potentially expensive) compaction operation on the array before it is complete.
163    ///
164    /// This is mostly relevant for the variable-length types such as Utf8, Binary or List where
165    /// they can accumulate wasted space after slicing and taking operations.
166    ///
167    /// This operation is very expensive and can result in things like allocations, full-scans
168    /// and copy operations.
169    pub fn compact(&self) -> VortexResult<Canonical> {
170        match self {
171            Canonical::VarBinView(array) => Ok(Canonical::VarBinView(array.compact_buffers()?)),
172            Canonical::List(array) => Ok(Canonical::List(
173                array.rebuild(ListViewRebuildMode::TrimElements)?,
174            )),
175            _ => Ok(self.clone()),
176        }
177    }
178}
179
180// Unwrap canonical type back down to specialized type.
181impl Canonical {
182    pub fn as_null(&self) -> &NullArray {
183        if let Canonical::Null(a) = self {
184            a
185        } else {
186            vortex_panic!("Cannot get NullArray from {:?}", &self)
187        }
188    }
189
190    pub fn into_null(self) -> NullArray {
191        if let Canonical::Null(a) = self {
192            a
193        } else {
194            vortex_panic!("Cannot unwrap NullArray from {:?}", &self)
195        }
196    }
197
198    pub fn as_bool(&self) -> &BoolArray {
199        if let Canonical::Bool(a) = self {
200            a
201        } else {
202            vortex_panic!("Cannot get BoolArray from {:?}", &self)
203        }
204    }
205
206    pub fn into_bool(self) -> BoolArray {
207        if let Canonical::Bool(a) = self {
208            a
209        } else {
210            vortex_panic!("Cannot unwrap BoolArray from {:?}", &self)
211        }
212    }
213
214    pub fn as_primitive(&self) -> &PrimitiveArray {
215        if let Canonical::Primitive(a) = self {
216            a
217        } else {
218            vortex_panic!("Cannot get PrimitiveArray from {:?}", &self)
219        }
220    }
221
222    pub fn into_primitive(self) -> PrimitiveArray {
223        if let Canonical::Primitive(a) = self {
224            a
225        } else {
226            vortex_panic!("Cannot unwrap PrimitiveArray from {:?}", &self)
227        }
228    }
229
230    pub fn as_decimal(&self) -> &DecimalArray {
231        if let Canonical::Decimal(a) = self {
232            a
233        } else {
234            vortex_panic!("Cannot get DecimalArray from {:?}", &self)
235        }
236    }
237
238    pub fn into_decimal(self) -> DecimalArray {
239        if let Canonical::Decimal(a) = self {
240            a
241        } else {
242            vortex_panic!("Cannot unwrap DecimalArray from {:?}", &self)
243        }
244    }
245
246    pub fn as_varbinview(&self) -> &VarBinViewArray {
247        if let Canonical::VarBinView(a) = self {
248            a
249        } else {
250            vortex_panic!("Cannot get VarBinViewArray from {:?}", &self)
251        }
252    }
253
254    pub fn into_varbinview(self) -> VarBinViewArray {
255        if let Canonical::VarBinView(a) = self {
256            a
257        } else {
258            vortex_panic!("Cannot unwrap VarBinViewArray from {:?}", &self)
259        }
260    }
261
262    pub fn as_listview(&self) -> &ListViewArray {
263        if let Canonical::List(a) = self {
264            a
265        } else {
266            vortex_panic!("Cannot get ListArray from {:?}", &self)
267        }
268    }
269
270    pub fn into_listview(self) -> ListViewArray {
271        if let Canonical::List(a) = self {
272            a
273        } else {
274            vortex_panic!("Cannot unwrap ListArray from {:?}", &self)
275        }
276    }
277
278    pub fn as_fixed_size_list(&self) -> &FixedSizeListArray {
279        if let Canonical::FixedSizeList(a) = self {
280            a
281        } else {
282            vortex_panic!("Cannot get FixedSizeListArray from {:?}", &self)
283        }
284    }
285
286    pub fn into_fixed_size_list(self) -> FixedSizeListArray {
287        if let Canonical::FixedSizeList(a) = self {
288            a
289        } else {
290            vortex_panic!("Cannot unwrap FixedSizeListArray from {:?}", &self)
291        }
292    }
293
294    pub fn as_struct(&self) -> &StructArray {
295        if let Canonical::Struct(a) = self {
296            a
297        } else {
298            vortex_panic!("Cannot get StructArray from {:?}", &self)
299        }
300    }
301
302    pub fn into_struct(self) -> StructArray {
303        if let Canonical::Struct(a) = self {
304            a
305        } else {
306            vortex_panic!("Cannot unwrap StructArray from {:?}", &self)
307        }
308    }
309
310    pub fn as_extension(&self) -> &ExtensionArray {
311        if let Canonical::Extension(a) = self {
312            a
313        } else {
314            vortex_panic!("Cannot get ExtensionArray from {:?}", &self)
315        }
316    }
317
318    pub fn into_extension(self) -> ExtensionArray {
319        if let Canonical::Extension(a) = self {
320            a
321        } else {
322            vortex_panic!("Cannot unwrap ExtensionArray from {:?}", &self)
323        }
324    }
325}
326
327impl AsRef<dyn Array> for Canonical {
328    fn as_ref(&self) -> &(dyn Array + 'static) {
329        match_each_canonical!(self, |arr| arr.as_ref())
330    }
331}
332
333impl IntoArray for Canonical {
334    fn into_array(self) -> ArrayRef {
335        match_each_canonical!(self, |arr| arr.into_array())
336    }
337}
338
339/// Trait for types that can be converted from an owned type into an owned array variant.
340///
341/// # Canonicalization
342///
343/// This trait has a blanket implementation for all types implementing [ToCanonical].
344pub trait ToCanonical {
345    /// Canonicalize into a [`NullArray`] if the target is [`Null`](DType::Null) typed.
346    fn to_null(&self) -> NullArray;
347
348    /// Canonicalize into a [`BoolArray`] if the target is [`Bool`](DType::Bool) typed.
349    fn to_bool(&self) -> BoolArray;
350
351    /// Canonicalize into a [`PrimitiveArray`] if the target is [`Primitive`](DType::Primitive)
352    /// typed.
353    fn to_primitive(&self) -> PrimitiveArray;
354
355    /// Canonicalize into a [`DecimalArray`] if the target is [`Decimal`](DType::Decimal)
356    /// typed.
357    fn to_decimal(&self) -> DecimalArray;
358
359    /// Canonicalize into a [`StructArray`] if the target is [`Struct`](DType::Struct) typed.
360    fn to_struct(&self) -> StructArray;
361
362    /// Canonicalize into a [`ListViewArray`] if the target is [`List`](DType::List) typed.
363    fn to_listview(&self) -> ListViewArray;
364
365    /// Canonicalize into a [`FixedSizeListArray`] if the target is [`List`](DType::FixedSizeList)
366    /// typed.
367    fn to_fixed_size_list(&self) -> FixedSizeListArray;
368
369    /// Canonicalize into a [`VarBinViewArray`] if the target is [`Utf8`](DType::Utf8)
370    /// or [`Binary`](DType::Binary) typed.
371    fn to_varbinview(&self) -> VarBinViewArray;
372
373    /// Canonicalize into an [`ExtensionArray`] if the array is [`Extension`](DType::Extension)
374    /// typed.
375    fn to_extension(&self) -> ExtensionArray;
376}
377
378// Blanket impl for all Array encodings.
379impl<A: Array + ?Sized> ToCanonical for A {
380    fn to_null(&self) -> NullArray {
381        self.to_canonical()
382            .vortex_expect("to_canonical failed")
383            .into_null()
384    }
385
386    fn to_bool(&self) -> BoolArray {
387        self.to_canonical()
388            .vortex_expect("to_canonical failed")
389            .into_bool()
390    }
391
392    fn to_primitive(&self) -> PrimitiveArray {
393        self.to_canonical()
394            .vortex_expect("to_canonical failed")
395            .into_primitive()
396    }
397
398    fn to_decimal(&self) -> DecimalArray {
399        self.to_canonical()
400            .vortex_expect("to_canonical failed")
401            .into_decimal()
402    }
403
404    fn to_struct(&self) -> StructArray {
405        self.to_canonical()
406            .vortex_expect("to_canonical failed")
407            .into_struct()
408    }
409
410    fn to_listview(&self) -> ListViewArray {
411        self.to_canonical()
412            .vortex_expect("to_canonical failed")
413            .into_listview()
414    }
415
416    fn to_fixed_size_list(&self) -> FixedSizeListArray {
417        self.to_canonical()
418            .vortex_expect("to_canonical failed")
419            .into_fixed_size_list()
420    }
421
422    fn to_varbinview(&self) -> VarBinViewArray {
423        self.to_canonical()
424            .vortex_expect("to_canonical failed")
425            .into_varbinview()
426    }
427
428    fn to_extension(&self) -> ExtensionArray {
429        self.to_canonical()
430            .vortex_expect("to_canonical failed")
431            .into_extension()
432    }
433}
434
435impl From<Canonical> for ArrayRef {
436    fn from(value: Canonical) -> Self {
437        match_each_canonical!(value, |arr| arr.into_array())
438    }
439}
440
441/// Recursively execute the array until it reaches canonical form.
442///
443/// Callers should prefer to execute into `Columnar` if they are able to optimize their use for
444/// constant arrays.
445impl Executable for Canonical {
446    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
447        if let Some(canonical) = array.as_opt::<AnyCanonical>() {
448            return Ok(canonical.into());
449        }
450
451        // Invoke execute directly to avoid logging the call in the execution context.
452        Ok(match Columnar::execute(array.clone(), ctx)? {
453            Columnar::Canonical(c) => c,
454            Columnar::Constant(s) => {
455                let canonical = constant_canonicalize(&s)?;
456                canonical
457                    .as_ref()
458                    .statistics()
459                    .inherit_from(array.statistics());
460                canonical
461            }
462        })
463    }
464}
465
466/// Recursively execute the array until it reaches canonical form along with its validity.
467///
468/// Callers should prefer to execute into `Columnar` instead of this specific target.
469/// This target is useful when preparing arrays for writing.
470pub struct CanonicalValidity(pub Canonical);
471
472impl Executable for CanonicalValidity {
473    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
474        match array.execute::<Canonical>(ctx)? {
475            n @ Canonical::Null(_) => Ok(CanonicalValidity(n)),
476            Canonical::Bool(b) => {
477                let BoolArrayParts {
478                    bits,
479                    offset,
480                    len,
481                    validity,
482                } = b.into_parts();
483                Ok(CanonicalValidity(Canonical::Bool(
484                    BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
485                )))
486            }
487            Canonical::Primitive(p) => {
488                let PrimitiveArrayParts {
489                    ptype,
490                    buffer,
491                    validity,
492                } = p.into_parts();
493                Ok(CanonicalValidity(Canonical::Primitive(unsafe {
494                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
495                })))
496            }
497            Canonical::Decimal(d) => {
498                let DecimalArrayParts {
499                    decimal_dtype,
500                    values,
501                    values_type,
502                    validity,
503                } = d.into_parts();
504                Ok(CanonicalValidity(Canonical::Decimal(unsafe {
505                    DecimalArray::new_unchecked_handle(
506                        values,
507                        values_type,
508                        decimal_dtype,
509                        validity.execute(ctx)?,
510                    )
511                })))
512            }
513            Canonical::VarBinView(vbv) => {
514                let VarBinViewArrayParts {
515                    dtype,
516                    buffers,
517                    views,
518                    validity,
519                } = vbv.into_parts();
520                Ok(CanonicalValidity(Canonical::VarBinView(unsafe {
521                    VarBinViewArray::new_handle_unchecked(
522                        views,
523                        buffers,
524                        dtype,
525                        validity.execute(ctx)?,
526                    )
527                })))
528            }
529            Canonical::List(l) => {
530                let ListViewArrayParts {
531                    elements,
532                    offsets,
533                    sizes,
534                    validity,
535                    ..
536                } = l.into_parts();
537                Ok(CanonicalValidity(Canonical::List(unsafe {
538                    ListViewArray::new_unchecked(elements, offsets, sizes, validity.execute(ctx)?)
539                })))
540            }
541            Canonical::FixedSizeList(fsl) => {
542                let list_size = fsl.list_size();
543                let len = fsl.len();
544                let (elements, validity, _) = fsl.into_parts();
545                Ok(CanonicalValidity(Canonical::FixedSizeList(
546                    FixedSizeListArray::new(elements, list_size, validity.execute(ctx)?, len),
547                )))
548            }
549            Canonical::Struct(st) => {
550                let len = st.len();
551                let StructArrayParts {
552                    struct_fields,
553                    fields,
554                    validity,
555                } = st.into_parts();
556                Ok(CanonicalValidity(Canonical::Struct(unsafe {
557                    StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?)
558                })))
559            }
560            Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(
561                ExtensionArray::new(
562                    ext.ext_dtype().clone(),
563                    ext.storage()
564                        .clone()
565                        .execute::<CanonicalValidity>(ctx)?
566                        .0
567                        .into_array(),
568                ),
569            ))),
570        }
571    }
572}
573
574/// Recursively execute the array until all of its children are canonical.
575///
576/// This method is useful to guarantee that all operators are fully executed,
577/// callers should prefer an execution target that's suitable for their use case instead of this one.
578pub struct RecursiveCanonical(pub Canonical);
579
580impl Executable for RecursiveCanonical {
581    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
582        match array.execute::<Canonical>(ctx)? {
583            n @ Canonical::Null(_) => Ok(RecursiveCanonical(n)),
584            Canonical::Bool(b) => {
585                let BoolArrayParts {
586                    bits,
587                    offset,
588                    len,
589                    validity,
590                } = b.into_parts();
591                Ok(RecursiveCanonical(Canonical::Bool(
592                    BoolArray::try_new_from_handle(bits, offset, len, validity.execute(ctx)?)?,
593                )))
594            }
595            Canonical::Primitive(p) => {
596                let PrimitiveArrayParts {
597                    ptype,
598                    buffer,
599                    validity,
600                } = p.into_parts();
601                Ok(RecursiveCanonical(Canonical::Primitive(unsafe {
602                    PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity.execute(ctx)?)
603                })))
604            }
605            Canonical::Decimal(d) => {
606                let DecimalArrayParts {
607                    decimal_dtype,
608                    values,
609                    values_type,
610                    validity,
611                } = d.into_parts();
612                Ok(RecursiveCanonical(Canonical::Decimal(unsafe {
613                    DecimalArray::new_unchecked_handle(
614                        values,
615                        values_type,
616                        decimal_dtype,
617                        validity.execute(ctx)?,
618                    )
619                })))
620            }
621            Canonical::VarBinView(vbv) => {
622                let VarBinViewArrayParts {
623                    dtype,
624                    buffers,
625                    views,
626                    validity,
627                } = vbv.into_parts();
628                Ok(RecursiveCanonical(Canonical::VarBinView(unsafe {
629                    VarBinViewArray::new_handle_unchecked(
630                        views,
631                        buffers,
632                        dtype,
633                        validity.execute(ctx)?,
634                    )
635                })))
636            }
637            Canonical::List(l) => {
638                let ListViewArrayParts {
639                    elements,
640                    offsets,
641                    sizes,
642                    validity,
643                    ..
644                } = l.into_parts();
645                Ok(RecursiveCanonical(Canonical::List(unsafe {
646                    ListViewArray::new_unchecked(
647                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
648                        offsets.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
649                        sizes.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
650                        validity.execute(ctx)?,
651                    )
652                })))
653            }
654            Canonical::FixedSizeList(fsl) => {
655                let list_size = fsl.list_size();
656                let len = fsl.len();
657                let (elements, validity, _) = fsl.into_parts();
658                Ok(RecursiveCanonical(Canonical::FixedSizeList(
659                    FixedSizeListArray::new(
660                        elements.execute::<RecursiveCanonical>(ctx)?.0.into_array(),
661                        list_size,
662                        validity.execute(ctx)?,
663                        len,
664                    ),
665                )))
666            }
667            Canonical::Struct(st) => {
668                let len = st.len();
669                let StructArrayParts {
670                    struct_fields,
671                    fields,
672                    validity,
673                } = st.into_parts();
674                let executed_fields = fields
675                    .iter()
676                    .map(|f| Ok(f.clone().execute::<RecursiveCanonical>(ctx)?.0.into_array()))
677                    .collect::<VortexResult<Arc<[_]>>>()?;
678
679                Ok(RecursiveCanonical(Canonical::Struct(unsafe {
680                    StructArray::new_unchecked(
681                        executed_fields,
682                        struct_fields,
683                        len,
684                        validity.execute(ctx)?,
685                    )
686                })))
687            }
688            Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(
689                ExtensionArray::new(
690                    ext.ext_dtype().clone(),
691                    ext.storage()
692                        .clone()
693                        .execute::<RecursiveCanonical>(ctx)?
694                        .0
695                        .into_array(),
696                ),
697            ))),
698        }
699    }
700}
701
702/// Execute a primitive typed array into a buffer of native values, assuming all values are valid.
703///
704/// # Errors
705///
706/// Returns a `VortexError` if the array is not all-valid (has any nulls).
707impl<T: NativePType> Executable for Buffer<T> {
708    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
709        let array = PrimitiveArray::execute(array, ctx)?;
710        vortex_ensure!(
711            array.all_valid()?,
712            "Cannot execute to native buffer: array is not all-valid."
713        );
714        Ok(array.into_buffer())
715    }
716}
717
718/// Execute the array to canonical form and unwrap as a [`PrimitiveArray`].
719///
720/// This will panic if the array's dtype is not primitive.
721impl Executable for PrimitiveArray {
722    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
723        match array.try_into::<PrimitiveVTable>() {
724            Ok(primitive) => Ok(primitive),
725            Err(array) => Ok(Canonical::execute(array, ctx)?.into_primitive()),
726        }
727    }
728}
729
730/// Execute the array to canonical form and unwrap as a [`BoolArray`].
731///
732/// This will panic if the array's dtype is not bool.
733impl Executable for BoolArray {
734    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
735        match array.try_into::<BoolVTable>() {
736            Ok(bool_array) => Ok(bool_array),
737            Err(array) => Ok(Canonical::execute(array, ctx)?.into_bool()),
738        }
739    }
740}
741
742/// Execute the array to canonical form and unwrap as a [`NullArray`].
743///
744/// This will panic if the array's dtype is not null.
745impl Executable for NullArray {
746    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
747        match array.try_into::<NullVTable>() {
748            Ok(null_array) => Ok(null_array),
749            Err(array) => Ok(Canonical::execute(array, ctx)?.into_null()),
750        }
751    }
752}
753
754/// Execute the array to canonical form and unwrap as a [`VarBinViewArray`].
755///
756/// This will panic if the array's dtype is not utf8 or binary.
757impl Executable for VarBinViewArray {
758    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
759        match array.try_into::<VarBinViewVTable>() {
760            Ok(varbinview) => Ok(varbinview),
761            Err(array) => Ok(Canonical::execute(array, ctx)?.into_varbinview()),
762        }
763    }
764}
765
766/// Execute the array to canonical form and unwrap as an [`ExtensionArray`].
767///
768/// This will panic if the array's dtype is not an extension type.
769impl Executable for ExtensionArray {
770    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
771        match array.try_into::<ExtensionVTable>() {
772            Ok(ext_array) => Ok(ext_array),
773            Err(array) => Ok(Canonical::execute(array, ctx)?.into_extension()),
774        }
775    }
776}
777
778/// Execute the array to canonical form and unwrap as a [`DecimalArray`].
779///
780/// This will panic if the array's dtype is not decimal.
781impl Executable for DecimalArray {
782    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
783        match array.try_into::<DecimalVTable>() {
784            Ok(decimal) => Ok(decimal),
785            Err(array) => Ok(Canonical::execute(array, ctx)?.into_decimal()),
786        }
787    }
788}
789
790/// Execute the array to canonical form and unwrap as a [`ListViewArray`].
791///
792/// This will panic if the array's dtype is not list.
793impl Executable for ListViewArray {
794    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
795        match array.try_into::<ListViewVTable>() {
796            Ok(list) => Ok(list),
797            Err(array) => Ok(Canonical::execute(array, ctx)?.into_listview()),
798        }
799    }
800}
801
802/// Execute the array to canonical form and unwrap as a [`FixedSizeListArray`].
803///
804/// This will panic if the array's dtype is not fixed size list.
805impl Executable for FixedSizeListArray {
806    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
807        match array.try_into::<FixedSizeListVTable>() {
808            Ok(fsl) => Ok(fsl),
809            Err(array) => Ok(Canonical::execute(array, ctx)?.into_fixed_size_list()),
810        }
811    }
812}
813
814/// Execute the array to canonical form and unwrap as a [`StructArray`].
815///
816/// This will panic if the array's dtype is not struct.
817impl Executable for StructArray {
818    fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
819        match array.try_into::<StructVTable>() {
820            Ok(struct_array) => Ok(struct_array),
821            Err(array) => Ok(Canonical::execute(array, ctx)?.into_struct()),
822        }
823    }
824}
825
826/// A view into a canonical array type.
827#[derive(Debug, Clone)]
828pub enum CanonicalView<'a> {
829    Null(&'a NullArray),
830    Bool(&'a BoolArray),
831    Primitive(&'a PrimitiveArray),
832    Decimal(&'a DecimalArray),
833    VarBinView(&'a VarBinViewArray),
834    List(&'a ListViewArray),
835    FixedSizeList(&'a FixedSizeListArray),
836    Struct(&'a StructArray),
837    Extension(&'a ExtensionArray),
838}
839
840impl From<CanonicalView<'_>> for Canonical {
841    fn from(value: CanonicalView<'_>) -> Self {
842        match value {
843            CanonicalView::Null(a) => Canonical::Null(a.clone()),
844            CanonicalView::Bool(a) => Canonical::Bool(a.clone()),
845            CanonicalView::Primitive(a) => Canonical::Primitive(a.clone()),
846            CanonicalView::Decimal(a) => Canonical::Decimal(a.clone()),
847            CanonicalView::VarBinView(a) => Canonical::VarBinView(a.clone()),
848            CanonicalView::List(a) => Canonical::List(a.clone()),
849            CanonicalView::FixedSizeList(a) => Canonical::FixedSizeList(a.clone()),
850            CanonicalView::Struct(a) => Canonical::Struct(a.clone()),
851            CanonicalView::Extension(a) => Canonical::Extension(a.clone()),
852        }
853    }
854}
855
856impl AsRef<dyn Array> for CanonicalView<'_> {
857    fn as_ref(&self) -> &dyn Array {
858        match self {
859            CanonicalView::Null(a) => a.as_ref(),
860            CanonicalView::Bool(a) => a.as_ref(),
861            CanonicalView::Primitive(a) => a.as_ref(),
862            CanonicalView::Decimal(a) => a.as_ref(),
863            CanonicalView::VarBinView(a) => a.as_ref(),
864            CanonicalView::List(a) => a.as_ref(),
865            CanonicalView::FixedSizeList(a) => a.as_ref(),
866            CanonicalView::Struct(a) => a.as_ref(),
867            CanonicalView::Extension(a) => a.as_ref(),
868        }
869    }
870}
871
872/// A matcher for any canonical array type.
873pub struct AnyCanonical;
874impl Matcher for AnyCanonical {
875    type Match<'a> = CanonicalView<'a>;
876
877    fn matches(array: &dyn Array) -> bool {
878        array.is::<NullVTable>()
879            || array.is::<BoolVTable>()
880            || array.is::<PrimitiveVTable>()
881            || array.is::<DecimalVTable>()
882            || array.is::<StructVTable>()
883            || array.is::<ListViewVTable>()
884            || array.is::<FixedSizeListVTable>()
885            || array.is::<VarBinViewVTable>()
886            || array.is::<ExtensionVTable>()
887    }
888
889    fn try_match<'a>(array: &'a dyn Array) -> Option<Self::Match<'a>> {
890        if let Some(a) = array.as_opt::<NullVTable>() {
891            Some(CanonicalView::Null(a))
892        } else if let Some(a) = array.as_opt::<BoolVTable>() {
893            Some(CanonicalView::Bool(a))
894        } else if let Some(a) = array.as_opt::<PrimitiveVTable>() {
895            Some(CanonicalView::Primitive(a))
896        } else if let Some(a) = array.as_opt::<DecimalVTable>() {
897            Some(CanonicalView::Decimal(a))
898        } else if let Some(a) = array.as_opt::<StructVTable>() {
899            Some(CanonicalView::Struct(a))
900        } else if let Some(a) = array.as_opt::<ListViewVTable>() {
901            Some(CanonicalView::List(a))
902        } else if let Some(a) = array.as_opt::<FixedSizeListVTable>() {
903            Some(CanonicalView::FixedSizeList(a))
904        } else if let Some(a) = array.as_opt::<VarBinViewVTable>() {
905            Some(CanonicalView::VarBinView(a))
906        } else {
907            array
908                .as_opt::<ExtensionVTable>()
909                .map(CanonicalView::Extension)
910        }
911    }
912}
913
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow_array::Array as ArrowArray;
    use arrow_array::ArrayRef as ArrowArrayRef;
    use arrow_array::ListArray as ArrowListArray;
    use arrow_array::PrimitiveArray as ArrowPrimitiveArray;
    use arrow_array::StringArray;
    use arrow_array::StringViewArray;
    use arrow_array::StructArray as ArrowStructArray;
    use arrow_array::cast::AsArray;
    use arrow_array::types::Int32Type;
    use arrow_array::types::Int64Type;
    use arrow_array::types::UInt64Type;
    use arrow_buffer::NullBufferBuilder;
    use arrow_buffer::OffsetBuffer;
    use arrow_schema::DataType;
    use arrow_schema::Field;
    use vortex_buffer::buffer;

    use crate::ArrayRef;
    use crate::IntoArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::StructArray;
    use crate::arrow::FromArrowArray;
    use crate::arrow::IntoArrowArray;

    /// A struct nested inside a struct, with a non-canonical leaf, must come out of the Arrow
    /// conversion with primitive columns at both levels.
    #[test]
    fn test_canonicalize_nested_struct() {
        // The inner struct's only field is a ConstantArray representing the primitive array
        //   [100i64]
        // ConstantArray is not a canonical type, so the Arrow conversion should map it to the
        // nearest canonical type (PrimitiveArray).
        let inner = StructArray::from_fields(&[(
            "inner_a",
            ConstantArray::new(100i64, 1).into_array(),
        )])
        .unwrap()
        .into_array();

        // Outer struct: one plain primitive column plus the nested struct built above.
        let outer = StructArray::from_fields(&[("a", buffer![1u64].into_array()), ("b", inner)])
            .unwrap();

        let arrow = outer.into_array().into_arrow_preferred().unwrap();
        let arrow_struct = arrow
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // Column "a" is a u64 primitive array.
        let first_column = arrow_struct.column(0);
        assert!(
            first_column
                .as_any()
                .is::<ArrowPrimitiveArray<UInt64Type>>()
        );

        // Column "b" is itself a struct.
        let inner_struct = arrow_struct
            .column(1)
            .as_any()
            .downcast_ref::<ArrowStructArray>()
            .cloned()
            .unwrap();

        // The constant leaf was materialized as an i64 primitive array holding [100].
        let expected = ArrowPrimitiveArray::<Int64Type>::from_iter([100i64]);
        let inner_a = inner_struct
            .column(0)
            .as_any()
            .downcast_ref::<ArrowPrimitiveArray<Int64Type>>();
        assert!(inner_a.is_some());
        assert_eq!(inner_a.cloned().unwrap(), expected);
    }

    /// An Arrow struct with nullable fields and struct-level nulls survives
    /// Arrow -> Vortex -> Arrow unchanged.
    #[test]
    fn roundtrip_struct() {
        // Struct-level validity: rows 0..=3 valid, row 4 null, row 5 valid.
        let mut validity = NullBufferBuilder::new(6);
        validity.append_n_non_nulls(4);
        validity.append_null();
        validity.append_non_null();

        let names = Arc::new(StringViewArray::from_iter(vec![
            Some("Joseph"),
            None,
            Some("Angela"),
            Some("Mikhail"),
            None,
            None,
        ]));
        let ages = Arc::new(ArrowPrimitiveArray::<Int32Type>::from(vec![
            Some(25),
            Some(31),
            None,
            Some(57),
            None,
            None,
        ]));

        let fields = vec![
            Arc::new(Field::new("name", DataType::Utf8View, true)),
            Arc::new(Field::new("age", DataType::Int32, true)),
        ];
        let arrow_struct =
            ArrowStructArray::new(fields.into(), vec![names, ages], validity.finish());

        let roundtripped = ArrayRef::from_arrow(&arrow_struct, true)
            .unwrap()
            .into_arrow_preferred()
            .unwrap();

        assert_eq!(&arrow_struct, roundtripped.as_struct());
    }

    /// An Arrow list of strings survives Arrow -> Vortex -> Arrow when the original Arrow data
    /// type is requested on the way back.
    #[test]
    fn roundtrip_list() {
        let names = Arc::new(StringArray::from_iter(vec![
            Some("Joseph"),
            Some("Angela"),
            Some("Mikhail"),
        ]));

        // Three lists with lengths 0, 2 and 1 over the three strings above.
        let item_field = Arc::new(Field::new_list_field(DataType::Utf8, true));
        let offsets = OffsetBuffer::from_lengths(vec![0, 2, 1]);
        let arrow_list = ArrowListArray::new(item_field, offsets, names, None);
        let list_data_type = arrow_list.data_type();

        let rt_arrow_list = ArrayRef::from_arrow(&arrow_list, true)
            .unwrap()
            .into_arrow(list_data_type)
            .unwrap();

        let expected: ArrowArrayRef = Arc::new(arrow_list.clone());
        assert_eq!(expected.as_ref(), rt_arrow_list.as_ref());
    }
}