// vortex_array/vtable/mod.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! This module contains the VTable definitions for a Vortex encoding.

6mod array;
7mod canonical;
8mod compute;
9mod dyn_;
10mod encode;
11mod operations;
12mod validity;
13mod visitor;
14
15use std::fmt::Debug;
16use std::ops::Deref;
17
18pub use array::*;
19pub use canonical::*;
20pub use compute::*;
21pub use dyn_::*;
22pub use encode::*;
23pub use operations::*;
24pub use validity::*;
25pub use visitor::*;
26use vortex_buffer::BufferHandle;
27use vortex_dtype::DType;
28use vortex_error::VortexResult;
29use vortex_error::vortex_bail;
30use vortex_vector::Vector;
31
32use crate::Array;
33use crate::IntoArray;
34use crate::execution::ExecutionCtx;
35use crate::serde::ArrayChildren;
36
37/// The array [`VTable`] encapsulates logic for an Array type within Vortex.
38///
39/// The logic is split across several "VTable" traits to enable easier code organization than
40/// simply lumping everything into a single trait.
41///
42/// Some of these vtables are optional, such as the [`ComputeVTable`] and [`EncodeVTable`],
43/// which can be disabled by assigning to the [`NotSupported`] type.
44///
45/// From this [`VTable`] trait, we derive implementations for the sealed [`Array`] and [`DynVTable`]
46/// traits via the [`crate::ArrayAdapter`] and [`ArrayVTableAdapter`] types respectively.
47///
48/// The functions defined in these vtable traits will typically document their pre- and
49/// post-conditions. The pre-conditions are validated inside the [`Array`] and [`DynVTable`]
50/// implementations so do not need to be checked in the vtable implementations (for example, index
51/// out of bounds). Post-conditions are validated after invocation of the vtable function and will
52/// panic if violated.
53pub trait VTable: 'static + Sized + Send + Sync + Debug {
54    type Array: 'static + Send + Sync + Clone + Debug + Deref<Target = dyn Array> + IntoArray;
55    type Metadata: Debug;
56
57    type ArrayVTable: BaseArrayVTable<Self>;
58    type CanonicalVTable: CanonicalVTable<Self>;
59    type OperationsVTable: OperationsVTable<Self>;
60    type ValidityVTable: ValidityVTable<Self>;
61    type VisitorVTable: VisitorVTable<Self>;
62
63    /// Optionally enable implementing dynamic compute dispatch for this encoding.
64    /// Can be disabled by assigning to the [`NotSupported`] type.
65    type ComputeVTable: ComputeVTable<Self>;
66    /// Optionally enable the [`EncodeVTable`] for this encoding. This allows it to partake in
67    /// compression.
68    /// Can be disabled by assigning to the [`NotSupported`] type.
69    type EncodeVTable: EncodeVTable<Self>;
70
71    /// Returns the ID of the encoding.
72    fn id(&self) -> ArrayId;
73
74    /// Returns the encoding for the array.
75    fn encoding(array: &Self::Array) -> ArrayVTable;
76
77    /// Exports metadata for an array.
78    ///
79    /// All other parts of the array are exported using the [`crate::vtable::VisitorVTable`].
80    ///
81    /// * If the array does not contain metadata, it should return
82    ///   [`crate::metadata::EmptyMetadata`].
83    fn metadata(array: &Self::Array) -> VortexResult<Self::Metadata>;
84
85    /// Serialize metadata into a byte buffer for IPC or file storage.
86    /// Return `None` if the array cannot be serialized.
87    fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>>;
88
89    /// Deserialize metadata from a byte buffer.
90    fn deserialize(bytes: &[u8]) -> VortexResult<Self::Metadata>;
91
92    /// Build an array from components.
93    ///
94    /// This is called on the file and IPC deserialization pathways, to reconstruct the array from
95    /// type-erased components.
96    ///
97    /// Encoding implementers should take note that all validation necessary to ensure the encoding
98    /// is safe to read should happen inside of this method.
99    ///
100    /// # Safety and correctness
101    ///
102    /// This method should *never* panic, it must always return an error or else it returns a
103    /// valid `Array` that meets all the encoding's preconditions.
104    ///
105    /// For example, the `build` implementation for a dictionary encoding should ensure that all
106    /// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they
107    /// are all valid string data bytes. Any corrupt files or malformed data buffers should be
108    /// caught here, before returning the deserialized array.
109    ///
110    /// # Validation
111    ///
112    /// Validation is mainly meant to ensure that all internal pointers in the encoding reference
113    /// valid ranges of data, and that all data conforms to its DType constraints. These ensure
114    /// that no array operations will panic at runtime, or yield undefined behavior when unsafe
115    /// operations like `get_unchecked` use indices in the array buffer.
116    ///
117    /// Examples of the kinds of validation that should be part of the `build` step:
118    ///
119    /// * Checking that any offsets buffers point to valid offsets in some other child array
120    /// * Checking that any buffers for data or validity have the appropriate size for the
121    ///   encoding
122    /// * Running UTF-8 validation for any buffers that are expected to hold flat UTF-8 data
123    // TODO(ngates): take the parts by ownership, since most arrays need them anyway
124    fn build(
125        &self,
126        dtype: &DType,
127        len: usize,
128        metadata: &Self::Metadata,
129        buffers: &[BufferHandle],
130        children: &dyn ArrayChildren,
131    ) -> VortexResult<Self::Array>;
132
133    /// Execute this array tree to return a canonical [`Vector`].
134    ///
135    /// The returned vector must be the appropriate one for the array's logical type (they are
136    /// one-to-one with Vortex `DType`s), and should respect the output nullability of the array.
137    ///
138    /// Debug builds will panic if the returned vector is of the wrong type, wrong length, or
139    /// incorrectly contains null values.
140    ///
141    /// Implementations should recursively call [`Array::batch_execute`] on child
142    /// arrays as needed.
143    fn batch_execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
144        vortex_bail!(
145            "Array {} does not support vector execution",
146            Self::encoding(array).id()
147        )
148    }
149}
150
/// Placeholder type used to indicate when a particular vtable is not supported by the encoding.
///
/// It is a field-less unit struct that carries no data and serves purely as a type-level marker:
/// an encoding opts out of an optional vtable (the `ComputeVTable` or `EncodeVTable` associated
/// type of `VTable`) by assigning that associated type to `NotSupported`.
pub struct NotSupported;

/// Generates the boilerplate conversions that let a concrete array type be used as a dynamic
/// `Array`.
///
/// Invoked as `vtable!(Name)`, the macro pastes the identifier into two type names,
/// `NameArray` and `NameVTable`, which must both be in scope at the call site. It then emits,
/// for `NameArray`:
///
/// * `AsRef<dyn Array>` and `Deref<Target = dyn Array>`, both of which reinterpret
///   `&NameArray` as `&ArrayAdapter<NameVTable>`;
/// * `IntoArray`, which transmutes the value into an `ArrayAdapter<NameVTable>` and wraps it in
///   an `Arc`;
/// * `From<NameArray> for ArrayRef`, which forwards to `IntoArray::into_array`.
///
/// # Safety
///
/// The generated code reinterprets `NameArray` as `ArrayAdapter<NameVTable>` by pointer cast
/// and by `transmute`. That is only sound if the two types have identical layout.
/// NOTE(review): this presumably relies on `ArrayAdapter<V>` being a `#[repr(transparent)]`
/// wrapper around `V::Array`, with `NameVTable::Array == NameArray`; `ArrayAdapter` is defined
/// outside this file, so confirm before changing either type.
#[macro_export]
macro_rules! vtable {
    ($V:ident) => {
        $crate::aliases::paste::paste! {
            impl AsRef<dyn $crate::Array> for [<$V Array>] {
                fn as_ref(&self) -> &dyn $crate::Array {
                    // Delegate to the `Deref` impl below so the reference cast (and its safety
                    // argument) lives in exactly one place.
                    <Self as std::ops::Deref>::deref(self)
                }
            }

            impl std::ops::Deref for [<$V Array>] {
                type Target = dyn $crate::Array;

                fn deref(&self) -> &Self::Target {
                    // SAFETY: sound only if `ArrayAdapter<VTable>` has the same layout as the
                    // array type (see the macro-level `# Safety` note; TODO confirm against the
                    // `ArrayAdapter` definition). The pointer comes from a live `&self`, so it
                    // is non-null and aligned, and the result borrows for the same lifetime.
                    unsafe {
                        &*(self as *const [<$V Array>]
                            as *const $crate::ArrayAdapter<[<$V VTable>]>)
                    }
                }
            }

            impl $crate::IntoArray for [<$V Array>] {
                fn into_array(self) -> $crate::ArrayRef {
                    // SAFETY: same layout requirement as in `deref` above (TODO confirm).
                    // `transmute` additionally enforces at compile time that both types have
                    // the same size, and it consumes `self`, so the value is moved, not copied.
                    std::sync::Arc::new(unsafe {
                        std::mem::transmute::<
                            [<$V Array>],
                            $crate::ArrayAdapter::<[<$V VTable>]>,
                        >(self)
                    })
                }
            }

            impl From<[<$V Array>]> for $crate::ArrayRef {
                fn from(value: [<$V Array>]) -> $crate::ArrayRef {
                    use $crate::IntoArray;
                    value.into_array()
                }
            }
        }
    };
}