vortex_array/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! This module contains the VTable definitions for a Vortex encoding.
5
6mod array;
7mod canonical;
8mod compute;
9mod dyn_;
10mod encode;
11mod operations;
12mod validity;
13mod visitor;
14
15use std::fmt::Debug;
16use std::ops::Deref;
17
18pub use array::*;
19pub use canonical::*;
20pub use compute::*;
21pub use dyn_::*;
22pub use encode::*;
23pub use operations::*;
24pub use validity::*;
25pub use visitor::*;
26use vortex_dtype::DType;
27use vortex_error::VortexResult;
28use vortex_vector::Vector;
29
30use crate::Array;
31use crate::ArrayRef;
32use crate::IntoArray;
33use crate::VectorExecutor;
34use crate::buffer::BufferHandle;
35use crate::executor::ExecutionCtx;
36use crate::serde::ArrayChildren;
37
38/// The array [`VTable`] encapsulates logic for an Array type within Vortex.
39///
40/// The logic is split across several "VTable" traits to enable easier code organization than
41/// simply lumping everything into a single trait.
42///
43/// Some of these vtables are optional, such as the [`ComputeVTable`] and [`EncodeVTable`],
44/// which can be disabled by assigning to the [`NotSupported`] type.
45///
46/// From this [`VTable`] trait, we derive implementations for the sealed [`Array`] and [`DynVTable`]
47/// traits via the [`crate::ArrayAdapter`] and [`ArrayVTableAdapter`] types respectively.
48///
49/// The functions defined in these vtable traits will typically document their pre- and
50/// post-conditions. The pre-conditions are validated inside the [`Array`] and [`DynVTable`]
51/// implementations so do not need to be checked in the vtable implementations (for example, index
52/// out of bounds). Post-conditions are validated after invocation of the vtable function and will
53/// panic if violated.
54pub trait VTable: 'static + Sized + Send + Sync + Debug {
55    type Array: 'static + Send + Sync + Clone + Debug + Deref<Target = dyn Array> + IntoArray;
56    type Metadata: Debug;
57
58    type ArrayVTable: BaseArrayVTable<Self>;
59    type CanonicalVTable: CanonicalVTable<Self>;
60    type OperationsVTable: OperationsVTable<Self>;
61    type ValidityVTable: ValidityVTable<Self>;
62    type VisitorVTable: VisitorVTable<Self>;
63
64    /// Optionally enable implementing dynamic compute dispatch for this encoding.
65    /// Can be disabled by assigning to the [`NotSupported`] type.
66    type ComputeVTable: ComputeVTable<Self>;
67    /// Optionally enable the [`EncodeVTable`] for this encoding. This allows it to partake in
68    /// compression.
69    /// Can be disabled by assigning to the [`NotSupported`] type.
70    type EncodeVTable: EncodeVTable<Self>;
71
72    /// Returns the ID of the encoding.
73    fn id(&self) -> ArrayId;
74
75    /// Returns the encoding for the array.
76    fn encoding(array: &Self::Array) -> ArrayVTable;
77
78    /// Exports metadata for an array.
79    ///
80    /// All other parts of the array are exported using the [`crate::vtable::VisitorVTable`].
81    ///
82    /// * If the array does not contain metadata, it should return
83    ///   [`crate::metadata::EmptyMetadata`].
84    fn metadata(array: &Self::Array) -> VortexResult<Self::Metadata>;
85
86    /// Serialize metadata into a byte buffer for IPC or file storage.
87    /// Return `None` if the array cannot be serialized.
88    fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>>;
89
90    /// Deserialize metadata from a byte buffer.
91    fn deserialize(bytes: &[u8]) -> VortexResult<Self::Metadata>;
92
93    /// Build an array from components.
94    ///
95    /// This is called on the file and IPC deserialization pathways, to reconstruct the array from
96    /// type-erased components.
97    ///
98    /// Encoding implementers should take note that all validation necessary to ensure the encoding
99    /// is safe to read should happen inside of this method.
100    ///
101    /// # Safety and correctness
102    ///
103    /// This method should *never* panic, it must always return an error or else it returns a
104    /// valid `Array` that meets all the encoding's preconditions.
105    ///
106    /// For example, the `build` implementation for a dictionary encoding should ensure that all
107    /// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they
108    /// are all valid string data bytes. Any corrupt files or malformed data buffers should be
109    /// caught here, before returning the deserialized array.
110    ///
111    /// # Validation
112    ///
113    /// Validation is mainly meant to ensure that all internal pointers in the encoding reference
114    /// valid ranges of data, and that all data conforms to its DType constraints. These ensure
115    /// that no array operations will panic at runtime, or yield undefined behavior when unsafe
116    /// operations like `get_unchecked` use indices in the array buffer.
117    ///
118    /// Examples of the kinds of validation that should be part of the `build` step:
119    ///
120    /// * Checking that any offsets buffers point to valid offsets in some other child array
121    /// * Checking that any buffers for data or validity have the appropriate size for the
122    ///   encoding
123    /// * Running UTF-8 validation for any buffers that are expected to hold flat UTF-8 data
124    // TODO(ngates): take the parts by ownership, since most arrays need them anyway
125    fn build(
126        &self,
127        dtype: &DType,
128        len: usize,
129        metadata: &Self::Metadata,
130        buffers: &[BufferHandle],
131        children: &dyn ArrayChildren,
132    ) -> VortexResult<Self::Array>;
133
134    /// Replaces the children in `array` with `children`. The count must be the same and types
135    /// of children must be expected.
136    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()>;
137
138    /// Execute this array to produce a [`Vector`].
139    ///
140    /// The returned [`Vector`] must be the appropriate one for the array's logical
141    /// type (they are one-to-one with Vortex `DType`s), and should respect the output nullability
142    /// of the array.
143    ///
144    /// Debug builds will panic if the returned vector is of the wrong type, wrong length, or
145    /// incorrectly contains null values.
146    ///
147    /// Implementations should recursively call [`crate::executor::VectorExecutor::execute`] on
148    /// child arrays as needed.
149    fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
150        // TODO(ngates): convert arrays to canonicalize over vectors.
151        let canonical = Self::CanonicalVTable::canonicalize(array);
152        canonical.into_array().execute_vector(ctx.session())
153    }
154
155    /// Attempt to execute the parent of this array to produce a [`Vector`].
156    ///
157    /// This function allows arrays to plug in specialized execution logic for their parent. For
158    /// example, strings compressed as FSST arrays can implement a custom equality comparison when
159    /// the comparing against a scalar string.
160    ///
161    /// Returns `Ok(None)` if no specialized execution is possible.
162    fn execute_parent(
163        array: &Self::Array,
164        parent: &ArrayRef,
165        child_idx: usize,
166        ctx: &mut ExecutionCtx,
167    ) -> VortexResult<Option<Vector>> {
168        _ = (array, parent, child_idx, ctx);
169        Ok(None)
170    }
171
172    /// Attempt to reduce the array to a more simple representation.
173    ///
174    /// Returns `Ok(None)` if no reduction is possible.
175    fn reduce(array: &Self::Array) -> VortexResult<Option<ArrayRef>> {
176        _ = array;
177        Ok(None)
178    }
179
180    /// Attempt to perform a reduction of the parent of this array.
181    ///
182    /// This function allows arrays to plug in reduction rules to their parents, for example
183    /// run-end arrays can pull-down scalar functions and apply them only over their values.
184    ///
185    /// Returns `Ok(None)` if no reduction is possible.
186    fn reduce_parent(
187        array: &Self::Array,
188        parent: &ArrayRef,
189        child_idx: usize,
190    ) -> VortexResult<Option<ArrayRef>> {
191        _ = (array, parent, child_idx);
192        Ok(None)
193    }
194}
195
/// Placeholder type used to indicate when a particular vtable is not supported by the encoding.
///
/// It is a zero-sized unit struct that is assigned to an optional vtable slot of an encoding
/// to disable that vtable. The derives let it be debug-printed and freely copied, so it can be
/// embedded in types that themselves derive `Debug`, `Clone` or `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct NotSupported;
198
/// Generates the adapter boilerplate for the encoding named `$V`.
///
/// For an invocation `vtable!(Foo)`, the types `FooArray` and `FooVTable` must be in scope at
/// the call site. The macro then implements, for `FooArray`:
///
/// * `AsRef<dyn Array>` and `Deref<Target = dyn Array>`, both of which view the array as an
///   `ArrayAdapter<FooVTable>`;
/// * `IntoArray`, which moves the array into an `Arc` holding an `ArrayAdapter<FooVTable>`;
/// * `From<FooArray> for ArrayRef`, which forwards to `IntoArray::into_array`.
///
/// Standard-library items are referenced through absolute `::std::…` paths so that a `std`
/// module at the call site cannot shadow them.
#[macro_export]
macro_rules! vtable {
    ($V:ident) => {
        $crate::aliases::paste::paste! {
            impl ::std::convert::AsRef<dyn $crate::Array> for [<$V Array>] {
                fn as_ref(&self) -> &dyn $crate::Array {
                    // Delegate to `Deref` so the unsafe cast lives in exactly one place.
                    <Self as ::std::ops::Deref>::deref(self)
                }
            }

            impl ::std::ops::Deref for [<$V Array>] {
                type Target = dyn $crate::Array;

                fn deref(&self) -> &Self::Target {
                    // SAFETY: reinterpreting `&[<$V Array>]` as `&ArrayAdapter<[<$V VTable>]>`
                    // is only sound if the adapter has exactly the layout of the array it
                    // wraps (e.g. a `#[repr(transparent)]` newtype). `ArrayAdapter` is declared
                    // outside this file. NOTE(review): confirm that guarantee holds.
                    unsafe { &*(self as *const [<$V Array>] as *const $crate::ArrayAdapter<[<$V VTable>]>) }
                }
            }

            impl $crate::IntoArray for [<$V Array>] {
                fn into_array(self) -> $crate::ArrayRef {
                    // SAFETY: relies on the same layout equivalence between `[<$V Array>]` and
                    // `ArrayAdapter<[<$V VTable>]>` as the `Deref` impl above; `transmute`
                    // additionally checks at compile time that both types have the same size.
                    ::std::sync::Arc::new(unsafe { ::std::mem::transmute::<[<$V Array>], $crate::ArrayAdapter::<[<$V VTable>]>>(self) })
                }
            }

            impl ::std::convert::From<[<$V Array>]> for $crate::ArrayRef {
                fn from(value: [<$V Array>]) -> $crate::ArrayRef {
                    use $crate::IntoArray;
                    value.into_array()
                }
            }
        }
    };
}