Skip to main content

vortex_array/vtable/
mod.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! This module contains the VTable definitions for a Vortex encoding.
5
6mod array;
7mod dyn_;
8mod operations;
9mod validity;
10mod visitor;
11
12use std::fmt::Debug;
13use std::ops::Deref;
14
15pub use array::*;
16pub use dyn_::*;
17pub use operations::*;
18pub use validity::*;
19pub use visitor::*;
20use vortex_dtype::DType;
21use vortex_error::VortexResult;
22use vortex_session::VortexSession;
23
24use crate::Array;
25use crate::ArrayRef;
26use crate::IntoArray;
27use crate::buffer::BufferHandle;
28use crate::builders::ArrayBuilder;
29use crate::executor::ExecutionCtx;
30use crate::serde::ArrayChildren;
31
32/// The array [`VTable`] encapsulates logic for an Array type within Vortex.
33///
34/// The logic is split across several "VTable" traits to enable easier code organization than
35/// simply lumping everything into a single trait.
36///
37/// From this [`VTable`] trait, we derive implementations for the sealed [`Array`] and [`DynVTable`]
38/// traits.
39///
40/// The functions defined in these vtable traits will typically document their pre- and
41/// post-conditions. The pre-conditions are validated inside the [`Array`] and [`DynVTable`]
42/// implementations so do not need to be checked in the vtable implementations (for example, index
43/// out of bounds). Post-conditions are validated after invocation of the vtable function and will
44/// panic if violated.
45pub trait VTable: 'static + Sized + Send + Sync + Debug {
46    type Array: 'static + Send + Sync + Clone + Debug + Deref<Target = dyn Array> + IntoArray;
47    type Metadata: Debug;
48
49    type ArrayVTable: BaseArrayVTable<Self>;
50    type OperationsVTable: OperationsVTable<Self>;
51    type ValidityVTable: ValidityVTable<Self>;
52    type VisitorVTable: VisitorVTable<Self>;
53
54    /// Returns the ID of the array.
55    fn id(array: &Self::Array) -> ArrayId;
56
57    /// Exports metadata for an array.
58    ///
59    /// All other parts of the array are exported using the [`crate::vtable::VisitorVTable`].
60    ///
61    /// * If the array does not contain metadata, it should return
62    ///   [`crate::metadata::EmptyMetadata`].
63    fn metadata(array: &Self::Array) -> VortexResult<Self::Metadata>;
64
65    /// Serialize metadata into a byte buffer for IPC or file storage.
66    /// Return `None` if the array cannot be serialized.
67    fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>>;
68
69    /// Deserialize array metadata from a byte buffer.
70    ///
71    /// To reduce the serialized form, arrays do not store their own DType and length. Instead,
72    /// this is passed down from the parent array during deserialization. These properties are
73    /// exposed here for use during deserialization.
74    fn deserialize(
75        bytes: &[u8],
76        _dtype: &DType,
77        _len: usize,
78        _buffers: &[BufferHandle],
79        _session: &VortexSession,
80    ) -> VortexResult<Self::Metadata>;
81
82    /// Writes the array into a canonical builder.
83    ///
84    /// ## Post-conditions
85    /// - The length of the builder is incremented by the length of the input array.
86    fn append_to_builder(
87        array: &Self::Array,
88        builder: &mut dyn ArrayBuilder,
89        ctx: &mut ExecutionCtx,
90    ) -> VortexResult<()> {
91        let array = Self::execute(array, ctx)?;
92        builder.extend_from_array(array.as_ref());
93        Ok(())
94    }
95
96    /// Build an array from components.
97    ///
98    /// This is called on the file and IPC deserialization pathways, to reconstruct the array from
99    /// type-erased components.
100    ///
101    /// Encoding implementers should take note that all validation necessary to ensure the encoding
102    /// is safe to read should happen inside of this method.
103    ///
104    /// # Safety and correctness
105    ///
106    /// This method should *never* panic, it must always return an error or else it returns a
107    /// valid `Array` that meets all the encoding's preconditions.
108    ///
109    /// For example, the `build` implementation for a dictionary encoding should ensure that all
110    /// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they
111    /// are all valid string data bytes. Any corrupt files or malformed data buffers should be
112    /// caught here, before returning the deserialized array.
113    ///
114    /// # Validation
115    ///
116    /// Validation is mainly meant to ensure that all internal pointers in the encoding reference
117    /// valid ranges of data, and that all data conforms to its DType constraints. These ensure
118    /// that no array operations will panic at runtime, or yield undefined behavior when unsafe
119    /// operations like `get_unchecked` use indices in the array buffer.
120    ///
121    /// Examples of the kinds of validation that should be part of the `build` step:
122    ///
123    /// * Checking that any offsets buffers point to valid offsets in some other child array
124    /// * Checking that any buffers for data or validity have the appropriate size for the
125    ///   encoding
126    /// * Running UTF-8 validation for any buffers that are expected to hold flat UTF-8 data
127    // TODO(ngates): take the parts by ownership, since most arrays need them anyway
128    fn build(
129        dtype: &DType,
130        len: usize,
131        metadata: &Self::Metadata,
132        buffers: &[BufferHandle],
133        children: &dyn ArrayChildren,
134    ) -> VortexResult<Self::Array>;
135
136    /// Replaces the children in `array` with `children`. The count must be the same and types
137    /// of children must be expected.
138    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()>;
139
140    /// Execute this array to produce an [`ArrayRef`].
141    ///
142    /// Array execution is designed such that repeated execution of an array will eventually
143    /// converge to a canonical representation. Implementations of this function should therefore
144    /// ensure they make progress towards that goal.
145    ///
146    /// This includes fully evaluating the array, such us decoding run-end encoding, or executing
147    /// one of the array's children and re-building the array with the executed child.
148    ///
149    /// It is recommended to only perform a single step of execution per call to this function,
150    /// such that surrounding arrays have an opportunity to perform their own parent reduction
151    /// or execution logic.
152    ///
153    /// The returned array must be logically equivalent to the input array. In other words, the
154    /// recursively canonicalized forms of both arrays must be equal.
155    ///
156    /// Debug builds will panic if the returned array is of the wrong type, wrong length, or
157    /// incorrectly contains null values.
158    ///
159    // TODO(ngates): in the future, we may pass a "target encoding hint" such that this array
160    //  can produce a more optimal representation for the parent. This could be used to preserve
161    //  varbin vs varbinview or list vs listview encodings when the parent knows it prefers
162    //  one representation over another, such as when exporting to a specific Arrow array.
163    fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef>;
164
165    /// Attempt to execute the parent of this array.
166    ///
167    /// This function allows arrays to plug in specialized execution logic for their parent. For
168    /// example, strings compressed as FSST arrays can implement a custom equality comparison when
169    /// the comparing against a scalar string.
170    ///
171    /// Returns `Ok(None)` if no specialized execution is possible.
172    fn execute_parent(
173        array: &Self::Array,
174        parent: &ArrayRef,
175        child_idx: usize,
176        ctx: &mut ExecutionCtx,
177    ) -> VortexResult<Option<ArrayRef>> {
178        _ = (array, parent, child_idx, ctx);
179        Ok(None)
180    }
181
182    /// Attempt to reduce the array to a more simple representation.
183    ///
184    /// Returns `Ok(None)` if no reduction is possible.
185    fn reduce(array: &Self::Array) -> VortexResult<Option<ArrayRef>> {
186        _ = array;
187        Ok(None)
188    }
189
190    /// Attempt to perform a reduction of the parent of this array.
191    ///
192    /// This function allows arrays to plug in reduction rules to their parents, for example
193    /// run-end arrays can pull-down scalar functions and apply them only over their values.
194    ///
195    /// Returns `Ok(None)` if no reduction is possible.
196    fn reduce_parent(
197        array: &Self::Array,
198        parent: &ArrayRef,
199        child_idx: usize,
200    ) -> VortexResult<Option<ArrayRef>> {
201        _ = (array, parent, child_idx);
202        Ok(None)
203    }
204}
205
/// Placeholder type used to indicate when a particular vtable is not supported by the encoding.
pub struct NotSupported;
208
/// Generates the conversions between a concrete `<V>Array` type and the dynamic array API.
///
/// For an identifier `V`, the macro expects types named `<V>Array` and `<V>VTable` to be in
/// scope at the call site and emits the following impls for `<V>Array`:
///
/// * `AsRef<dyn Array>` and `Deref<Target = dyn Array>`, by reinterpreting `&<V>Array` as
///   `&ArrayAdapter<<V>VTable>`.
/// * `IntoArray`, by transmuting the value into `ArrayAdapter<<V>VTable>` and wrapping it in
///   an `Arc`.
/// * `From<<V>Array> for ArrayRef`, delegating to `IntoArray::into_array`.
///
/// # Safety
///
/// The generated `unsafe` code is only sound if `ArrayAdapter<<V>VTable>` has exactly the same
/// layout as `<V>Array` (for example a `#[repr(transparent)]` wrapper around it).
/// NOTE(review): `ArrayAdapter` is defined outside this file — confirm that guarantee holds.
#[macro_export]
macro_rules! vtable {
    ($V:ident) => {
        $crate::aliases::paste::paste! {
            impl ::std::convert::AsRef<dyn $crate::Array> for [<$V Array>] {
                fn as_ref(&self) -> &dyn $crate::Array {
                    // SAFETY: relies on `ArrayAdapter<V>` being layout-compatible with
                    // `V::Array` (see the macro-level safety note), so a shared reference to
                    // one can be reinterpreted as a shared reference to the other.
                    unsafe {
                        &*(self as *const Self).cast::<$crate::ArrayAdapter<[<$V VTable>]>>()
                    }
                }
            }

            impl ::std::ops::Deref for [<$V Array>] {
                type Target = dyn $crate::Array;

                fn deref(&self) -> &Self::Target {
                    // SAFETY: relies on `ArrayAdapter<V>` being layout-compatible with
                    // `V::Array` (see the macro-level safety note), so a shared reference to
                    // one can be reinterpreted as a shared reference to the other.
                    unsafe {
                        &*(self as *const Self).cast::<$crate::ArrayAdapter<[<$V VTable>]>>()
                    }
                }
            }

            impl $crate::IntoArray for [<$V Array>] {
                fn into_array(self) -> $crate::ArrayRef {
                    // SAFETY: relies on `ArrayAdapter<V>` being layout-compatible with
                    // `V::Array` (see the macro-level safety note). `transmute` additionally
                    // rejects a size mismatch at compile time.
                    let adapter = unsafe {
                        ::std::mem::transmute::<[<$V Array>], $crate::ArrayAdapter<[<$V VTable>]>>(self)
                    };
                    ::std::sync::Arc::new(adapter)
                }
            }

            impl ::std::convert::From<[<$V Array>]> for $crate::ArrayRef {
                fn from(value: [<$V Array>]) -> $crate::ArrayRef {
                    $crate::IntoArray::into_array(value)
                }
            }
        }
    };
}