vortex_array/vtable/mod.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! This module contains the VTable definitions for a Vortex encoding.
5
6mod dyn_;
7mod operations;
8mod validity;
9
10use std::fmt::Debug;
11use std::hash::Hasher;
12use std::ops::Deref;
13
14pub use dyn_::*;
15pub use operations::*;
16pub use validity::*;
17use vortex_error::VortexExpect;
18use vortex_error::VortexResult;
19use vortex_error::vortex_panic;
20use vortex_session::VortexSession;
21
22use crate::ArrayRef;
23use crate::Canonical;
24use crate::DynArray;
25use crate::ExecutionStep;
26use crate::IntoArray;
27use crate::Precision;
28use crate::arrays::ConstantArray;
29use crate::buffer::BufferHandle;
30use crate::builders::ArrayBuilder;
31use crate::dtype::DType;
32use crate::executor::ExecutionCtx;
33use crate::patches::Patches;
34use crate::serde::ArrayChildren;
35use crate::stats::StatsSetRef;
36use crate::validity::Validity;
37
38/// The array [`VTable`] encapsulates logic for an Array type within Vortex.
39///
40/// The logic is split across several "VTable" traits to enable easier code organization than
41/// simply lumping everything into a single trait.
42///
43/// From this [`VTable`] trait, we derive implementations for the sealed [`DynArray`] and [`DynVTable`]
44/// traits.
45///
46/// The functions defined in these vtable traits will typically document their pre- and
47/// post-conditions. The pre-conditions are validated inside the [`DynArray`] and [`DynVTable`]
48/// implementations so do not need to be checked in the vtable implementations (for example, index
49/// out of bounds). Post-conditions are validated after invocation of the vtable function and will
50/// panic if violated.
51pub trait VTable: 'static + Sized + Send + Sync + Debug {
52 type Array: 'static + Send + Sync + Clone + Debug + Deref<Target = dyn DynArray> + IntoArray;
53 type Metadata: Debug;
54
55 type OperationsVTable: OperationsVTable<Self>;
56 type ValidityVTable: ValidityVTable<Self>;
57
58 /// Returns the ID of the array.
59 fn id(array: &Self::Array) -> ArrayId;
60
61 /// Returns the length of the array.
62 fn len(array: &Self::Array) -> usize;
63
64 /// Returns the DType of the array.
65 fn dtype(array: &Self::Array) -> &DType;
66
67 /// Returns the stats set for the array.
68 fn stats(array: &Self::Array) -> StatsSetRef<'_>;
69
70 /// Hashes the array contents.
71 fn array_hash<H: Hasher>(array: &Self::Array, state: &mut H, precision: Precision);
72
73 /// Compares two arrays of the same type for equality.
74 fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool;
75
76 /// Returns the number of buffers in the array.
77 fn nbuffers(array: &Self::Array) -> usize;
78
79 /// Returns the buffer at the given index.
80 ///
81 /// # Panics
82 /// Panics if `idx >= nbuffers(array)`.
83 fn buffer(array: &Self::Array, idx: usize) -> BufferHandle;
84
85 /// Returns the name of the buffer at the given index, or `None` if unnamed.
86 fn buffer_name(array: &Self::Array, idx: usize) -> Option<String>;
87
88 /// Returns the number of children in the array.
89 fn nchildren(array: &Self::Array) -> usize;
90
91 /// Returns the child at the given index.
92 ///
93 /// # Panics
94 /// Panics if `idx >= nchildren(array)`.
95 fn child(array: &Self::Array, idx: usize) -> ArrayRef;
96
97 /// Returns the name of the child at the given index.
98 ///
99 /// # Panics
100 /// Panics if `idx >= nchildren(array)`.
101 fn child_name(array: &Self::Array, idx: usize) -> String;
102
103 /// Exports metadata for an array.
104 ///
105 /// * If the array does not contain metadata, it should return
106 /// [`crate::metadata::EmptyMetadata`].
107 fn metadata(array: &Self::Array) -> VortexResult<Self::Metadata>;
108
109 /// Serialize metadata into a byte buffer for IPC or file storage.
110 /// Return `None` if the array cannot be serialized.
111 fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>>;
112
113 /// Deserialize array metadata from a byte buffer.
114 ///
115 /// To reduce the serialized form, arrays do not store their own DType and length. Instead,
116 /// this is passed down from the parent array during deserialization. These properties are
117 /// exposed here for use during deserialization.
118 fn deserialize(
119 bytes: &[u8],
120 _dtype: &DType,
121 _len: usize,
122 _buffers: &[BufferHandle],
123 _session: &VortexSession,
124 ) -> VortexResult<Self::Metadata>;
125
126 /// Writes the array into a canonical builder.
127 ///
128 /// ## Post-conditions
129 /// - The length of the builder is incremented by the length of the input array.
130 fn append_to_builder(
131 array: &Self::Array,
132 builder: &mut dyn ArrayBuilder,
133 ctx: &mut ExecutionCtx,
134 ) -> VortexResult<()> {
135 let canonical = array.to_array().execute::<Canonical>(ctx)?.into_array();
136 builder.extend_from_array(&canonical);
137 Ok(())
138 }
139
140 /// Build an array from components.
141 ///
142 /// This is called on the file and IPC deserialization pathways, to reconstruct the array from
143 /// type-erased components.
144 ///
145 /// Encoding implementers should take note that all validation necessary to ensure the encoding
146 /// is safe to read should happen inside of this method.
147 ///
148 /// # Safety and correctness
149 ///
150 /// This method should *never* panic, it must always return an error or else it returns a
151 /// valid `Array` that meets all the encoding's preconditions.
152 ///
153 /// For example, the `build` implementation for a dictionary encoding should ensure that all
154 /// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they
155 /// are all valid string data bytes. Any corrupt files or malformed data buffers should be
156 /// caught here, before returning the deserialized array.
157 ///
158 /// # Validation
159 ///
160 /// Validation is mainly meant to ensure that all internal pointers in the encoding reference
161 /// valid ranges of data, and that all data conforms to its DType constraints. These ensure
162 /// that no array operations will panic at runtime, or yield undefined behavior when unsafe
163 /// operations like `get_unchecked` use indices in the array buffer.
164 ///
165 /// Examples of the kinds of validation that should be part of the `build` step:
166 ///
167 /// * Checking that any offsets buffers point to valid offsets in some other child array
168 /// * Checking that any buffers for data or validity have the appropriate size for the
169 /// encoding
170 /// * Running UTF-8 validation for any buffers that are expected to hold flat UTF-8 data
171 // TODO(ngates): take the parts by ownership, since most arrays need them anyway
172 fn build(
173 dtype: &DType,
174 len: usize,
175 metadata: &Self::Metadata,
176 buffers: &[BufferHandle],
177 children: &dyn ArrayChildren,
178 ) -> VortexResult<Self::Array>;
179
180 /// Replaces the children in `array` with `children`. The count must be the same and types
181 /// of children must be expected.
182 fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()>;
183
184 /// Execute this array by returning an [`ExecutionStep`] that tells the scheduler what to
185 /// do next.
186 ///
187 /// Instead of recursively executing children, implementations should return
188 /// [`ExecutionStep::ExecuteChild(i)`] to request that the scheduler execute a child first,
189 /// or [`ExecutionStep::Done(result)`] when the
190 /// encoding can produce a result directly.
191 ///
192 /// Array execution is designed such that repeated execution of an array will eventually
193 /// converge to a canonical representation. Implementations of this function should therefore
194 /// ensure they make progress towards that goal.
195 ///
196 /// The returned array (in `Done`) must be logically equivalent to the input array. In other
197 /// words, the recursively canonicalized forms of both arrays must be equal.
198 ///
199 /// Debug builds will panic if the returned array is of the wrong type, wrong length, or
200 /// incorrectly contains null values.
201 fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionStep>;
202
203 /// Attempt to execute the parent of this array.
204 ///
205 /// This function allows arrays to plug in specialized execution logic for their parent. For
206 /// example, strings compressed as FSST arrays can implement a custom equality comparison when
207 /// the comparing against a scalar string.
208 ///
209 /// Returns `Ok(None)` if no specialized execution is possible.
210 fn execute_parent(
211 array: &Self::Array,
212 parent: &ArrayRef,
213 child_idx: usize,
214 ctx: &mut ExecutionCtx,
215 ) -> VortexResult<Option<ArrayRef>> {
216 _ = (array, parent, child_idx, ctx);
217 Ok(None)
218 }
219
220 /// Attempt to reduce the array to a more simple representation.
221 ///
222 /// Returns `Ok(None)` if no reduction is possible.
223 fn reduce(array: &Self::Array) -> VortexResult<Option<ArrayRef>> {
224 _ = array;
225 Ok(None)
226 }
227
228 /// Attempt to perform a reduction of the parent of this array.
229 ///
230 /// This function allows arrays to plug in reduction rules to their parents, for example
231 /// run-end arrays can pull-down scalar functions and apply them only over their values.
232 ///
233 /// Returns `Ok(None)` if no reduction is possible.
234 fn reduce_parent(
235 array: &Self::Array,
236 parent: &ArrayRef,
237 child_idx: usize,
238 ) -> VortexResult<Option<ArrayRef>> {
239 _ = (array, parent, child_idx);
240 Ok(None)
241 }
242}
243
244/// Placeholder type used to indicate when a particular vtable is not supported by the encoding.
245pub struct NotSupported;
246
247/// Returns the validity as a child array if it produces one.
248///
249/// - `NonNullable` and `AllValid` produce no child (returns `None`)
250/// - `AllInvalid` produces a `ConstantArray` of `false` values
251/// - `Array` returns the validity array
252#[inline]
253pub fn validity_to_child(validity: &Validity, len: usize) -> Option<ArrayRef> {
254 match validity {
255 Validity::NonNullable | Validity::AllValid => None,
256 Validity::AllInvalid => Some(ConstantArray::new(false, len).into_array()),
257 Validity::Array(array) => Some(array.clone()),
258 }
259}
260
261/// Returns 1 if validity produces a child, 0 otherwise.
262#[inline]
263pub fn validity_nchildren(validity: &Validity) -> usize {
264 match validity {
265 Validity::NonNullable | Validity::AllValid => 0,
266 Validity::AllInvalid | Validity::Array(_) => 1,
267 }
268}
269
270/// Returns the number of children produced by patches.
271#[inline]
272pub fn patches_nchildren(patches: &Patches) -> usize {
273 2 + patches.chunk_offsets().is_some() as usize
274}
275
276/// Returns the child at the given index within a patches component.
277///
278/// Index 0 = patch_indices, 1 = patch_values, 2 = patch_chunk_offsets (if present).
279#[inline]
280pub fn patches_child(patches: &Patches, idx: usize) -> ArrayRef {
281 match idx {
282 0 => patches.indices().clone(),
283 1 => patches.values().clone(),
284 2 => patches
285 .chunk_offsets()
286 .as_ref()
287 .vortex_expect("patch_chunk_offsets child out of bounds")
288 .clone(),
289 _ => vortex_panic!("patches child index {idx} out of bounds"),
290 }
291}
292
293/// Returns the name of the child at the given index within a patches component.
294#[inline]
295pub fn patches_child_name(idx: usize) -> &'static str {
296 match idx {
297 0 => "patch_indices",
298 1 => "patch_values",
299 2 => "patch_chunk_offsets",
300 _ => vortex_panic!("patches child name index {idx} out of bounds"),
301 }
302}
303
304#[macro_export]
305macro_rules! vtable {
306 ($V:ident) => {
307 $crate::aliases::paste::paste! {
308 impl AsRef<dyn $crate::DynArray> for [<$V Array>] {
309 fn as_ref(&self) -> &dyn $crate::DynArray {
310 // We can unsafe cast ourselves to an ArrayAdapter.
311 unsafe { &*(self as *const [<$V Array>] as *const $crate::ArrayAdapter<[<$V VTable>]>) }
312 }
313 }
314
315 impl std::ops::Deref for [<$V Array>] {
316 type Target = dyn $crate::DynArray;
317
318 fn deref(&self) -> &Self::Target {
319 // We can unsafe cast ourselves to an ArrayAdapter.
320 unsafe { &*(self as *const [<$V Array>] as *const $crate::ArrayAdapter<[<$V VTable>]>) }
321 }
322 }
323
324 impl $crate::IntoArray for [<$V Array>] {
325 fn into_array(self) -> $crate::ArrayRef {
326 // We can unsafe transmute ourselves to an ArrayAdapter.
327 std::sync::Arc::new(unsafe { std::mem::transmute::<[<$V Array>], $crate::ArrayAdapter::<[<$V VTable>]>>(self) })
328 }
329 }
330
331 impl From<[<$V Array>]> for $crate::ArrayRef {
332 fn from(value: [<$V Array>]) -> $crate::ArrayRef {
333 use $crate::IntoArray;
334 value.into_array()
335 }
336 }
337
338 impl [<$V Array>] {
339 #[deprecated(note = "use `.into_array()` (owned) or `.clone().into_array()` (ref) to make clones explicit")]
340 pub fn to_array(&self) -> $crate::ArrayRef {
341 use $crate::IntoArray;
342 self.clone().into_array()
343 }
344 }
345 }
346 };
347}