Skip to main content

vortex_array/array/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! This module contains the VTable definitions for a Vortex encoding.
5//!
6//! A Vortex array encoding is implemented by a small static vtable type plus an associated
7//! `TypedArrayData` value stored in each array instance. The vtable owns behavior such as
8//! validation, serialization, execution, child traversal, scalar access, and validity access.
9//!
10//! The public [`ArrayRef`] API performs common precondition checks before calling
11//! into these traits. Implementations should focus on encoding-specific work and uphold the
12//! documented postconditions.
13
14mod operations;
15mod validity;
16
17use std::fmt::Debug;
18use std::fmt::Display;
19use std::fmt::Formatter;
20use std::hash::Hasher;
21
22pub use operations::*;
23pub use validity::*;
24use vortex_error::VortexExpect;
25use vortex_error::VortexResult;
26use vortex_error::vortex_bail;
27use vortex_error::vortex_ensure;
28use vortex_error::vortex_panic;
29use vortex_session::VortexSession;
30
31use crate::Array;
32use crate::ArrayRef;
33use crate::ArrayView;
34use crate::Canonical;
35use crate::EqMode;
36use crate::ExecutionResult;
37use crate::IntoArray;
38pub use crate::array::plugin::*;
39use crate::arrays::ConstantArray;
40use crate::arrays::constant::Constant;
41use crate::buffer::BufferHandle;
42use crate::builders::ArrayBuilder;
43use crate::dtype::DType;
44use crate::dtype::Nullability;
45use crate::executor::ExecutionCtx;
46use crate::hash::ArrayEq;
47use crate::hash::ArrayHash;
48use crate::patches::Patches;
49use crate::scalar::ScalarValue;
50use crate::serde::ArrayChildren;
51use crate::validity::Validity;
52
53/// The array [`VTable`] encapsulates logic for an Array type within Vortex.
54///
55/// The logic is split across several "VTable" traits to enable easier code organization than
56/// simply lumping everything into a single trait.
57///
58/// From this [`VTable`] trait, we derive implementations for the sealed `DynArrayData` trait and the
59/// public [`ArrayPlugin`] registry trait.
60///
61/// The functions defined in these vtable traits will typically document their pre- and
62/// post-conditions. The pre-conditions are validated inside the `DynArrayData` and [`ArrayRef`]
63/// implementations so do not need to be checked in the vtable implementations (for example, index
64/// out of bounds). Post-conditions are validated after invocation of the vtable function and will
65/// panic if violated.
66pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug {
67    /// Per-array data owned by this encoding, excluding child arrays.
68    ///
69    /// Child arrays belong in [`ArrayParts::slots`](crate::ArrayParts::slots) so traversal,
70    /// serialization, and layout writers can discover them generically.
71    type TypedArrayData: 'static + Send + Sync + Clone + Debug + Display + ArrayHash + ArrayEq;
72
73    /// Scalar and element-wise operation hooks for this encoding.
74    type OperationsVTable: OperationsVTable<Self>;
75    /// Validity hook for nullable instances of this encoding.
76    type ValidityVTable: ValidityVTable<Self>;
77
78    /// Returns the ID of the array.
79    fn id(&self) -> ArrayId;
80
81    /// Validates that externally supplied logical metadata matches the array data.
82    ///
83    /// This is called by [`Array::try_from_parts`](crate::Array::try_from_parts) before the array
84    /// is published. Implementations should check dtype, length, slot count, child dtypes/lengths,
85    /// metadata bounds, and any buffer shape invariants that unsafe accessors depend on.
86    fn validate(
87        &self,
88        data: &Self::TypedArrayData,
89        dtype: &DType,
90        len: usize,
91        slots: &[Option<ArrayRef>],
92    ) -> VortexResult<()>;
93
94    /// Returns the number of top-level buffers in the array.
95    fn nbuffers(array: ArrayView<'_, Self>) -> usize;
96
97    /// Returns the buffer at the given index.
98    ///
99    /// # Panics
100    /// Panics if `idx >= nbuffers(array)`.
101    fn buffer(array: ArrayView<'_, Self>, idx: usize) -> BufferHandle;
102
103    /// Returns the name of the buffer at the given index, or `None` if unnamed.
104    fn buffer_name(array: ArrayView<'_, Self>, idx: usize) -> Option<String>;
105
106    /// Rebuild this array with replacement top-level buffers.
107    ///
108    /// This is for physical rewrites that preserve `dtype`, `len`, child slots, buffer count, and
109    /// buffer lengths. The caller checks the generic invariants before dispatching here;
110    /// implementations should interpret the replacement buffers for their encoding-specific
111    /// in-memory representation.
112    fn with_buffers(
113        &self,
114        array: ArrayView<'_, Self>,
115        buffers: &[BufferHandle],
116    ) -> VortexResult<ArrayParts<Self>>;
117
118    /// Returns the number of children in the array.
119    ///
120    /// The default counts non-None slots.
121    fn nchildren(array: ArrayView<'_, Self>) -> usize {
122        array.slots().iter().filter(|s| s.is_some()).count()
123    }
124
125    /// Returns the child at the given index.
126    ///
127    /// The default returns the `idx`-th non-None slot.
128    ///
129    /// # Panics
130    /// Panics if `idx >= nchildren(array)`.
131    fn child(array: ArrayView<'_, Self>, idx: usize) -> ArrayRef {
132        array
133            .slots()
134            .iter()
135            .filter_map(|s| s.clone())
136            .nth(idx)
137            .vortex_expect("child index out of bounds")
138    }
139
140    /// Returns the name of the child at the given index.
141    ///
142    /// The default returns the slot name of the `idx`-th non-None slot.
143    ///
144    /// # Panics
145    /// Panics if `idx >= nchildren(array)`.
146    fn child_name(array: ArrayView<'_, Self>, idx: usize) -> String {
147        array
148            .slots()
149            .iter()
150            .enumerate()
151            .filter(|(_, s)| s.is_some())
152            .nth(idx)
153            .map(|(slot_idx, _)| Self::slot_name(array, slot_idx))
154            .vortex_expect("child_name index out of bounds")
155    }
156
157    /// Serialize encoding metadata into a byte buffer for IPC or file storage.
158    ///
159    /// Return `None` if the array cannot be serialized by this encoding. Buffers and children are
160    /// serialized separately through [`buffer`](Self::buffer), [`nbuffers`](Self::nbuffers), and
161    /// child traversal.
162    fn serialize(
163        array: ArrayView<'_, Self>,
164        session: &VortexSession,
165    ) -> VortexResult<Option<Vec<u8>>>;
166
167    /// Deserialize an array from serialized metadata, buffers, and children.
168    ///
169    /// The returned [`ArrayParts`] are still validated by the generic adapter.
170    /// Deserializers should use the provided `session` to resolve plugin-owned metadata instead of
171    /// relying on global state.
172    fn deserialize(
173        &self,
174        dtype: &DType,
175        len: usize,
176        metadata: &[u8],
177        buffers: &[BufferHandle],
178        children: &dyn ArrayChildren,
179        session: &VortexSession,
180    ) -> VortexResult<ArrayParts<Self>>;
181
182    /// Writes the array's logical values into a canonical builder.
183    ///
184    /// The default implementation executes the full array to [`Canonical`] and appends that result.
185    /// Encodings may override this to avoid materializing an intermediate canonical array.
186    fn append_to_builder(
187        array: ArrayView<'_, Self>,
188        builder: &mut dyn ArrayBuilder,
189        ctx: &mut ExecutionCtx,
190    ) -> VortexResult<()> {
191        let canonical = array
192            .array()
193            .clone()
194            .execute::<Canonical>(ctx)?
195            .into_array();
196        builder.extend_from_array(&canonical);
197        Ok(())
198    }
199
200    /// Returns the name of the slot at the given index.
201    ///
202    /// # Panics
203    /// Panics if `idx >= slots(array).len()`.
204    fn slot_name(array: ArrayView<'_, Self>, idx: usize) -> String;
205
206    /// Execute this array by returning an [`ExecutionResult`].
207    ///
208    /// Execution is **iterative**, not recursive. Instead of recursively executing children,
209    /// implementations should return [`ExecutionResult::execute_slot`] to request that the
210    /// scheduler execute a slot first, or [`ExecutionResult::done`] when the encoding can
211    /// produce a result directly.
212    ///
213    /// For good examples of this pattern, see:
214    /// - [`Dict::execute`](crate::arrays::dict::vtable::Dict::execute) — demonstrates
215    ///   requiring children via `require_child!` and producing a result once they are canonical.
216    /// - `BitPacked::execute` (in `vortex-fastlanes`) — demonstrates requiring patches and
217    ///   validity via `require_patches!`/`require_validity!`.
218    ///
219    /// Array execution is designed such that repeated execution of an array will eventually
220    /// converge to a canonical representation. Implementations of this function should therefore
221    /// ensure they make progress towards that goal.
222    ///
223    /// The returned array (in `Done`) must be logically equivalent to the input array. In other
224    /// words, the recursively canonicalized forms of both arrays must be equal.
225    ///
226    /// Debug builds will panic if the returned array is of the wrong type, wrong length, or
227    /// incorrectly contains null values.
228    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult>;
229
230    /// Attempt to reduce the array to a simpler representation without changing logical values.
231    ///
232    /// Reductions are opportunistic and may return `Ok(None)` when no cheaper representation is
233    /// known.
234    fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
235        _ = array;
236        Ok(None)
237    }
238
239    /// Attempt to reduce `parent` after this array appears as one of its children.
240    ///
241    /// This is used by lazy arrays to let child execution unlock parent simplifications.
242    fn reduce_parent(
243        array: ArrayView<'_, Self>,
244        parent: &ArrayRef,
245        child_idx: usize,
246    ) -> VortexResult<Option<ArrayRef>> {
247        _ = (array, parent, child_idx);
248        Ok(None)
249    }
250}
251
252/// Alias for migration — downstream code can start using `ArrayVTable`.
253pub use VTable as ArrayVTable;
254
255use crate::array::ArrayId;
256use crate::array::ArrayParts;
257
/// Zero-sized per-array data for encodings that carry no metadata of their own.
#[derive(Debug, Default, Clone)]
pub struct EmptyArrayData;
261
262impl ArrayEq for EmptyArrayData {
263    fn array_eq(&self, _other: &Self, _accuracy: EqMode) -> bool {
264        true
265    }
266}
267impl ArrayHash for EmptyArrayData {
268    fn array_hash<H: Hasher>(&self, _state: &mut H, _accuracy: EqMode) {}
269}
270
271impl Display for EmptyArrayData {
272    fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
273        Ok(())
274    }
275}
276
277/// Rebuild an array that has no top-level buffers.
278#[inline]
279pub fn with_empty_buffers<V: VTable>(
280    vtable: &V,
281    array: ArrayView<'_, V>,
282    buffers: &[BufferHandle],
283) -> VortexResult<ArrayParts<V>> {
284    vortex_ensure!(
285        buffers.is_empty(),
286        "Array {} expects 0 buffers, got {}",
287        array.encoding_id(),
288        buffers.len()
289    );
290    Ok(ArrayParts::new(
291        vtable.clone(),
292        array.dtype().clone(),
293        array.len(),
294        array.data().clone(),
295    )
296    .with_slots(array.slots().iter().cloned().collect()))
297}
298
299/// Reject buffer replacement for encodings whose exposed buffers are not runtime backing buffers.
300#[inline]
301pub fn unsupported_buffer_replacement<V: VTable>(
302    array: ArrayView<'_, V>,
303    _buffers: &[BufferHandle],
304) -> VortexResult<ArrayParts<V>> {
305    vortex_bail!(
306        "Array {} does not support in-memory buffer replacement",
307        array.encoding_id()
308    )
309}
310
/// Marker type an encoding uses in place of a vtable it does not support.
pub struct NotSupported;
313
314/// Returns the validity as a child array if it produces one.
315#[inline]
316pub fn validity_to_child(validity: &Validity, len: usize) -> Option<ArrayRef> {
317    match validity {
318        Validity::NonNullable | Validity::AllValid => None,
319        Validity::AllInvalid => Some(ConstantArray::new(false, len).into_array()),
320        Validity::Array(array) => Some(array.clone()),
321    }
322}
323
324/// Reconstruct a [`Validity`] from an optional child array and nullability.
325///
326/// This is the inverse of [`validity_to_child`].
327#[inline]
328pub fn child_to_validity(child: Option<&ArrayRef>, nullability: Nullability) -> Validity {
329    match child {
330        Some(arr) => {
331            // Detect constant bool arrays created by validity_to_child.
332            // Use direct ScalarValue matching to avoid expensive scalar conversion.
333            if let Some(c) = arr.as_opt::<Constant>()
334                && let Some(ScalarValue::Bool(val)) = c.scalar().value()
335            {
336                return if *val {
337                    Validity::AllValid
338                } else {
339                    Validity::AllInvalid
340                };
341            }
342            Validity::Array(arr.clone())
343        }
344        None => Validity::from(nullability),
345    }
346}
347
348/// Returns 1 if validity produces a child, 0 otherwise.
349#[inline]
350pub fn validity_nchildren(validity: &Validity) -> usize {
351    match validity {
352        Validity::NonNullable | Validity::AllValid => 0,
353        Validity::AllInvalid | Validity::Array(_) => 1,
354    }
355}
356
357/// Returns the number of children produced by patches.
358#[inline]
359pub fn patches_nchildren(patches: &Patches) -> usize {
360    2 + patches.chunk_offsets().is_some() as usize
361}
362
363/// Returns the child at the given index within a patches component.
364#[inline]
365pub fn patches_child(patches: &Patches, idx: usize) -> ArrayRef {
366    match idx {
367        0 => patches.indices().clone(),
368        1 => patches.values().clone(),
369        2 => patches
370            .chunk_offsets()
371            .as_ref()
372            .vortex_expect("patch_chunk_offsets child out of bounds")
373            .clone(),
374        _ => vortex_panic!("patches child index {idx} out of bounds"),
375    }
376}
377
378/// Returns the name of the child at the given index within a patches component.
379#[inline]
380pub fn patches_child_name(idx: usize) -> &'static str {
381    match idx {
382        0 => "patch_indices",
383        1 => "patch_values",
384        2 => "patch_chunk_offsets",
385        _ => vortex_panic!("patches child name index {idx} out of bounds"),
386    }
387}