Skip to main content

vortex_array/array/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! This module contains the VTable definitions for a Vortex encoding.
5
6mod operations;
7mod validity;
8
9use std::fmt::Debug;
10use std::fmt::Display;
11use std::fmt::Formatter;
12use std::hash::Hasher;
13
14pub use operations::*;
15pub use validity::*;
16use vortex_error::VortexExpect;
17use vortex_error::VortexResult;
18use vortex_error::vortex_panic;
19use vortex_session::VortexSession;
20
21use crate::Array;
22use crate::ArrayRef;
23use crate::ArrayView;
24use crate::Canonical;
25use crate::ExecutionResult;
26use crate::IntoArray;
27use crate::Precision;
28pub use crate::array::plugin::*;
29use crate::arrays::ConstantArray;
30use crate::arrays::constant::Constant;
31use crate::buffer::BufferHandle;
32use crate::builders::ArrayBuilder;
33use crate::dtype::DType;
34use crate::dtype::Nullability;
35use crate::executor::ExecutionCtx;
36use crate::hash::ArrayEq;
37use crate::hash::ArrayHash;
38use crate::patches::Patches;
39use crate::scalar::ScalarValue;
40use crate::serde::ArrayChildren;
41use crate::validity::Validity;
42
43/// The array [`VTable`] encapsulates logic for an Array type within Vortex.
44///
45/// The logic is split across several "VTable" traits to enable easier code organization than
46/// simply lumping everything into a single trait.
47///
48/// From this [`VTable`] trait, we derive implementations for the sealed `DynArray` trait and the
49/// public [`ArrayPlugin`] registry trait.
50///
51/// The functions defined in these vtable traits will typically document their pre- and
52/// post-conditions. The pre-conditions are validated inside the `DynArray` and [`ArrayRef`]
53/// implementations so do not need to be checked in the vtable implementations (for example, index
54/// out of bounds). Post-conditions are validated after invocation of the vtable function and will
55/// panic if violated.
56pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug {
57    type ArrayData: 'static + Send + Sync + Clone + Debug + Display + ArrayHash + ArrayEq;
58
59    type OperationsVTable: OperationsVTable<Self>;
60    type ValidityVTable: ValidityVTable<Self>;
61
62    /// Returns the ID of the array.
63    fn id(&self) -> ArrayId;
64
65    /// Validates that externally supplied logical metadata matches the array data.
66    fn validate(
67        &self,
68        data: &Self::ArrayData,
69        dtype: &DType,
70        len: usize,
71        slots: &[Option<ArrayRef>],
72    ) -> VortexResult<()>;
73
74    /// Returns the number of buffers in the array.
75    fn nbuffers(array: ArrayView<'_, Self>) -> usize;
76
77    /// Returns the buffer at the given index.
78    ///
79    /// # Panics
80    /// Panics if `idx >= nbuffers(array)`.
81    fn buffer(array: ArrayView<'_, Self>, idx: usize) -> BufferHandle;
82
83    /// Returns the name of the buffer at the given index, or `None` if unnamed.
84    fn buffer_name(array: ArrayView<'_, Self>, idx: usize) -> Option<String>;
85
86    /// Returns the number of children in the array.
87    ///
88    /// The default counts non-None slots.
89    fn nchildren(array: ArrayView<'_, Self>) -> usize {
90        array.slots().iter().filter(|s| s.is_some()).count()
91    }
92
93    /// Returns the child at the given index.
94    ///
95    /// The default returns the `idx`-th non-None slot.
96    ///
97    /// # Panics
98    /// Panics if `idx >= nchildren(array)`.
99    fn child(array: ArrayView<'_, Self>, idx: usize) -> ArrayRef {
100        array
101            .slots()
102            .iter()
103            .filter_map(|s| s.clone())
104            .nth(idx)
105            .vortex_expect("child index out of bounds")
106    }
107
108    /// Returns the name of the child at the given index.
109    ///
110    /// The default returns the slot name of the `idx`-th non-None slot.
111    ///
112    /// # Panics
113    /// Panics if `idx >= nchildren(array)`.
114    fn child_name(array: ArrayView<'_, Self>, idx: usize) -> String {
115        array
116            .slots()
117            .iter()
118            .enumerate()
119            .filter(|(_, s)| s.is_some())
120            .nth(idx)
121            .map(|(slot_idx, _)| Self::slot_name(array, slot_idx))
122            .vortex_expect("child_name index out of bounds")
123    }
124
125    /// Serialize metadata into a byte buffer for IPC or file storage.
126    /// Return `None` if the array cannot be serialized.
127    fn serialize(
128        array: ArrayView<'_, Self>,
129        session: &VortexSession,
130    ) -> VortexResult<Option<Vec<u8>>>;
131
132    /// Deserialize an array from serialized components.
133    fn deserialize(
134        &self,
135        dtype: &DType,
136        len: usize,
137        metadata: &[u8],
138        buffers: &[BufferHandle],
139        children: &dyn ArrayChildren,
140        session: &VortexSession,
141    ) -> VortexResult<crate::array::ArrayParts<Self>>;
142
143    /// Writes the array into a canonical builder.
144    fn append_to_builder(
145        array: ArrayView<'_, Self>,
146        builder: &mut dyn ArrayBuilder,
147        ctx: &mut ExecutionCtx,
148    ) -> VortexResult<()> {
149        let canonical = array
150            .array()
151            .clone()
152            .execute::<Canonical>(ctx)?
153            .into_array();
154        builder.extend_from_array(&canonical);
155        Ok(())
156    }
157
158    /// Returns the name of the slot at the given index.
159    ///
160    /// # Panics
161    /// Panics if `idx >= slots(array).len()`.
162    fn slot_name(array: ArrayView<'_, Self>, idx: usize) -> String;
163
164    /// Execute this array by returning an [`ExecutionResult`].
165    ///
166    /// Execution is **iterative**, not recursive. Instead of recursively executing children,
167    /// implementations should return [`ExecutionResult::execute_slot`] to request that the
168    /// scheduler execute a slot first, or [`ExecutionResult::done`] when the encoding can
169    /// produce a result directly.
170    ///
171    /// For good examples of this pattern, see:
172    /// - [`Dict::execute`](crate::arrays::dict::vtable::Dict::execute) — demonstrates
173    ///   requiring children via `require_child!` and producing a result once they are canonical.
174    /// - `BitPacked::execute` (in `vortex-fastlanes`) — demonstrates requiring patches and
175    ///   validity via `require_patches!`/`require_validity!`.
176    ///
177    /// Array execution is designed such that repeated execution of an array will eventually
178    /// converge to a canonical representation. Implementations of this function should therefore
179    /// ensure they make progress towards that goal.
180    ///
181    /// The returned array (in `Done`) must be logically equivalent to the input array. In other
182    /// words, the recursively canonicalized forms of both arrays must be equal.
183    ///
184    /// Debug builds will panic if the returned array is of the wrong type, wrong length, or
185    /// incorrectly contains null values.
186    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult>;
187
188    /// Attempt to execute the parent of this array.
189    fn execute_parent(
190        array: ArrayView<'_, Self>,
191        parent: &ArrayRef,
192        child_idx: usize,
193        ctx: &mut ExecutionCtx,
194    ) -> VortexResult<Option<ArrayRef>> {
195        _ = (array, parent, child_idx, ctx);
196        Ok(None)
197    }
198
199    /// Attempt to reduce the array to a simpler representation.
200    fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
201        _ = array;
202        Ok(None)
203    }
204
205    /// Attempt to perform a reduction of the parent of this array.
206    fn reduce_parent(
207        array: ArrayView<'_, Self>,
208        parent: &ArrayRef,
209        child_idx: usize,
210    ) -> VortexResult<Option<ArrayRef>> {
211        _ = (array, parent, child_idx);
212        Ok(None)
213    }
214}
215
216/// Alias for migration — downstream code can start using `ArrayVTable`.
217pub use VTable as ArrayVTable;
218
219use crate::array::ArrayId;
220
/// Empty array metadata struct for encodings with no per-array metadata.
///
/// This is a field-less unit struct. Its [`ArrayEq`], [`ArrayHash`] and [`Display`]
/// implementations in this module are all trivial: every value compares equal, hashing feeds
/// nothing into the hasher, and formatting writes nothing.
#[derive(Clone, Debug, Default)]
pub struct EmptyArrayData;
224
225impl ArrayEq for EmptyArrayData {
226    fn array_eq(&self, _other: &Self, _precision: Precision) -> bool {
227        true
228    }
229}
230impl ArrayHash for EmptyArrayData {
231    fn array_hash<H: Hasher>(&self, _state: &mut H, _precision: Precision) {}
232}
233
234impl Display for EmptyArrayData {
235    fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
236        Ok(())
237    }
238}
239
/// Placeholder type used to indicate when a particular vtable is not supported by the encoding.
///
/// This is a field-less unit struct with no derived traits.
// NOTE(review): presumably named as an associated vtable type by encodings that opt out of that
// vtable — no such use is visible in this file; confirm against the sub-vtable modules.
pub struct NotSupported;
242
243/// Returns the validity as a child array if it produces one.
244#[inline]
245pub fn validity_to_child(validity: &Validity, len: usize) -> Option<ArrayRef> {
246    match validity {
247        Validity::NonNullable | Validity::AllValid => None,
248        Validity::AllInvalid => Some(ConstantArray::new(false, len).into_array()),
249        Validity::Array(array) => Some(array.clone()),
250    }
251}
252
253/// Reconstruct a [`Validity`] from an optional child array and nullability.
254///
255/// This is the inverse of [`validity_to_child`].
256#[inline]
257pub fn child_to_validity(child: &Option<ArrayRef>, nullability: Nullability) -> Validity {
258    match child {
259        Some(arr) => {
260            // Detect constant bool arrays created by validity_to_child.
261            // Use direct ScalarValue matching to avoid expensive scalar conversion.
262            if let Some(c) = arr.as_opt::<Constant>()
263                && let Some(ScalarValue::Bool(val)) = c.scalar().value()
264            {
265                return if *val {
266                    Validity::AllValid
267                } else {
268                    Validity::AllInvalid
269                };
270            }
271            Validity::Array(arr.clone())
272        }
273        None => Validity::from(nullability),
274    }
275}
276
277/// Returns 1 if validity produces a child, 0 otherwise.
278#[inline]
279pub fn validity_nchildren(validity: &Validity) -> usize {
280    match validity {
281        Validity::NonNullable | Validity::AllValid => 0,
282        Validity::AllInvalid | Validity::Array(_) => 1,
283    }
284}
285
286/// Returns the number of children produced by patches.
287#[inline]
288pub fn patches_nchildren(patches: &Patches) -> usize {
289    2 + patches.chunk_offsets().is_some() as usize
290}
291
292/// Returns the child at the given index within a patches component.
293#[inline]
294pub fn patches_child(patches: &Patches, idx: usize) -> ArrayRef {
295    match idx {
296        0 => patches.indices().clone(),
297        1 => patches.values().clone(),
298        2 => patches
299            .chunk_offsets()
300            .as_ref()
301            .vortex_expect("patch_chunk_offsets child out of bounds")
302            .clone(),
303        _ => vortex_panic!("patches child index {idx} out of bounds"),
304    }
305}
306
307/// Returns the name of the child at the given index within a patches component.
308#[inline]
309pub fn patches_child_name(idx: usize) -> &'static str {
310    match idx {
311        0 => "patch_indices",
312        1 => "patch_values",
313        2 => "patch_chunk_offsets",
314        _ => vortex_panic!("patches child name index {idx} out of bounds"),
315    }
316}