vortex_array/array/vtable/mod.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! This module contains the VTable definitions for a Vortex encoding.
5//!
6//! A Vortex array encoding is implemented by a small static vtable type plus an associated
7//! `TypedArrayData` value stored in each array instance. The vtable owns behavior such as
8//! validation, serialization, execution, child traversal, scalar access, and validity access.
9//!
10//! The public [`ArrayRef`] API performs common precondition checks before calling
11//! into these traits. Implementations should focus on encoding-specific work and uphold the
12//! documented postconditions.
13
14mod operations;
15mod validity;
16
17use std::fmt::Debug;
18use std::fmt::Display;
19use std::fmt::Formatter;
20use std::hash::Hasher;
21
22pub use operations::*;
23pub use validity::*;
24use vortex_error::VortexExpect;
25use vortex_error::VortexResult;
26use vortex_error::vortex_bail;
27use vortex_error::vortex_ensure;
28use vortex_error::vortex_panic;
29use vortex_session::VortexSession;
30
31use crate::Array;
32use crate::ArrayRef;
33use crate::ArrayView;
34use crate::Canonical;
35use crate::EqMode;
36use crate::ExecutionResult;
37use crate::IntoArray;
38pub use crate::array::plugin::*;
39use crate::arrays::ConstantArray;
40use crate::arrays::constant::Constant;
41use crate::buffer::BufferHandle;
42use crate::builders::ArrayBuilder;
43use crate::dtype::DType;
44use crate::dtype::Nullability;
45use crate::executor::ExecutionCtx;
46use crate::hash::ArrayEq;
47use crate::hash::ArrayHash;
48use crate::patches::Patches;
49use crate::scalar::ScalarValue;
50use crate::serde::ArrayChildren;
51use crate::validity::Validity;
52
53/// The array [`VTable`] encapsulates logic for an Array type within Vortex.
54///
55/// The logic is split across several "VTable" traits to enable easier code organization than
56/// simply lumping everything into a single trait.
57///
58/// From this [`VTable`] trait, we derive implementations for the sealed `DynArrayData` trait and the
59/// public [`ArrayPlugin`] registry trait.
60///
61/// The functions defined in these vtable traits will typically document their pre- and
62/// post-conditions. The pre-conditions are validated inside the `DynArrayData` and [`ArrayRef`]
63/// implementations so do not need to be checked in the vtable implementations (for example, index
64/// out of bounds). Post-conditions are validated after invocation of the vtable function and will
65/// panic if violated.
66pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug {
67 /// Per-array data owned by this encoding, excluding child arrays.
68 ///
69 /// Child arrays belong in [`ArrayParts::slots`](crate::ArrayParts::slots) so traversal,
70 /// serialization, and layout writers can discover them generically.
71 type TypedArrayData: 'static + Send + Sync + Clone + Debug + Display + ArrayHash + ArrayEq;
72
73 /// Scalar and element-wise operation hooks for this encoding.
74 type OperationsVTable: OperationsVTable<Self>;
75 /// Validity hook for nullable instances of this encoding.
76 type ValidityVTable: ValidityVTable<Self>;
77
78 /// Returns the ID of the array.
79 fn id(&self) -> ArrayId;
80
81 /// Validates that externally supplied logical metadata matches the array data.
82 ///
83 /// This is called by [`Array::try_from_parts`](crate::Array::try_from_parts) before the array
84 /// is published. Implementations should check dtype, length, slot count, child dtypes/lengths,
85 /// metadata bounds, and any buffer shape invariants that unsafe accessors depend on.
86 fn validate(
87 &self,
88 data: &Self::TypedArrayData,
89 dtype: &DType,
90 len: usize,
91 slots: &[Option<ArrayRef>],
92 ) -> VortexResult<()>;
93
94 /// Returns the number of top-level buffers in the array.
95 fn nbuffers(array: ArrayView<'_, Self>) -> usize;
96
97 /// Returns the buffer at the given index.
98 ///
99 /// # Panics
100 /// Panics if `idx >= nbuffers(array)`.
101 fn buffer(array: ArrayView<'_, Self>, idx: usize) -> BufferHandle;
102
103 /// Returns the name of the buffer at the given index, or `None` if unnamed.
104 fn buffer_name(array: ArrayView<'_, Self>, idx: usize) -> Option<String>;
105
106 /// Rebuild this array with replacement top-level buffers.
107 ///
108 /// This is for physical rewrites that preserve `dtype`, `len`, child slots, buffer count, and
109 /// buffer lengths. The caller checks the generic invariants before dispatching here;
110 /// implementations should interpret the replacement buffers for their encoding-specific
111 /// in-memory representation.
112 fn with_buffers(
113 &self,
114 array: ArrayView<'_, Self>,
115 buffers: &[BufferHandle],
116 ) -> VortexResult<ArrayParts<Self>>;
117
118 /// Returns the number of children in the array.
119 ///
120 /// The default counts non-None slots.
121 fn nchildren(array: ArrayView<'_, Self>) -> usize {
122 array.slots().iter().filter(|s| s.is_some()).count()
123 }
124
125 /// Returns the child at the given index.
126 ///
127 /// The default returns the `idx`-th non-None slot.
128 ///
129 /// # Panics
130 /// Panics if `idx >= nchildren(array)`.
131 fn child(array: ArrayView<'_, Self>, idx: usize) -> ArrayRef {
132 array
133 .slots()
134 .iter()
135 .filter_map(|s| s.clone())
136 .nth(idx)
137 .vortex_expect("child index out of bounds")
138 }
139
140 /// Returns the name of the child at the given index.
141 ///
142 /// The default returns the slot name of the `idx`-th non-None slot.
143 ///
144 /// # Panics
145 /// Panics if `idx >= nchildren(array)`.
146 fn child_name(array: ArrayView<'_, Self>, idx: usize) -> String {
147 array
148 .slots()
149 .iter()
150 .enumerate()
151 .filter(|(_, s)| s.is_some())
152 .nth(idx)
153 .map(|(slot_idx, _)| Self::slot_name(array, slot_idx))
154 .vortex_expect("child_name index out of bounds")
155 }
156
157 /// Serialize encoding metadata into a byte buffer for IPC or file storage.
158 ///
159 /// Return `None` if the array cannot be serialized by this encoding. Buffers and children are
160 /// serialized separately through [`buffer`](Self::buffer), [`nbuffers`](Self::nbuffers), and
161 /// child traversal.
162 fn serialize(
163 array: ArrayView<'_, Self>,
164 session: &VortexSession,
165 ) -> VortexResult<Option<Vec<u8>>>;
166
167 /// Deserialize an array from serialized metadata, buffers, and children.
168 ///
169 /// The returned [`ArrayParts`] are still validated by the generic adapter.
170 /// Deserializers should use the provided `session` to resolve plugin-owned metadata instead of
171 /// relying on global state.
172 fn deserialize(
173 &self,
174 dtype: &DType,
175 len: usize,
176 metadata: &[u8],
177 buffers: &[BufferHandle],
178 children: &dyn ArrayChildren,
179 session: &VortexSession,
180 ) -> VortexResult<ArrayParts<Self>>;
181
182 /// Writes the array's logical values into a canonical builder.
183 ///
184 /// The default implementation executes the full array to [`Canonical`] and appends that result.
185 /// Encodings may override this to avoid materializing an intermediate canonical array.
186 fn append_to_builder(
187 array: ArrayView<'_, Self>,
188 builder: &mut dyn ArrayBuilder,
189 ctx: &mut ExecutionCtx,
190 ) -> VortexResult<()> {
191 let canonical = array
192 .array()
193 .clone()
194 .execute::<Canonical>(ctx)?
195 .into_array();
196 builder.extend_from_array(&canonical);
197 Ok(())
198 }
199
200 /// Returns the name of the slot at the given index.
201 ///
202 /// # Panics
203 /// Panics if `idx >= slots(array).len()`.
204 fn slot_name(array: ArrayView<'_, Self>, idx: usize) -> String;
205
206 /// Execute this array by returning an [`ExecutionResult`].
207 ///
208 /// Execution is **iterative**, not recursive. Instead of recursively executing children,
209 /// implementations should return [`ExecutionResult::execute_slot`] to request that the
210 /// scheduler execute a slot first, or [`ExecutionResult::done`] when the encoding can
211 /// produce a result directly.
212 ///
213 /// For good examples of this pattern, see:
214 /// - [`Dict::execute`](crate::arrays::dict::vtable::Dict::execute) — demonstrates
215 /// requiring children via `require_child!` and producing a result once they are canonical.
216 /// - `BitPacked::execute` (in `vortex-fastlanes`) — demonstrates requiring patches and
217 /// validity via `require_patches!`/`require_validity!`.
218 ///
219 /// Array execution is designed such that repeated execution of an array will eventually
220 /// converge to a canonical representation. Implementations of this function should therefore
221 /// ensure they make progress towards that goal.
222 ///
223 /// The returned array (in `Done`) must be logically equivalent to the input array. In other
224 /// words, the recursively canonicalized forms of both arrays must be equal.
225 ///
226 /// Debug builds will panic if the returned array is of the wrong type, wrong length, or
227 /// incorrectly contains null values.
228 fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult>;
229
230 /// Attempt to reduce the array to a simpler representation without changing logical values.
231 ///
232 /// Reductions are opportunistic and may return `Ok(None)` when no cheaper representation is
233 /// known.
234 fn reduce(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
235 _ = array;
236 Ok(None)
237 }
238
239 /// Attempt to reduce `parent` after this array appears as one of its children.
240 ///
241 /// This is used by lazy arrays to let child execution unlock parent simplifications.
242 fn reduce_parent(
243 array: ArrayView<'_, Self>,
244 parent: &ArrayRef,
245 child_idx: usize,
246 ) -> VortexResult<Option<ArrayRef>> {
247 _ = (array, parent, child_idx);
248 Ok(None)
249 }
250}
251
252/// Alias for migration — downstream code can start using `ArrayVTable`.
253pub use VTable as ArrayVTable;
254
255use crate::array::ArrayId;
256use crate::array::ArrayParts;
257
/// Zero-sized per-array data for encodings that carry no metadata of their own.
///
/// Being a unit struct, it holds no fields; every value is interchangeable with every other.
#[derive(Debug, Default, Clone)]
pub struct EmptyArrayData;
261
262impl ArrayEq for EmptyArrayData {
263 fn array_eq(&self, _other: &Self, _accuracy: EqMode) -> bool {
264 true
265 }
266}
267impl ArrayHash for EmptyArrayData {
268 fn array_hash<H: Hasher>(&self, _state: &mut H, _accuracy: EqMode) {}
269}
270
271impl Display for EmptyArrayData {
272 fn fmt(&self, _f: &mut Formatter<'_>) -> std::fmt::Result {
273 Ok(())
274 }
275}
276
277/// Rebuild an array that has no top-level buffers.
278#[inline]
279pub fn with_empty_buffers<V: VTable>(
280 vtable: &V,
281 array: ArrayView<'_, V>,
282 buffers: &[BufferHandle],
283) -> VortexResult<ArrayParts<V>> {
284 vortex_ensure!(
285 buffers.is_empty(),
286 "Array {} expects 0 buffers, got {}",
287 array.encoding_id(),
288 buffers.len()
289 );
290 Ok(ArrayParts::new(
291 vtable.clone(),
292 array.dtype().clone(),
293 array.len(),
294 array.data().clone(),
295 )
296 .with_slots(array.slots().iter().cloned().collect()))
297}
298
299/// Reject buffer replacement for encodings whose exposed buffers are not runtime backing buffers.
300#[inline]
301pub fn unsupported_buffer_replacement<V: VTable>(
302 array: ArrayView<'_, V>,
303 _buffers: &[BufferHandle],
304) -> VortexResult<ArrayParts<V>> {
305 vortex_bail!(
306 "Array {} does not support in-memory buffer replacement",
307 array.encoding_id()
308 )
309}
310
/// Marker type an encoding names in place of a vtable it does not support.
///
/// It is a unit struct and carries no data.
pub struct NotSupported;
313
314/// Returns the validity as a child array if it produces one.
315#[inline]
316pub fn validity_to_child(validity: &Validity, len: usize) -> Option<ArrayRef> {
317 match validity {
318 Validity::NonNullable | Validity::AllValid => None,
319 Validity::AllInvalid => Some(ConstantArray::new(false, len).into_array()),
320 Validity::Array(array) => Some(array.clone()),
321 }
322}
323
324/// Reconstruct a [`Validity`] from an optional child array and nullability.
325///
326/// This is the inverse of [`validity_to_child`].
327#[inline]
328pub fn child_to_validity(child: Option<&ArrayRef>, nullability: Nullability) -> Validity {
329 match child {
330 Some(arr) => {
331 // Detect constant bool arrays created by validity_to_child.
332 // Use direct ScalarValue matching to avoid expensive scalar conversion.
333 if let Some(c) = arr.as_opt::<Constant>()
334 && let Some(ScalarValue::Bool(val)) = c.scalar().value()
335 {
336 return if *val {
337 Validity::AllValid
338 } else {
339 Validity::AllInvalid
340 };
341 }
342 Validity::Array(arr.clone())
343 }
344 None => Validity::from(nullability),
345 }
346}
347
348/// Returns 1 if validity produces a child, 0 otherwise.
349#[inline]
350pub fn validity_nchildren(validity: &Validity) -> usize {
351 match validity {
352 Validity::NonNullable | Validity::AllValid => 0,
353 Validity::AllInvalid | Validity::Array(_) => 1,
354 }
355}
356
357/// Returns the number of children produced by patches.
358#[inline]
359pub fn patches_nchildren(patches: &Patches) -> usize {
360 2 + patches.chunk_offsets().is_some() as usize
361}
362
363/// Returns the child at the given index within a patches component.
364#[inline]
365pub fn patches_child(patches: &Patches, idx: usize) -> ArrayRef {
366 match idx {
367 0 => patches.indices().clone(),
368 1 => patches.values().clone(),
369 2 => patches
370 .chunk_offsets()
371 .as_ref()
372 .vortex_expect("patch_chunk_offsets child out of bounds")
373 .clone(),
374 _ => vortex_panic!("patches child index {idx} out of bounds"),
375 }
376}
377
378/// Returns the name of the child at the given index within a patches component.
379#[inline]
380pub fn patches_child_name(idx: usize) -> &'static str {
381 match idx {
382 0 => "patch_indices",
383 1 => "patch_values",
384 2 => "patch_chunk_offsets",
385 _ => vortex_panic!("patches child name index {idx} out of bounds"),
386 }
387}