Skip to main content

vortex_array/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Formatter;
7use std::hash::Hasher;
8use std::sync::Arc;
9
10use vortex_buffer::ByteBuffer;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_error::vortex_err;
15use vortex_error::vortex_panic;
16use vortex_session::registry::Id;
17
18use crate::ExecutionCtx;
19use crate::buffer::BufferHandle;
20use crate::builders::ArrayBuilder;
21use crate::dtype::DType;
22use crate::dtype::Nullability;
23use crate::executor::ExecutionResult;
24use crate::executor::ExecutionStep;
25use crate::scalar::Scalar;
26use crate::validity::Validity;
27
28mod erased;
29pub use erased::*;
30
31mod plugin;
32pub use plugin::*;
33
34mod foreign;
35pub(crate) use foreign::*;
36
37mod typed;
38pub use typed::*;
39
40pub mod vtable;
41pub use vtable::*;
42
43mod view;
44use smallvec::SmallVec;
45pub use view::*;
46
47use crate::hash::ArrayEq;
48use crate::hash::ArrayHash;
49
/// The slots of an array: a collection of optional child arrays.
///
/// Each entry is an `Option<ArrayRef>`, so an individual slot may be empty (`None`).
///
/// Most encodings have 4 or fewer slots, so we use a `SmallVec` with an inline capacity of 4
/// to avoid heap allocation in the common case.
pub type ArraySlots = SmallVec<[Option<ArrayRef>; 4]>;
55
/// Crate-internal trait describing the operations that an array's data must support.
///
/// This trait is `pub(crate)` and sealed through `private::Sealed`; in this file it is
/// implemented for [`ArrayData<V>`] only. Use [`ArrayRef`] as the primary handle for working
/// with arrays.
///
/// Most methods take a `this: &ArrayRef` argument in addition to `&self`.
/// NOTE(review): presumably `this` is the handle that wraps `self` — confirm against the
/// call sites in `ArrayRef`.
#[doc(hidden)]
pub(crate) trait DynArrayData: 'static + private::Sealed + Send + Sync + Debug {
    /// Returns the array as a reference to a generic [`Any`] trait object.
    fn as_any(&self) -> &dyn Any;

    /// Returns the array as a mutable reference to a generic [`Any`] trait object.
    fn as_any_mut(&mut self) -> &mut dyn Any;

    /// Returns the [`Validity`] of the array.
    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity>;

    /// Writes the array into the canonical builder.
    ///
    /// The [`DType`] of the builder must match that of the array.
    fn append_to_builder(
        &self,
        this: &ArrayRef,
        builder: &mut dyn ArrayBuilder,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<()>;

    // --- Visitor methods (formerly in ArrayVisitor) ---

    /// Returns the children of the array.
    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef>;

    /// Returns the number of children of the array.
    fn nchildren(&self, this: &ArrayRef) -> usize;

    /// Returns the nth child of the array without allocating a Vec.
    ///
    /// Returns `None` if the index is out of bounds.
    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef>;

    /// Returns the names of the children of the array.
    fn children_names(&self, this: &ArrayRef) -> Vec<String>;

    /// Returns the array's children with their names.
    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)>;

    /// Returns the buffers of the array.
    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer>;

    /// Returns the buffer handles of the array.
    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle>;

    /// Returns the names of the buffers of the array.
    fn buffer_names(&self, this: &ArrayRef) -> Vec<String>;

    /// Returns the array's buffers with their names.
    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)>;

    /// Returns the number of buffers of the array.
    fn nbuffers(&self, this: &ArrayRef) -> usize;

    /// Returns the name of the slot at the given index.
    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String;

    /// Formats a human-readable metadata description.
    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result;

    /// Hashes the encoding-specific contents of the array into `state`.
    ///
    /// In the `ArrayData<V>` implementation the metadata (len, dtype, encoding id) and the
    /// slots are not hashed here; that implementation notes they are hashed by `ArrayRef`.
    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: crate::Precision);

    /// Compares two arrays of the same concrete type for equality.
    ///
    /// In the `ArrayData<V>` implementation the result is `false` when `other` does not
    /// downcast to the same concrete type; metadata and slots are compared by `ArrayRef`.
    fn dyn_array_eq(&self, other: &ArrayRef, precision: crate::Precision) -> bool;

    /// Returns a new array with the given slots.
    fn with_slots(&self, this: &ArrayRef, slots: ArraySlots) -> VortexResult<ArrayRef>;

    /// Returns a new array with the given slots, bypassing encoding-level validation.
    ///
    /// Used by the executor to temporarily carry an array that has had one of its child slots
    /// taken out (leaving `None`) without panicking `V::validate`. The caller must ensure the
    /// missing slot is filled back in (via `put_slot_unchecked`) or driven to completion by the
    /// builder path before the array becomes externally observable.
    ///
    /// # Safety
    ///
    /// The array returned may have slots whose content does not match the encoding's normal
    /// invariants. Callers must re-establish those invariants before handing the array to
    /// anything outside the executor.
    unsafe fn with_slots_unchecked(&self, this: &ArrayRef, slots: ArraySlots) -> ArrayRef;

    /// Attempt to reduce the array to a simpler representation.
    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>>;

    /// Attempt to reduce the parent of this array.
    fn reduce_parent(
        &self,
        this: &ArrayRef,
        parent: &ArrayRef,
        child_idx: usize,
    ) -> VortexResult<Option<ArrayRef>>;

    /// Execute the array by taking a single encoding-specific execution step.
    ///
    /// This is the checked entry point. If the encoding reports
    /// [`ExecutionStep::Done`](crate::ExecutionStep::Done), implementations must validate that the
    /// returned array preserves this array's logical `len` and `dtype`, and must transfer this
    /// array's statistics to the returned array.
    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult>;

    /// Execute the array by taking a single encoding-specific execution step without applying
    /// `Done`-result postconditions.
    ///
    /// This exists for the iterative executor, which may call into `execute` on suspended
    /// executor-private arrays whose slots temporarily contain `None`. In that mode the executor
    /// itself is responsible for deciding when a `Done` result represents a real logical array,
    /// enforcing any `len`/`dtype` invariants, and transferring statistics.
    ///
    /// # Safety
    ///
    /// The returned array must have its `DType` and `len` checked by the caller
    /// (optionally it should also have its stats propagated from `this`).
    unsafe fn execute_unchecked(
        &self,
        this: ArrayRef,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<ExecutionResult>;

    /// Attempt to execute the parent of this array.
    fn execute_parent(
        &self,
        this: &ArrayRef,
        parent: &ArrayRef,
        child_idx: usize,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Option<ArrayRef>>;

    /// Execute the scalar at the given index.
    ///
    /// This method panics if the index is out of bounds for the array.
    fn execute_scalar(
        &self,
        this: &ArrayRef,
        index: usize,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Scalar>;
}
199
/// Trait for converting a type into a Vortex [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
204
// Private module implementing the sealed-trait pattern for `DynArrayData`.
//
// `Sealed` is a supertrait of `DynArrayData`. Because this module is not `pub`, the trait can
// only be reached from this module and its descendants, which keeps the set of implementors
// under the crate's control.
mod private {
    use super::*;

    /// Marker trait with no methods; implementing it is what permits a type to implement
    /// `DynArrayData`.
    pub trait Sealed {}

    // `ArrayData<V>` is the only type sealed in this file.
    impl<V: VTable> Sealed for ArrayData<V> {}
}
212
213// =============================================================================
214// New path: DynArrayData and supporting trait impls for ArrayData<V>
215// =============================================================================
216
217/// DynArrayData implementation for [`ArrayData<V>`].
218///
219/// This is self-contained: identity methods use `ArrayData<V>`'s own fields (dtype, len, stats),
220/// while data-access methods delegate to VTable methods on the inner `V::TypedArrayData`.
221impl<V: VTable> DynArrayData for ArrayData<V> {
222    fn as_any(&self) -> &dyn Any {
223        self
224    }
225
226    fn as_any_mut(&mut self) -> &mut dyn Any {
227        self
228    }
229
230    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity> {
231        if this.dtype().is_nullable() {
232            let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
233            let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(view)?;
234            if let Validity::Array(array) = &validity {
235                vortex_ensure!(array.len() == this.len(), "Validity array length mismatch");
236                vortex_ensure!(
237                    matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
238                    "Validity array is not non-nullable boolean: {}",
239                    this.encoding_id(),
240                );
241            }
242            Ok(validity)
243        } else {
244            Ok(Validity::NonNullable)
245        }
246    }
247
248    fn append_to_builder(
249        &self,
250        this: &ArrayRef,
251        builder: &mut dyn ArrayBuilder,
252        ctx: &mut ExecutionCtx,
253    ) -> VortexResult<()> {
254        if builder.dtype() != this.dtype() {
255            vortex_panic!(
256                "Builder dtype mismatch: expected {}, got {}",
257                this.dtype(),
258                builder.dtype(),
259            );
260        }
261        let len = builder.len();
262
263        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
264        V::append_to_builder(view, builder, ctx)?;
265
266        assert_eq!(
267            len + this.len(),
268            builder.len(),
269            "Builder length mismatch after writing array for encoding {}",
270            this.encoding_id(),
271        );
272        Ok(())
273    }
274
275    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef> {
276        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
277        (0..V::nchildren(view)).map(|i| V::child(view, i)).collect()
278    }
279
280    fn nchildren(&self, this: &ArrayRef) -> usize {
281        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
282        V::nchildren(view)
283    }
284
285    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef> {
286        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
287        (idx < V::nchildren(view)).then(|| V::child(view, idx))
288    }
289
290    fn children_names(&self, this: &ArrayRef) -> Vec<String> {
291        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
292        (0..V::nchildren(view))
293            .map(|i| V::child_name(view, i))
294            .collect()
295    }
296
297    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)> {
298        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
299        (0..V::nchildren(view))
300            .map(|i| (V::child_name(view, i), V::child(view, i)))
301            .collect()
302    }
303
304    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer> {
305        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
306        (0..V::nbuffers(view))
307            .map(|i| V::buffer(view, i).to_host_sync())
308            .collect()
309    }
310
311    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle> {
312        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
313        (0..V::nbuffers(view)).map(|i| V::buffer(view, i)).collect()
314    }
315
316    fn buffer_names(&self, this: &ArrayRef) -> Vec<String> {
317        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
318        (0..V::nbuffers(view))
319            .filter_map(|i| V::buffer_name(view, i))
320            .collect()
321    }
322
323    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)> {
324        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
325        (0..V::nbuffers(view))
326            .filter_map(|i| V::buffer_name(view, i).map(|name| (name, V::buffer(view, i))))
327            .collect()
328    }
329
330    fn nbuffers(&self, this: &ArrayRef) -> usize {
331        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
332        V::nbuffers(view)
333    }
334
335    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String {
336        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
337        V::slot_name(view, idx)
338    }
339
340    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
341        std::fmt::Display::fmt(&self.data, f)
342    }
343
344    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: crate::Precision) {
345        let mut wrapper = HasherWrapper(state);
346        // Note: metadata (len, dtype, encoding_id) and slots are hashed by ArrayRef.
347        self.data.array_hash(&mut wrapper, precision);
348    }
349
350    fn dyn_array_eq(&self, other: &ArrayRef, precision: crate::Precision) -> bool {
351        // Note: metadata (len, dtype, encoding_id) and slots are compared by ArrayRef.
352        other
353            .dyn_array()
354            .as_any()
355            .downcast_ref::<Self>()
356            .is_some_and(|other_inner| self.data.array_eq(&other_inner.data, precision))
357    }
358
359    fn with_slots(&self, this: &ArrayRef, slots: ArraySlots) -> VortexResult<ArrayRef> {
360        let stats = this.statistics().to_owned();
361        Ok(Array::<V>::try_from_parts(
362            ArrayParts::new(
363                self.vtable.clone(),
364                this.dtype().clone(),
365                this.len(),
366                self.data.clone(),
367            )
368            .with_slots(slots),
369        )?
370        .with_stats_set(stats)
371        .into_array())
372    }
373
374    unsafe fn with_slots_unchecked(&self, this: &ArrayRef, slots: ArraySlots) -> ArrayRef {
375        // SAFETY: we intentionally skip `V::validate` here. Caller guarantees that the resulting
376        // array is either repaired or not externally observed.
377        let store = unsafe {
378            ArrayInner::<ArrayData<V>>::new_unchecked(
379                self.vtable.clone(),
380                this.len(),
381                this.dtype().clone(),
382                self.data.clone(),
383                slots,
384                this.statistics().to_array_stats(),
385            )
386        };
387        ArrayRef::from_inner(Arc::new(store))
388    }
389
390    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>> {
391        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
392        let Some(reduced) = V::reduce(view)? else {
393            return Ok(None);
394        };
395        vortex_ensure!(
396            reduced.len() == this.len(),
397            "Reduced array length mismatch from {} to {}",
398            this.encoding_id(),
399            reduced.encoding_id()
400        );
401        vortex_ensure!(
402            reduced.dtype() == this.dtype(),
403            "Reduced array dtype mismatch from {} to {}",
404            this.encoding_id(),
405            reduced.encoding_id()
406        );
407        Ok(Some(reduced))
408    }
409
410    fn reduce_parent(
411        &self,
412        this: &ArrayRef,
413        parent: &ArrayRef,
414        child_idx: usize,
415    ) -> VortexResult<Option<ArrayRef>> {
416        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
417        let Some(reduced) = V::reduce_parent(view, parent, child_idx)? else {
418            return Ok(None);
419        };
420
421        vortex_ensure!(
422            reduced.len() == parent.len(),
423            "Reduced array length mismatch from {} to {}",
424            parent.encoding_id(),
425            reduced.encoding_id()
426        );
427        vortex_ensure!(
428            reduced.dtype() == parent.dtype(),
429            "Reduced array dtype mismatch from {} to {}",
430            parent.encoding_id(),
431            reduced.encoding_id()
432        );
433
434        Ok(Some(reduced))
435    }
436
437    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
438        let len = this.len();
439        let dtype = this.dtype().clone();
440        let stats = this.statistics().to_array_stats();
441        let result = unsafe { self.execute_unchecked(this, ctx)? };
442
443        if matches!(result.step(), ExecutionStep::Done) {
444            if cfg!(debug_assertions) {
445                vortex_ensure!(
446                    result.array().len() == len,
447                    "Result length mismatch for {:?}",
448                    self.vtable
449                );
450                vortex_ensure!(
451                    result.array().dtype() == &dtype,
452                    "Executed canonical dtype mismatch for {:?}",
453                    self.vtable
454                );
455            }
456
457            result
458                .array()
459                .statistics()
460                .set_iter(crate::stats::StatsSet::from(stats).into_iter());
461        }
462
463        Ok(result)
464    }
465
466    unsafe fn execute_unchecked(
467        &self,
468        this: ArrayRef,
469        ctx: &mut ExecutionCtx,
470    ) -> VortexResult<ExecutionResult> {
471        let typed = Array::<V>::try_from_array_ref(this)
472            .map_err(|_| vortex_err!("Failed to downcast array for execute"))
473            .vortex_expect("Failed to downcast array for execute");
474        V::execute(typed, ctx)
475    }
476
477    fn execute_parent(
478        &self,
479        this: &ArrayRef,
480        parent: &ArrayRef,
481        child_idx: usize,
482        ctx: &mut ExecutionCtx,
483    ) -> VortexResult<Option<ArrayRef>> {
484        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
485        let Some(result) = V::execute_parent(view, parent, child_idx, ctx)? else {
486            return Ok(None);
487        };
488
489        if cfg!(debug_assertions) {
490            vortex_ensure!(
491                result.len() == parent.len(),
492                "Executed parent canonical length mismatch"
493            );
494            vortex_ensure!(
495                result.dtype() == parent.dtype(),
496                "Executed parent canonical dtype mismatch"
497            );
498        }
499
500        Ok(Some(result))
501    }
502
503    fn execute_scalar(
504        &self,
505        this: &ArrayRef,
506        index: usize,
507        ctx: &mut ExecutionCtx,
508    ) -> VortexResult<Scalar> {
509        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
510        <V::OperationsVTable as OperationsVTable<V>>::scalar_at(view, index, ctx)
511    }
512}
513
/// Wrapper around `&mut dyn Hasher` that implements `Hasher` (and is `Sized`).
///
/// Every stable `Hasher` method is forwarded to the wrapped hasher. Forwarding the typed
/// `write_*` methods (rather than relying on their default implementations, which funnel
/// everything through `write`) means that a wrapped hasher with specialised integer paths
/// sees exactly the same calls it would see if it were hashed into directly.
struct HasherWrapper<'a>(&'a mut dyn Hasher);

impl Hasher for HasherWrapper<'_> {
    fn finish(&self) -> u64 {
        self.0.finish()
    }

    fn write(&mut self, bytes: &[u8]) {
        self.0.write(bytes);
    }

    fn write_u8(&mut self, i: u8) {
        self.0.write_u8(i);
    }

    fn write_u16(&mut self, i: u16) {
        self.0.write_u16(i);
    }

    fn write_u32(&mut self, i: u32) {
        self.0.write_u32(i);
    }

    fn write_u64(&mut self, i: u64) {
        self.0.write_u64(i);
    }

    fn write_u128(&mut self, i: u128) {
        self.0.write_u128(i);
    }

    fn write_usize(&mut self, i: usize) {
        self.0.write_usize(i);
    }

    fn write_i8(&mut self, i: i8) {
        self.0.write_i8(i);
    }

    fn write_i16(&mut self, i: i16) {
        self.0.write_i16(i);
    }

    fn write_i32(&mut self, i: i32) {
        self.0.write_i32(i);
    }

    fn write_i64(&mut self, i: i64) {
        self.0.write_i64(i);
    }

    fn write_i128(&mut self, i: i128) {
        self.0.write_i128(i);
    }

    fn write_isize(&mut self, i: isize) {
        self.0.write_isize(i);
    }
}
526
/// `ArrayId` is a globally unique name for the array's vtable.
///
/// It is a type alias for the [`Id`] type imported from `vortex_session::registry`.
pub type ArrayId = Id;