Skip to main content

vortex_array/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Formatter;
7use std::hash::Hasher;
8use std::sync::Arc;
9
10use vortex_buffer::ByteBuffer;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_error::vortex_err;
15use vortex_error::vortex_panic;
16use vortex_session::registry::Id;
17
18use crate::ExecutionCtx;
19use crate::buffer::BufferHandle;
20use crate::builders::ArrayBuilder;
21use crate::dtype::DType;
22use crate::dtype::Nullability;
23use crate::executor::ExecutionResult;
24use crate::executor::ExecutionStep;
25use crate::scalar::Scalar;
26use crate::validity::Validity;
27
28mod erased;
29pub use erased::*;
30
31mod plugin;
32pub use plugin::*;
33
34mod foreign;
35pub(crate) use foreign::*;
36
37mod typed;
38pub use typed::*;
39
40pub mod vtable;
41pub use vtable::*;
42
43mod view;
44use smallvec::SmallVec;
45pub use view::*;
46
47use crate::hash::ArrayEq;
48use crate::hash::ArrayHash;
49
50/// The slots of an array: a collection of optional child arrays.
51///
52/// Most encodings have 4 or fewer slots, so we use a `SmallVec` to avoid
53/// heap allocation in the common case.
54pub type ArraySlots = SmallVec<[Option<ArrayRef>; 4]>;
55
56/// The public API trait for all Vortex arrays.
57///
58/// This trait is sealed and cannot be implemented outside of `vortex-array`.
59/// Use [`ArrayRef`] as the primary handle for working with arrays.
60#[doc(hidden)]
61pub(crate) trait DynArrayData: 'static + private::Sealed + Send + Sync + Debug {
62    /// Returns the array as a reference to a generic [`Any`] trait object.
63    fn as_any(&self) -> &dyn Any;
64
65    /// Returns the array as a mutable reference to a generic [`Any`] trait object.
66    fn as_any_mut(&mut self) -> &mut dyn Any;
67
68    /// Returns the [`Validity`] of the array.
69    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity>;
70
71    /// Writes the array into the canonical builder.
72    ///
73    /// The [`DType`] of the builder must match that of the array.
74    fn append_to_builder(
75        &self,
76        this: &ArrayRef,
77        builder: &mut dyn ArrayBuilder,
78        ctx: &mut ExecutionCtx,
79    ) -> VortexResult<()>;
80
81    // --- Visitor methods (formerly in ArrayVisitor) ---
82
83    /// Returns the children of the array.
84    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef>;
85
86    /// Returns the number of children of the array.
87    fn nchildren(&self, this: &ArrayRef) -> usize;
88
89    /// Returns the nth child of the array without allocating a Vec.
90    ///
91    /// Returns `None` if the index is out of bounds.
92    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef>;
93
94    /// Returns the names of the children of the array.
95    fn children_names(&self, this: &ArrayRef) -> Vec<String>;
96
97    /// Returns the array's children with their names.
98    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)>;
99
100    /// Returns the buffers of the array.
101    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer>;
102
103    /// Returns the buffer handles of the array.
104    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle>;
105
106    /// Returns the names of the buffers of the array.
107    fn buffer_names(&self, this: &ArrayRef) -> Vec<String>;
108
109    /// Returns the array's buffers with their names.
110    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)>;
111
112    /// Returns the number of buffers of the array.
113    fn nbuffers(&self, this: &ArrayRef) -> usize;
114
115    /// Returns the name of the slot at the given index.
116    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String;
117
118    /// Formats a human-readable metadata description.
119    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result;
120
121    /// Hashes the array contents including len, dtype, and encoding id.
122    fn dyn_array_hash(&self, state: &mut dyn Hasher, accuracy: crate::EqMode);
123
124    /// Compares two arrays of the same concrete type for equality.
125    fn dyn_array_eq(&self, other: &ArrayRef, accuracy: crate::EqMode) -> bool;
126
127    /// Returns a new array with the given slots.
128    fn with_slots(&self, this: &ArrayRef, slots: ArraySlots) -> VortexResult<ArrayRef>;
129
130    /// Returns a new array with the given buffers.
131    fn with_buffers(&self, this: &ArrayRef, buffers: Vec<BufferHandle>) -> VortexResult<ArrayRef>;
132
133    /// Returns a new array with the given slots, bypassing encoding-level validation.
134    ///
135    /// Used by the executor to temporarily carry an array that has had one of its child slots
136    /// taken out (leaving `None`) without panicking `V::validate`. The caller must ensure the
137    /// missing slot is filled back in (via `put_slot_unchecked`) or driven to completion by the
138    /// builder path before the array becomes externally observable.
139    ///
140    /// # Safety
141    ///
142    /// The array returned may have slots whose content does not match the encoding's normal
143    /// invariants. Callers must re-establish those invariants before handing the array to
144    /// anything outside the executor.
145    unsafe fn with_slots_unchecked(&self, this: &ArrayRef, slots: ArraySlots) -> ArrayRef;
146
147    /// Attempt to reduce the array to a simpler representation.
148    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>>;
149
150    /// Attempt to reduce the parent of this array.
151    fn reduce_parent(
152        &self,
153        this: &ArrayRef,
154        parent: &ArrayRef,
155        child_idx: usize,
156    ) -> VortexResult<Option<ArrayRef>>;
157
158    /// Execute the array by taking a single encoding-specific execution step.
159    ///
160    /// This is the checked entry point. If the encoding reports
161    /// [`ExecutionStep::Done`](crate::ExecutionStep::Done), implementations must validate that the
162    /// returned array preserves this array's logical `len` and `dtype`, and must transfer this
163    /// array's statistics to the returned array.
164    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult>;
165
166    /// Execute the array by taking a single encoding-specific execution step without applying
167    /// `Done`-result postconditions.
168    ///
169    /// This exists for the iterative executor, which may call into `execute` on suspended
170    /// executor-private arrays whose slots temporarily contain `None`. In that mode the executor
171    /// itself is responsible for deciding when a `Done` result represents a real logical array,
172    /// enforcing any `len`/`dtype` invariants, and transferring statistics.
173    ///
174    /// # Safety
175    /// The `array` returned should have it's `DType` and len checked
176    /// (optionally it should have its stats propagated from `this`).
177    unsafe fn execute_unchecked(
178        &self,
179        this: ArrayRef,
180        ctx: &mut ExecutionCtx,
181    ) -> VortexResult<ExecutionResult>;
182
183    /// Execute the scalar at the given index.
184    ///
185    /// This method panics if the index is out of bounds for the array.
186    fn execute_scalar(
187        &self,
188        this: &ArrayRef,
189        index: usize,
190        ctx: &mut ExecutionCtx,
191    ) -> VortexResult<Scalar>;
192}
193
194/// Trait for converting a type into a Vortex [`ArrayRef`].
195pub trait IntoArray {
196    /// Convert this value into the erased array handle used by generic APIs.
197    fn into_array(self) -> ArrayRef;
198}
199
200mod private {
201    use super::*;
202
203    pub trait Sealed {}
204
205    impl<V: VTable> Sealed for ArrayData<V> {}
206}
207
208// =============================================================================
209// New path: DynArrayData and supporting trait impls for ArrayData<V>
210// =============================================================================
211
212/// DynArrayData implementation for [`ArrayData<V>`].
213///
214/// This is self-contained: identity methods use `ArrayData<V>`'s own fields (dtype, len, stats),
215/// while data-access methods delegate to VTable methods on the inner `V::TypedArrayData`.
216impl<V: VTable> DynArrayData for ArrayData<V> {
217    fn as_any(&self) -> &dyn Any {
218        self
219    }
220
221    fn as_any_mut(&mut self) -> &mut dyn Any {
222        self
223    }
224
225    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity> {
226        if this.dtype().is_nullable() {
227            let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
228            let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(view)?;
229            if let Validity::Array(array) = &validity {
230                vortex_ensure!(array.len() == this.len(), "Validity array length mismatch");
231                vortex_ensure!(
232                    matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
233                    "Validity array is not non-nullable boolean: {}",
234                    this.encoding_id(),
235                );
236            }
237            Ok(validity)
238        } else {
239            Ok(Validity::NonNullable)
240        }
241    }
242
243    fn append_to_builder(
244        &self,
245        this: &ArrayRef,
246        builder: &mut dyn ArrayBuilder,
247        ctx: &mut ExecutionCtx,
248    ) -> VortexResult<()> {
249        if builder.dtype() != this.dtype() {
250            vortex_panic!(
251                "Builder dtype mismatch: expected {}, got {}",
252                this.dtype(),
253                builder.dtype(),
254            );
255        }
256        let len = builder.len();
257
258        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
259        V::append_to_builder(view, builder, ctx)?;
260
261        assert_eq!(
262            len + this.len(),
263            builder.len(),
264            "Builder length mismatch after writing array for encoding {}",
265            this.encoding_id(),
266        );
267        Ok(())
268    }
269
270    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef> {
271        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
272        (0..V::nchildren(view)).map(|i| V::child(view, i)).collect()
273    }
274
275    fn nchildren(&self, this: &ArrayRef) -> usize {
276        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
277        V::nchildren(view)
278    }
279
280    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef> {
281        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
282        (idx < V::nchildren(view)).then(|| V::child(view, idx))
283    }
284
285    fn children_names(&self, this: &ArrayRef) -> Vec<String> {
286        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
287        (0..V::nchildren(view))
288            .map(|i| V::child_name(view, i))
289            .collect()
290    }
291
292    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)> {
293        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
294        (0..V::nchildren(view))
295            .map(|i| (V::child_name(view, i), V::child(view, i)))
296            .collect()
297    }
298
299    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer> {
300        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
301        (0..V::nbuffers(view))
302            .map(|i| V::buffer(view, i).to_host_sync())
303            .collect()
304    }
305
306    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle> {
307        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
308        (0..V::nbuffers(view)).map(|i| V::buffer(view, i)).collect()
309    }
310
311    fn buffer_names(&self, this: &ArrayRef) -> Vec<String> {
312        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
313        (0..V::nbuffers(view))
314            .filter_map(|i| V::buffer_name(view, i))
315            .collect()
316    }
317
318    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)> {
319        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
320        (0..V::nbuffers(view))
321            .filter_map(|i| V::buffer_name(view, i).map(|name| (name, V::buffer(view, i))))
322            .collect()
323    }
324
325    fn nbuffers(&self, this: &ArrayRef) -> usize {
326        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
327        V::nbuffers(view)
328    }
329
330    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String {
331        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
332        V::slot_name(view, idx)
333    }
334
335    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
336        std::fmt::Display::fmt(&self.data, f)
337    }
338
339    fn dyn_array_hash(&self, state: &mut dyn Hasher, accuracy: crate::EqMode) {
340        let mut wrapper = HasherWrapper(state);
341        // Note: metadata (len, dtype, encoding_id) and slots are hashed by ArrayRef.
342        self.data.array_hash(&mut wrapper, accuracy);
343    }
344
345    fn dyn_array_eq(&self, other: &ArrayRef, accuracy: crate::EqMode) -> bool {
346        // Note: metadata (len, dtype, encoding_id) and slots are compared by ArrayRef.
347        other
348            .dyn_array()
349            .as_any()
350            .downcast_ref::<Self>()
351            .is_some_and(|other_inner| self.data.array_eq(&other_inner.data, accuracy))
352    }
353
354    fn with_slots(&self, this: &ArrayRef, slots: ArraySlots) -> VortexResult<ArrayRef> {
355        let stats = this.statistics().to_owned();
356        Ok(Array::<V>::try_from_parts(
357            ArrayParts::new(
358                self.vtable.clone(),
359                this.dtype().clone(),
360                this.len(),
361                self.data.clone(),
362            )
363            .with_slots(slots),
364        )?
365        .with_stats_set(stats)
366        .into_array())
367    }
368
369    fn with_buffers(&self, this: &ArrayRef, buffers: Vec<BufferHandle>) -> VortexResult<ArrayRef> {
370        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
371        let stats = this.statistics().to_owned();
372        Ok(
373            Array::<V>::try_from_parts(V::with_buffers(&self.vtable, view, &buffers)?)?
374                .with_stats_set(stats)
375                .into_array(),
376        )
377    }
378
379    unsafe fn with_slots_unchecked(&self, this: &ArrayRef, slots: ArraySlots) -> ArrayRef {
380        // SAFETY: we intentionally skip `V::validate` here. Caller guarantees that the resulting
381        // array is either repaired or not externally observed.
382        let store = unsafe {
383            ArrayInner::<ArrayData<V>>::new_unchecked(
384                self.vtable.clone(),
385                this.len(),
386                this.dtype().clone(),
387                self.data.clone(),
388                slots,
389                this.statistics().to_array_stats(),
390            )
391        };
392        ArrayRef::from_inner(Arc::new(store))
393    }
394
395    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>> {
396        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
397        let Some(reduced) = V::reduce(view)? else {
398            return Ok(None);
399        };
400        vortex_ensure!(
401            reduced.len() == this.len(),
402            "Reduced array length mismatch from {} to {}",
403            this.encoding_id(),
404            reduced.encoding_id()
405        );
406        vortex_ensure!(
407            reduced.dtype() == this.dtype(),
408            "Reduced array dtype mismatch from {} to {}",
409            this.encoding_id(),
410            reduced.encoding_id()
411        );
412        Ok(Some(reduced))
413    }
414
415    fn reduce_parent(
416        &self,
417        this: &ArrayRef,
418        parent: &ArrayRef,
419        child_idx: usize,
420    ) -> VortexResult<Option<ArrayRef>> {
421        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
422        let Some(reduced) = V::reduce_parent(view, parent, child_idx)? else {
423            return Ok(None);
424        };
425
426        vortex_ensure!(
427            reduced.len() == parent.len(),
428            "Reduced array length mismatch from {} to {}",
429            parent.encoding_id(),
430            reduced.encoding_id()
431        );
432        vortex_ensure!(
433            reduced.dtype() == parent.dtype(),
434            "Reduced array dtype mismatch from {} to {}",
435            parent.encoding_id(),
436            reduced.encoding_id()
437        );
438
439        Ok(Some(reduced))
440    }
441
442    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
443        let len = this.len();
444        let dtype = this.dtype().clone();
445        let stats = this.statistics().to_array_stats();
446        let result = unsafe { self.execute_unchecked(this, ctx)? };
447
448        if matches!(result.step(), ExecutionStep::Done) {
449            if cfg!(debug_assertions) {
450                vortex_ensure!(
451                    result.array().len() == len,
452                    "Result length mismatch for {:?}",
453                    self.vtable
454                );
455                vortex_ensure!(
456                    result.array().dtype() == &dtype,
457                    "Executed canonical dtype mismatch for {:?}",
458                    self.vtable
459                );
460            }
461
462            result
463                .array()
464                .statistics()
465                .set_iter(crate::stats::StatsSet::from(stats).into_iter());
466        }
467
468        Ok(result)
469    }
470
471    unsafe fn execute_unchecked(
472        &self,
473        this: ArrayRef,
474        ctx: &mut ExecutionCtx,
475    ) -> VortexResult<ExecutionResult> {
476        let typed = Array::<V>::try_from_array_ref(this)
477            .map_err(|_| vortex_err!("Failed to downcast array for execute"))
478            .vortex_expect("Failed to downcast array for execute");
479        V::execute(typed, ctx)
480    }
481
482    fn execute_scalar(
483        &self,
484        this: &ArrayRef,
485        index: usize,
486        ctx: &mut ExecutionCtx,
487    ) -> VortexResult<Scalar> {
488        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
489        <V::OperationsVTable as OperationsVTable<V>>::scalar_at(view, index, ctx)
490    }
491}
492
/// `Sized` adapter over a `&mut dyn Hasher`, so the trait object can be handed to APIs that
/// take a concrete `Hasher`.
///
/// Only `finish` and `write` are forwarded; every other `Hasher` method uses its default
/// implementation.
struct HasherWrapper<'a>(&'a mut dyn Hasher);

impl Hasher for HasherWrapper<'_> {
    fn finish(&self) -> u64 {
        let Self(inner) = self;
        inner.finish()
    }

    fn write(&mut self, bytes: &[u8]) {
        let Self(inner) = self;
        inner.write(bytes);
    }
}
505
506/// ArrayId is a globally unique name for the array's vtable.
507pub type ArrayId = Id;