Skip to main content

vortex_array/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use vortex_buffer::ByteBuffer;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_error::vortex_err;
15use vortex_error::vortex_panic;
16use vortex_session::registry::Id;
17
18use crate::ExecutionCtx;
19use crate::buffer::BufferHandle;
20use crate::builders::ArrayBuilder;
21use crate::dtype::DType;
22use crate::dtype::Nullability;
23use crate::executor::ExecutionResult;
24use crate::executor::ExecutionStep;
25use crate::scalar::Scalar;
26use crate::stats::ArrayStats;
27use crate::validity::Validity;
28
29mod erased;
30pub use erased::*;
31
32mod plugin;
33pub use plugin::*;
34
35mod foreign;
36pub(crate) use foreign::*;
37
38mod typed;
39pub use typed::*;
40
41pub mod vtable;
42pub use vtable::*;
43
44mod view;
45pub use view::*;
46
47use crate::hash::ArrayEq;
48use crate::hash::ArrayHash;
49
/// The core API trait for all Vortex arrays.
///
/// This trait is sealed (it requires `private::Sealed`) and cannot be implemented outside of
/// `vortex-array`. The trait itself is declared `pub(crate)` and `#[doc(hidden)]`;
/// use [`ArrayRef`] as the primary handle for working with arrays.
///
/// Many methods take a `this` argument in addition to `&self`.
/// NOTE(review): `this` is presumably the [`ArrayRef`] handle that wraps `self` — confirm
/// against the callers in `erased`.
#[doc(hidden)]
pub(crate) trait DynArray: 'static + private::Sealed + Send + Sync + Debug {
    /// Returns the array as a reference to a generic [`Any`] trait object.
    fn as_any(&self) -> &dyn Any;

    /// Returns the array as a mutable reference to a generic [`Any`] trait object.
    fn as_any_mut(&mut self) -> &mut dyn Any;

    /// Converts an owned array allocation into an owned [`Any`] allocation for downcasting.
    fn into_any_arc(self: std::sync::Arc<Self>) -> std::sync::Arc<dyn Any + Send + Sync>;

    /// Returns the length of the array.
    fn len(&self) -> usize;

    /// Returns the logical Vortex [`DType`] of the array.
    fn dtype(&self) -> &DType;

    /// Returns the slots of the array.
    ///
    /// A slot is `None` when it currently holds no child array.
    fn slots(&self) -> &[Option<ArrayRef>];

    /// Returns mutable slots of the array.
    ///
    /// # Safety
    ///
    /// Any slot (`Some(child)`) that replaces an existing slot must have a compatible
    /// [`DType`] and length. Currently "compatible" means equal, but there is no reason why
    /// that must be the case.
    unsafe fn slots_mut(&mut self) -> &mut [Option<ArrayRef>];

    /// Returns the encoding ID of the array.
    fn encoding_id(&self) -> ArrayId;

    /// Returns the [`Validity`] of the array.
    ///
    /// # Errors
    ///
    /// Returns an error if the validity cannot be computed.
    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity>;

    /// Writes the array into the canonical builder.
    ///
    /// The [`DType`] of the builder must match that of the array.
    fn append_to_builder(
        &self,
        this: &ArrayRef,
        builder: &mut dyn ArrayBuilder,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<()>;

    /// Returns the statistics of the array.
    fn statistics(&self) -> &ArrayStats;

    // --- Visitor methods (formerly in ArrayVisitor) ---

    /// Returns the children of the array.
    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef>;

    /// Returns the number of children of the array.
    fn nchildren(&self, this: &ArrayRef) -> usize;

    /// Returns the nth child of the array without allocating a Vec.
    ///
    /// Returns `None` if the index is out of bounds.
    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef>;

    /// Returns the names of the children of the array.
    fn children_names(&self, this: &ArrayRef) -> Vec<String>;

    /// Returns the array's children with their names.
    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)>;

    /// Returns the buffers of the array.
    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer>;

    /// Returns the buffer handles of the array.
    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle>;

    /// Returns the names of the buffers of the array.
    fn buffer_names(&self, this: &ArrayRef) -> Vec<String>;

    /// Returns the array's buffers with their names.
    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)>;

    /// Returns the number of buffers of the array.
    fn nbuffers(&self, this: &ArrayRef) -> usize;

    /// Returns the name of the slot at the given index.
    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String;

    /// Formats a human-readable metadata description.
    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result;

    /// Hashes the array contents including len, dtype, and encoding id.
    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: crate::Precision);

    /// Compares two arrays of the same concrete type for equality.
    fn dyn_array_eq(&self, other: &ArrayRef, precision: crate::Precision) -> bool;

    /// Returns a new array with the given slots.
    fn with_slots(&self, this: ArrayRef, slots: Vec<Option<ArrayRef>>) -> VortexResult<ArrayRef>;

    /// Returns a new array with the given slots, bypassing encoding-level validation.
    ///
    /// Used by the executor to temporarily carry an array that has had one of its child slots
    /// taken out (leaving `None`) without panicking `V::validate`. The caller must ensure the
    /// missing slot is filled back in (via `put_slot_unchecked`) or driven to completion by the
    /// builder path before the array becomes externally observable.
    ///
    /// # Safety
    ///
    /// The array returned may have slots whose content does not match the encoding's normal
    /// invariants. Callers must re-establish those invariants before handing the array to
    /// anything outside the executor.
    unsafe fn with_slots_unchecked(
        &self,
        this: &ArrayRef,
        slots: Vec<Option<ArrayRef>>,
    ) -> ArrayRef;

    /// Attempt to reduce the array to a simpler representation.
    ///
    /// Returns `Ok(None)` when no reduction applies.
    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>>;

    /// Attempt to reduce the parent of this array.
    ///
    /// `child_idx` is the index identifying this array among `parent`'s children.
    /// Returns `Ok(None)` when no reduction applies.
    fn reduce_parent(
        &self,
        this: &ArrayRef,
        parent: &ArrayRef,
        child_idx: usize,
    ) -> VortexResult<Option<ArrayRef>>;

    /// Execute the array by taking a single encoding-specific execution step.
    ///
    /// This is the checked entry point. If the encoding reports
    /// [`ExecutionStep::Done`](crate::ExecutionStep::Done), implementations must validate that the
    /// returned array preserves this array's logical `len` and `dtype`, and must transfer this
    /// array's statistics to the returned array.
    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult>;

    /// Execute the array by taking a single encoding-specific execution step without applying
    /// `Done`-result postconditions.
    ///
    /// This exists for the iterative executor, which may call into `execute` on suspended
    /// executor-private arrays whose slots temporarily contain `None`. In that mode the executor
    /// itself is responsible for deciding when a `Done` result represents a real logical array,
    /// enforcing any `len`/`dtype` invariants, and transferring statistics.
    ///
    /// # Safety
    ///
    /// The caller must check the `DType` and length of the returned array itself
    /// (and may optionally propagate the statistics from `this` onto it).
    unsafe fn execute_unchecked(
        &self,
        this: ArrayRef,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<ExecutionResult>;

    /// Attempt to execute the parent of this array.
    ///
    /// `child_idx` is the index identifying this array among `parent`'s children.
    /// Returns `Ok(None)` when this array does not execute the parent.
    fn execute_parent(
        &self,
        this: &ArrayRef,
        parent: &ArrayRef,
        child_idx: usize,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Option<ArrayRef>>;

    /// Execute the scalar at the given index.
    ///
    /// # Panics
    ///
    /// This method panics if the index is out of bounds for the array.
    fn execute_scalar(
        &self,
        this: &ArrayRef,
        index: usize,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Scalar>;
}
222
/// Trait for converting a type into a Vortex [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
227
// Sealing machinery: `Sealed` is a supertrait of `DynArray`, and because this module is private
// only types that receive a `Sealed` impl inside it can implement `DynArray`.
mod private {
    use super::*;

    // Marker supertrait with no methods; it exists solely to restrict who may implement
    // `DynArray`.
    pub trait Sealed {}

    // `ArrayInner<V>` is the only type granted `Sealed` in this module.
    impl<V: VTable> Sealed for ArrayInner<V> {}
}
235
236// =============================================================================
237// New path: DynArray and supporting trait impls for ArrayInner<V>
238// =============================================================================
239
240/// DynArray implementation for [`ArrayInner<V>`].
241///
242/// This is self-contained: identity methods use `ArrayInner<V>`'s own fields (dtype, len, stats),
243/// while data-access methods delegate to VTable methods on the inner `V::ArrayData`.
244impl<V: VTable> DynArray for ArrayInner<V> {
245    fn as_any(&self) -> &dyn Any {
246        self
247    }
248
249    fn as_any_mut(&mut self) -> &mut dyn Any {
250        self
251    }
252
253    fn into_any_arc(self: std::sync::Arc<Self>) -> std::sync::Arc<dyn Any + Send + Sync> {
254        self
255    }
256
257    fn len(&self) -> usize {
258        self.len
259    }
260
261    fn dtype(&self) -> &DType {
262        &self.dtype
263    }
264
265    fn slots(&self) -> &[Option<ArrayRef>] {
266        &self.slots
267    }
268
269    unsafe fn slots_mut(&mut self) -> &mut [Option<ArrayRef>] {
270        &mut self.slots
271    }
272
273    fn encoding_id(&self) -> ArrayId {
274        self.vtable.id()
275    }
276
277    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity> {
278        if self.dtype.is_nullable() {
279            let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
280            let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(view)?;
281            if let Validity::Array(array) = &validity {
282                vortex_ensure!(array.len() == self.len, "Validity array length mismatch");
283                vortex_ensure!(
284                    matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
285                    "Validity array is not non-nullable boolean: {}",
286                    self.vtable.id(),
287                );
288            }
289            Ok(validity)
290        } else {
291            Ok(Validity::NonNullable)
292        }
293    }
294
295    fn append_to_builder(
296        &self,
297        this: &ArrayRef,
298        builder: &mut dyn ArrayBuilder,
299        ctx: &mut ExecutionCtx,
300    ) -> VortexResult<()> {
301        if builder.dtype() != &self.dtype {
302            vortex_panic!(
303                "Builder dtype mismatch: expected {}, got {}",
304                self.dtype,
305                builder.dtype(),
306            );
307        }
308        let len = builder.len();
309
310        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
311        V::append_to_builder(view, builder, ctx)?;
312
313        assert_eq!(
314            len + self.len,
315            builder.len(),
316            "Builder length mismatch after writing array for encoding {}",
317            self.vtable.id(),
318        );
319        Ok(())
320    }
321
322    fn statistics(&self) -> &ArrayStats {
323        &self.stats
324    }
325
326    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef> {
327        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
328        (0..V::nchildren(view)).map(|i| V::child(view, i)).collect()
329    }
330
331    fn nchildren(&self, this: &ArrayRef) -> usize {
332        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
333        V::nchildren(view)
334    }
335
336    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef> {
337        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
338        (idx < V::nchildren(view)).then(|| V::child(view, idx))
339    }
340
341    fn children_names(&self, this: &ArrayRef) -> Vec<String> {
342        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
343        (0..V::nchildren(view))
344            .map(|i| V::child_name(view, i))
345            .collect()
346    }
347
348    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)> {
349        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
350        (0..V::nchildren(view))
351            .map(|i| (V::child_name(view, i), V::child(view, i)))
352            .collect()
353    }
354
355    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer> {
356        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
357        (0..V::nbuffers(view))
358            .map(|i| V::buffer(view, i).to_host_sync())
359            .collect()
360    }
361
362    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle> {
363        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
364        (0..V::nbuffers(view)).map(|i| V::buffer(view, i)).collect()
365    }
366
367    fn buffer_names(&self, this: &ArrayRef) -> Vec<String> {
368        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
369        (0..V::nbuffers(view))
370            .filter_map(|i| V::buffer_name(view, i))
371            .collect()
372    }
373
374    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)> {
375        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
376        (0..V::nbuffers(view))
377            .filter_map(|i| V::buffer_name(view, i).map(|name| (name, V::buffer(view, i))))
378            .collect()
379    }
380
381    fn nbuffers(&self, this: &ArrayRef) -> usize {
382        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
383        V::nbuffers(view)
384    }
385
386    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String {
387        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
388        V::slot_name(view, idx)
389    }
390
391    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
392        std::fmt::Display::fmt(&self.data, f)
393    }
394
395    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: crate::Precision) {
396        let mut wrapper = HasherWrapper(state);
397        self.len.hash(&mut wrapper);
398        self.dtype.hash(&mut wrapper);
399        self.vtable.id().hash(&mut wrapper);
400        self.slots.len().hash(&mut wrapper);
401        for slot in &self.slots {
402            slot.array_hash(&mut wrapper, precision);
403        }
404        self.data.array_hash(&mut wrapper, precision);
405    }
406
407    fn dyn_array_eq(&self, other: &ArrayRef, precision: crate::Precision) -> bool {
408        other
409            .inner()
410            .as_any()
411            .downcast_ref::<Self>()
412            .is_some_and(|other_inner| {
413                self.len == other.len()
414                    && self.dtype == *other.dtype()
415                    && self.vtable.id() == other.encoding_id()
416                    && self.slots.len() == other_inner.slots.len()
417                    && self
418                        .slots
419                        .iter()
420                        .zip(other_inner.slots.iter())
421                        .all(|(slot, other_slot)| slot.array_eq(other_slot, precision))
422                    && self.data.array_eq(&other_inner.data, precision)
423            })
424    }
425
426    fn with_slots(&self, this: ArrayRef, slots: Vec<Option<ArrayRef>>) -> VortexResult<ArrayRef> {
427        let data = self.data.clone();
428        let stats = this.statistics().to_owned();
429        Ok(Array::<V>::try_from_parts(
430            ArrayParts::new(self.vtable.clone(), this.dtype().clone(), this.len(), data)
431                .with_slots(slots),
432        )?
433        .with_stats_set(stats)
434        .into_array())
435    }
436
437    unsafe fn with_slots_unchecked(
438        &self,
439        this: &ArrayRef,
440        slots: Vec<Option<ArrayRef>>,
441    ) -> ArrayRef {
442        // SAFETY: we intentionally skip `V::validate` here. Caller guarantees that the resulting
443        // array is either repaired or not externally observed.
444        let inner = unsafe {
445            ArrayInner::<V>::from_data_unchecked(
446                self.vtable.clone(),
447                this.dtype().clone(),
448                self.len,
449                self.data.clone(),
450                slots,
451                self.stats.clone(),
452            )
453        };
454        ArrayRef::from_inner(std::sync::Arc::new(inner))
455    }
456
457    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>> {
458        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
459        let Some(reduced) = V::reduce(view)? else {
460            return Ok(None);
461        };
462        vortex_ensure!(
463            reduced.len() == this.len(),
464            "Reduced array length mismatch from {} to {}",
465            this.encoding_id(),
466            reduced.encoding_id()
467        );
468        vortex_ensure!(
469            reduced.dtype() == this.dtype(),
470            "Reduced array dtype mismatch from {} to {}",
471            this.encoding_id(),
472            reduced.encoding_id()
473        );
474        Ok(Some(reduced))
475    }
476
477    fn reduce_parent(
478        &self,
479        this: &ArrayRef,
480        parent: &ArrayRef,
481        child_idx: usize,
482    ) -> VortexResult<Option<ArrayRef>> {
483        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
484        let Some(reduced) = V::reduce_parent(view, parent, child_idx)? else {
485            return Ok(None);
486        };
487
488        vortex_ensure!(
489            reduced.len() == parent.len(),
490            "Reduced array length mismatch from {} to {}",
491            parent.encoding_id(),
492            reduced.encoding_id()
493        );
494        vortex_ensure!(
495            reduced.dtype() == parent.dtype(),
496            "Reduced array dtype mismatch from {} to {}",
497            parent.encoding_id(),
498            reduced.encoding_id()
499        );
500
501        Ok(Some(reduced))
502    }
503
504    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
505        let len = this.len();
506        let dtype = this.dtype().clone();
507        let stats = this.statistics().to_array_stats();
508        let result = unsafe { self.execute_unchecked(this, ctx)? };
509
510        if matches!(result.step(), ExecutionStep::Done) {
511            if cfg!(debug_assertions) {
512                vortex_ensure!(
513                    result.array().len() == len,
514                    "Result length mismatch for {:?}",
515                    self.vtable
516                );
517                vortex_ensure!(
518                    result.array().dtype() == &dtype,
519                    "Executed canonical dtype mismatch for {:?}",
520                    self.vtable
521                );
522            }
523
524            result
525                .array()
526                .statistics()
527                .set_iter(crate::stats::StatsSet::from(stats).into_iter());
528        }
529
530        Ok(result)
531    }
532
533    unsafe fn execute_unchecked(
534        &self,
535        this: ArrayRef,
536        ctx: &mut ExecutionCtx,
537    ) -> VortexResult<ExecutionResult> {
538        let typed = Array::<V>::try_from_array_ref(this)
539            .map_err(|_| vortex_err!("Failed to downcast array for execute"))
540            .vortex_expect("Failed to downcast array for execute");
541        V::execute(typed, ctx)
542    }
543
544    fn execute_parent(
545        &self,
546        this: &ArrayRef,
547        parent: &ArrayRef,
548        child_idx: usize,
549        ctx: &mut ExecutionCtx,
550    ) -> VortexResult<Option<ArrayRef>> {
551        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
552        let Some(result) = V::execute_parent(view, parent, child_idx, ctx)? else {
553            return Ok(None);
554        };
555
556        if cfg!(debug_assertions) {
557            vortex_ensure!(
558                result.len() == parent.len(),
559                "Executed parent canonical length mismatch"
560            );
561            vortex_ensure!(
562                result.dtype() == parent.dtype(),
563                "Executed parent canonical dtype mismatch"
564            );
565        }
566
567        Ok(Some(result))
568    }
569
570    fn execute_scalar(
571        &self,
572        this: &ArrayRef,
573        index: usize,
574        ctx: &mut ExecutionCtx,
575    ) -> VortexResult<Scalar> {
576        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
577        <V::OperationsVTable as OperationsVTable<V>>::scalar_at(view, index, ctx)
578    }
579}
580
/// Sized adapter over a `&mut dyn Hasher`.
///
/// `Hash::hash` requires a sized `Hasher`, so this newtype forwards the two required methods to
/// the borrowed trait object.
struct HasherWrapper<'a>(&'a mut dyn Hasher);

impl Hasher for HasherWrapper<'_> {
    /// Reads the current hash value from the wrapped hasher.
    fn finish(&self) -> u64 {
        Hasher::finish(&*self.0)
    }

    /// Feeds `bytes` straight through to the wrapped hasher.
    fn write(&mut self, bytes: &[u8]) {
        Hasher::write(&mut *self.0, bytes);
    }
}
593
/// A globally unique name for an array's vtable.
///
/// This is an alias for the session registry [`Id`] type.
pub type ArrayId = Id;