Skip to main content

vortex_array/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use vortex_buffer::ByteBuffer;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_ensure;
14use vortex_error::vortex_err;
15use vortex_error::vortex_panic;
16use vortex_session::registry::Id;
17
18use crate::ExecutionCtx;
19use crate::buffer::BufferHandle;
20use crate::builders::ArrayBuilder;
21use crate::dtype::DType;
22use crate::dtype::Nullability;
23use crate::executor::ExecutionResult;
24use crate::executor::ExecutionStep;
25use crate::scalar::Scalar;
26use crate::stats::ArrayStats;
27use crate::validity::Validity;
28
29mod erased;
30pub use erased::*;
31
32mod plugin;
33pub use plugin::*;
34
35mod foreign;
36pub(crate) use foreign::*;
37
38mod typed;
39pub use typed::*;
40
41pub mod vtable;
42pub use vtable::*;
43
44mod view;
45pub use view::*;
46
47use crate::hash::ArrayEq;
48use crate::hash::ArrayHash;
49
50/// The public API trait for all Vortex arrays.
51///
52/// This trait is sealed and cannot be implemented outside of `vortex-array`.
53/// Use [`ArrayRef`] as the primary handle for working with arrays.
54#[doc(hidden)]
55pub(crate) trait DynArray: 'static + private::Sealed + Send + Sync + Debug {
56    /// Returns the array as a reference to a generic [`Any`] trait object.
57    fn as_any(&self) -> &dyn Any;
58
59    /// Converts an owned array allocation into an owned [`Any`] allocation for downcasting.
60    fn into_any_arc(self: std::sync::Arc<Self>) -> std::sync::Arc<dyn Any + Send + Sync>;
61
62    /// Returns the length of the array.
63    fn len(&self) -> usize;
64
65    /// Returns the logical Vortex [`DType`] of the array.
66    fn dtype(&self) -> &DType;
67
68    /// Returns the slots of the array.
69    fn slots(&self) -> &[Option<ArrayRef>];
70
71    /// Returns the encoding ID of the array.
72    fn encoding_id(&self) -> ArrayId;
73
74    /// Returns the [`Validity`] of the array.
75    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity>;
76
77    /// Writes the array into the canonical builder.
78    ///
79    /// The [`DType`] of the builder must match that of the array.
80    fn append_to_builder(
81        &self,
82        this: &ArrayRef,
83        builder: &mut dyn ArrayBuilder,
84        ctx: &mut ExecutionCtx,
85    ) -> VortexResult<()>;
86
87    /// Returns the statistics of the array.
88    fn statistics(&self) -> &ArrayStats;
89
90    // --- Visitor methods (formerly in ArrayVisitor) ---
91
92    /// Returns the children of the array.
93    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef>;
94
95    /// Returns the number of children of the array.
96    fn nchildren(&self, this: &ArrayRef) -> usize;
97
98    /// Returns the nth child of the array without allocating a Vec.
99    ///
100    /// Returns `None` if the index is out of bounds.
101    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef>;
102
103    /// Returns the names of the children of the array.
104    fn children_names(&self, this: &ArrayRef) -> Vec<String>;
105
106    /// Returns the array's children with their names.
107    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)>;
108
109    /// Returns the buffers of the array.
110    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer>;
111
112    /// Returns the buffer handles of the array.
113    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle>;
114
115    /// Returns the names of the buffers of the array.
116    fn buffer_names(&self, this: &ArrayRef) -> Vec<String>;
117
118    /// Returns the array's buffers with their names.
119    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)>;
120
121    /// Returns the number of buffers of the array.
122    fn nbuffers(&self, this: &ArrayRef) -> usize;
123
124    /// Returns the name of the slot at the given index.
125    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String;
126
127    /// Formats a human-readable metadata description.
128    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result;
129
130    /// Hashes the array contents including len, dtype, and encoding id.
131    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: crate::Precision);
132
133    /// Compares two arrays of the same concrete type for equality.
134    fn dyn_array_eq(&self, other: &ArrayRef, precision: crate::Precision) -> bool;
135
136    /// Returns a new array with the given slots.
137    fn with_slots(&self, this: ArrayRef, slots: Vec<Option<ArrayRef>>) -> VortexResult<ArrayRef>;
138
139    /// Attempt to reduce the array to a simpler representation.
140    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>>;
141
142    /// Attempt to reduce the parent of this array.
143    fn reduce_parent(
144        &self,
145        this: &ArrayRef,
146        parent: &ArrayRef,
147        child_idx: usize,
148    ) -> VortexResult<Option<ArrayRef>>;
149
150    /// Execute the array by taking a single encoding-specific execution step.
151    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult>;
152
153    /// Attempt to execute the parent of this array.
154    fn execute_parent(
155        &self,
156        this: &ArrayRef,
157        parent: &ArrayRef,
158        child_idx: usize,
159        ctx: &mut ExecutionCtx,
160    ) -> VortexResult<Option<ArrayRef>>;
161
162    /// Execute the scalar at the given index.
163    ///
164    /// This method panics if the index is out of bounds for the array.
165    fn execute_scalar(
166        &self,
167        this: &ArrayRef,
168        index: usize,
169        ctx: &mut ExecutionCtx,
170    ) -> VortexResult<Scalar>;
171}
172
173/// Trait for converting a type into a Vortex [`ArrayRef`].
174pub trait IntoArray {
175    fn into_array(self) -> ArrayRef;
176}
177
178mod private {
179    use super::*;
180
181    pub trait Sealed {}
182
183    impl<V: VTable> Sealed for ArrayInner<V> {}
184}
185
// =============================================================================
// New path: DynArray and supporting trait impls for ArrayInner<V>
// =============================================================================
189
190/// DynArray implementation for [`ArrayInner<V>`].
191///
192/// This is self-contained: identity methods use `ArrayInner<V>`'s own fields (dtype, len, stats),
193/// while data-access methods delegate to VTable methods on the inner `V::ArrayData`.
194impl<V: VTable> DynArray for ArrayInner<V> {
195    fn as_any(&self) -> &dyn Any {
196        self
197    }
198
199    fn into_any_arc(self: std::sync::Arc<Self>) -> std::sync::Arc<dyn Any + Send + Sync> {
200        self
201    }
202
203    fn len(&self) -> usize {
204        self.len
205    }
206
207    fn dtype(&self) -> &DType {
208        &self.dtype
209    }
210
211    fn slots(&self) -> &[Option<ArrayRef>] {
212        &self.slots
213    }
214
215    fn encoding_id(&self) -> ArrayId {
216        self.vtable.id()
217    }
218
219    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity> {
220        if self.dtype.is_nullable() {
221            let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
222            let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(view)?;
223            if let Validity::Array(array) = &validity {
224                vortex_ensure!(array.len() == self.len, "Validity array length mismatch");
225                vortex_ensure!(
226                    matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
227                    "Validity array is not non-nullable boolean: {}",
228                    self.vtable.id(),
229                );
230            }
231            Ok(validity)
232        } else {
233            Ok(Validity::NonNullable)
234        }
235    }
236
237    fn append_to_builder(
238        &self,
239        this: &ArrayRef,
240        builder: &mut dyn ArrayBuilder,
241        ctx: &mut ExecutionCtx,
242    ) -> VortexResult<()> {
243        if builder.dtype() != &self.dtype {
244            vortex_panic!(
245                "Builder dtype mismatch: expected {}, got {}",
246                self.dtype,
247                builder.dtype(),
248            );
249        }
250        let len = builder.len();
251
252        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
253        V::append_to_builder(view, builder, ctx)?;
254
255        assert_eq!(
256            len + self.len,
257            builder.len(),
258            "Builder length mismatch after writing array for encoding {}",
259            self.vtable.id(),
260        );
261        Ok(())
262    }
263
264    fn statistics(&self) -> &ArrayStats {
265        &self.stats
266    }
267
268    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef> {
269        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
270        (0..V::nchildren(view)).map(|i| V::child(view, i)).collect()
271    }
272
273    fn nchildren(&self, this: &ArrayRef) -> usize {
274        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
275        V::nchildren(view)
276    }
277
278    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef> {
279        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
280        (idx < V::nchildren(view)).then(|| V::child(view, idx))
281    }
282
283    fn children_names(&self, this: &ArrayRef) -> Vec<String> {
284        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
285        (0..V::nchildren(view))
286            .map(|i| V::child_name(view, i))
287            .collect()
288    }
289
290    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)> {
291        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
292        (0..V::nchildren(view))
293            .map(|i| (V::child_name(view, i), V::child(view, i)))
294            .collect()
295    }
296
297    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer> {
298        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
299        (0..V::nbuffers(view))
300            .map(|i| V::buffer(view, i).to_host_sync())
301            .collect()
302    }
303
304    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle> {
305        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
306        (0..V::nbuffers(view)).map(|i| V::buffer(view, i)).collect()
307    }
308
309    fn buffer_names(&self, this: &ArrayRef) -> Vec<String> {
310        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
311        (0..V::nbuffers(view))
312            .filter_map(|i| V::buffer_name(view, i))
313            .collect()
314    }
315
316    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)> {
317        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
318        (0..V::nbuffers(view))
319            .filter_map(|i| V::buffer_name(view, i).map(|name| (name, V::buffer(view, i))))
320            .collect()
321    }
322
323    fn nbuffers(&self, this: &ArrayRef) -> usize {
324        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
325        V::nbuffers(view)
326    }
327
328    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String {
329        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
330        V::slot_name(view, idx)
331    }
332
333    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
334        std::fmt::Display::fmt(&self.data, f)
335    }
336
337    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: crate::Precision) {
338        let mut wrapper = HasherWrapper(state);
339        self.len.hash(&mut wrapper);
340        self.dtype.hash(&mut wrapper);
341        self.vtable.id().hash(&mut wrapper);
342        self.slots.len().hash(&mut wrapper);
343        for slot in &self.slots {
344            slot.array_hash(&mut wrapper, precision);
345        }
346        self.data.array_hash(&mut wrapper, precision);
347    }
348
349    fn dyn_array_eq(&self, other: &ArrayRef, precision: crate::Precision) -> bool {
350        other
351            .inner()
352            .as_any()
353            .downcast_ref::<Self>()
354            .is_some_and(|other_inner| {
355                self.len == other.len()
356                    && self.dtype == *other.dtype()
357                    && self.vtable.id() == other.encoding_id()
358                    && self.slots.len() == other_inner.slots.len()
359                    && self
360                        .slots
361                        .iter()
362                        .zip(other_inner.slots.iter())
363                        .all(|(slot, other_slot)| slot.array_eq(other_slot, precision))
364                    && self.data.array_eq(&other_inner.data, precision)
365            })
366    }
367
368    fn with_slots(&self, this: ArrayRef, slots: Vec<Option<ArrayRef>>) -> VortexResult<ArrayRef> {
369        let data = self.data.clone();
370        let stats = this.statistics().to_owned();
371        Ok(Array::<V>::try_from_parts(
372            ArrayParts::new(self.vtable.clone(), this.dtype().clone(), this.len(), data)
373                .with_slots(slots),
374        )?
375        .with_stats_set(stats)
376        .into_array())
377    }
378
379    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>> {
380        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
381        let Some(reduced) = V::reduce(view)? else {
382            return Ok(None);
383        };
384        vortex_ensure!(
385            reduced.len() == this.len(),
386            "Reduced array length mismatch from {} to {}",
387            this.encoding_id(),
388            reduced.encoding_id()
389        );
390        vortex_ensure!(
391            reduced.dtype() == this.dtype(),
392            "Reduced array dtype mismatch from {} to {}",
393            this.encoding_id(),
394            reduced.encoding_id()
395        );
396        Ok(Some(reduced))
397    }
398
399    fn reduce_parent(
400        &self,
401        this: &ArrayRef,
402        parent: &ArrayRef,
403        child_idx: usize,
404    ) -> VortexResult<Option<ArrayRef>> {
405        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
406        let Some(reduced) = V::reduce_parent(view, parent, child_idx)? else {
407            return Ok(None);
408        };
409
410        vortex_ensure!(
411            reduced.len() == parent.len(),
412            "Reduced array length mismatch from {} to {}",
413            parent.encoding_id(),
414            reduced.encoding_id()
415        );
416        vortex_ensure!(
417            reduced.dtype() == parent.dtype(),
418            "Reduced array dtype mismatch from {} to {}",
419            parent.encoding_id(),
420            reduced.encoding_id()
421        );
422
423        Ok(Some(reduced))
424    }
425
426    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
427        let len = this.len();
428        let dtype = this.dtype().clone();
429        let stats = this.statistics().to_owned();
430
431        let typed = Array::<V>::try_from_array_ref(this)
432            .map_err(|_| vortex_err!("Failed to downcast array for execute"))
433            .vortex_expect("Failed to downcast array for execute");
434        let result = V::execute(typed, ctx)?;
435
436        if matches!(result.step(), ExecutionStep::Done) {
437            if cfg!(debug_assertions) {
438                vortex_ensure!(
439                    result.array().len() == len,
440                    "Result length mismatch for {:?}",
441                    self.vtable
442                );
443                vortex_ensure!(
444                    result.array().dtype() == &dtype,
445                    "Executed canonical dtype mismatch for {:?}",
446                    self.vtable
447                );
448            }
449
450            result.array().statistics().set_iter(stats.into_iter());
451        }
452
453        Ok(result)
454    }
455
456    fn execute_parent(
457        &self,
458        this: &ArrayRef,
459        parent: &ArrayRef,
460        child_idx: usize,
461        ctx: &mut ExecutionCtx,
462    ) -> VortexResult<Option<ArrayRef>> {
463        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
464        let Some(result) = V::execute_parent(view, parent, child_idx, ctx)? else {
465            return Ok(None);
466        };
467
468        if cfg!(debug_assertions) {
469            vortex_ensure!(
470                result.len() == parent.len(),
471                "Executed parent canonical length mismatch"
472            );
473            vortex_ensure!(
474                result.dtype() == parent.dtype(),
475                "Executed parent canonical dtype mismatch"
476            );
477        }
478
479        Ok(Some(result))
480    }
481
482    fn execute_scalar(
483        &self,
484        this: &ArrayRef,
485        index: usize,
486        ctx: &mut ExecutionCtx,
487    ) -> VortexResult<Scalar> {
488        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
489        <V::OperationsVTable as OperationsVTable<V>>::scalar_at(view, index, ctx)
490    }
491}
492
/// Wrapper around `&mut dyn Hasher` that implements `Hasher` (and is `Sized`).
///
/// `Hash::hash` is generic over a sized hasher, so a bare `dyn Hasher` cannot be passed to it
/// directly; wrapping the trait object in this newtype makes that possible.
///
/// Only `finish` and `write` are forwarded. Every other `Hasher` method keeps its default
/// implementation, which routes through `write`.
struct HasherWrapper<'a>(&'a mut dyn Hasher);

impl Hasher for HasherWrapper<'_> {
    /// Returns the current hash value of the wrapped hasher.
    fn finish(&self) -> u64 {
        self.0.finish()
    }

    /// Feeds `bytes` into the wrapped hasher.
    fn write(&mut self, bytes: &[u8]) {
        self.0.write(bytes);
    }
}
505
506/// ArrayId is a globally unique name for the array's vtable.
507pub type ArrayId = Id;