Skip to main content

vortex_array/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::fmt::Debug;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use arcref::ArcRef;
11use vortex_buffer::ByteBuffer;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_ensure;
15use vortex_error::vortex_err;
16use vortex_error::vortex_panic;
17use vortex_session::VortexSession;
18
19use crate::ExecutionCtx;
20use crate::LEGACY_SESSION;
21use crate::VortexSessionExecute;
22use crate::buffer::BufferHandle;
23use crate::builders::ArrayBuilder;
24use crate::dtype::DType;
25use crate::dtype::Nullability;
26use crate::executor::ExecutionResult;
27use crate::executor::ExecutionStep;
28use crate::scalar::Scalar;
29use crate::stats::ArrayStats;
30use crate::validity::Validity;
31
32mod erased;
33pub use erased::*;
34
35mod plugin;
36pub use plugin::*;
37
38mod foreign;
39pub(crate) use foreign::*;
40
41mod typed;
42pub use typed::*;
43
44pub mod vtable;
45pub use vtable::*;
46
47mod view;
48pub use view::*;
49
50use crate::hash::ArrayEq;
51use crate::hash::ArrayHash;
52
/// The dynamically-typed array trait behind every Vortex array.
///
/// Despite describing the common array API, this trait is crate-internal: it is declared
/// `pub(crate)` and hidden from the generated docs. It is also sealed through the
/// `private::Sealed` supertrait, so it cannot be implemented outside of `vortex-array`.
/// Use [`ArrayRef`] as the primary handle for working with arrays.
///
/// Many methods take a `this` argument alongside `&self`.
/// NOTE(review): `this` is presumably the [`ArrayRef`] that wraps `self` — confirm against the
/// call sites in `ArrayRef`.
#[doc(hidden)]
pub(crate) trait DynArray: 'static + private::Sealed + Send + Sync + Debug {
    /// Returns the array as a reference to a generic [`Any`] trait object, for downcasting by
    /// reference.
    fn as_any(&self) -> &dyn Any;

    /// Converts an owned array allocation into an owned [`Any`] allocation for downcasting.
    fn into_any_arc(self: std::sync::Arc<Self>) -> std::sync::Arc<dyn Any + Send + Sync>;

    /// Returns the length of the array.
    fn len(&self) -> usize;

    /// Returns the logical Vortex [`DType`] of the array.
    fn dtype(&self) -> &DType;

    /// Returns the slots of the array. A slot may be empty (`None`).
    fn slots(&self) -> &[Option<ArrayRef>];

    /// Returns the encoding ID of the array.
    fn encoding_id(&self) -> ArrayId;

    /// Fetch the scalar at the given index.
    ///
    /// NOTE(review): the previous documentation stated that this method panics if the index is
    /// out of bounds for the array, yet the signature returns a [`VortexResult`] — confirm
    /// whether out-of-bounds access panics or yields an error.
    fn scalar_at(&self, this: &ArrayRef, index: usize) -> VortexResult<Scalar>;

    /// Returns the [`Validity`] of the array.
    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity>;

    /// Writes the array into the canonical builder.
    ///
    /// The [`DType`] of the builder must match that of the array.
    fn append_to_builder(
        &self,
        this: &ArrayRef,
        builder: &mut dyn ArrayBuilder,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<()>;

    /// Returns the statistics of the array.
    fn statistics(&self) -> &ArrayStats;

    // --- Visitor methods (formerly in ArrayVisitor) ---

    /// Returns the children of the array as a freshly allocated `Vec`.
    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef>;

    /// Returns the number of children of the array.
    fn nchildren(&self, this: &ArrayRef) -> usize;

    /// Returns the nth child of the array without allocating a Vec.
    ///
    /// Returns `None` if the index is out of bounds.
    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef>;

    /// Returns the names of the children of the array.
    fn children_names(&self, this: &ArrayRef) -> Vec<String>;

    /// Returns the array's children paired with their names.
    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)>;

    /// Returns the buffers of the array as [`ByteBuffer`]s.
    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer>;

    /// Returns the buffer handles of the array.
    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle>;

    /// Returns the names of the buffers of the array.
    fn buffer_names(&self, this: &ArrayRef) -> Vec<String>;

    /// Returns the array's buffers paired with their names.
    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)>;

    /// Returns the number of buffers of the array.
    fn nbuffers(&self, this: &ArrayRef) -> usize;

    /// Returns the name of the slot at the given index.
    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String;

    /// Returns the serialized metadata of the array, or `None` if the array does not
    /// support serialization.
    fn metadata(&self, this: &ArrayRef, session: &VortexSession) -> VortexResult<Option<Vec<u8>>>;

    /// Formats a human-readable metadata description into `f`.
    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result;

    /// Hashes the array contents including len, dtype, and encoding id into `state`.
    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: crate::Precision);

    /// Compares this array with `other` for equality at the given `precision`.
    ///
    /// The comparison is defined between two arrays of the same concrete type.
    fn dyn_array_eq(&self, other: &ArrayRef, precision: crate::Precision) -> bool;

    /// Returns a new array with the given slots.
    fn with_slots(&self, this: ArrayRef, slots: Vec<Option<ArrayRef>>) -> VortexResult<ArrayRef>;

    /// Attempt to reduce the array to a simpler representation.
    ///
    /// Returns `Ok(None)` when no reduction applies.
    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>>;

    /// Attempt to reduce the parent of this array.
    ///
    /// `child_idx` is passed through together with `parent`.
    /// NOTE(review): presumably the position of this array among `parent`'s children — confirm.
    /// Returns `Ok(None)` when no reduction applies.
    fn reduce_parent(
        &self,
        this: &ArrayRef,
        parent: &ArrayRef,
        child_idx: usize,
    ) -> VortexResult<Option<ArrayRef>>;

    /// Execute the array by taking a single encoding-specific execution step.
    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult>;

    /// Attempt to execute the parent of this array.
    ///
    /// Returns `Ok(None)` when this array cannot execute its parent.
    fn execute_parent(
        &self,
        this: &ArrayRef,
        parent: &ArrayRef,
        child_idx: usize,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Option<ArrayRef>>;
}
174
/// Trait for converting a type into a Vortex [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
179
180mod private {
181    use super::*;
182
183    pub trait Sealed {}
184
185    impl<V: VTable> Sealed for ArrayInner<V> {}
186}
187
188// =============================================================================
189// New path: DynArray and supporting trait impls for ArrayInner<V>
190// =============================================================================
191
192/// DynArray implementation for [`ArrayInner<V>`].
193///
194/// This is self-contained: identity methods use `ArrayInner<V>`'s own fields (dtype, len, stats),
195/// while data-access methods delegate to VTable methods on the inner `V::ArrayData`.
196impl<V: VTable> DynArray for ArrayInner<V> {
197    fn as_any(&self) -> &dyn Any {
198        self
199    }
200
201    fn into_any_arc(self: std::sync::Arc<Self>) -> std::sync::Arc<dyn Any + Send + Sync> {
202        self
203    }
204
205    fn len(&self) -> usize {
206        self.len
207    }
208
209    fn dtype(&self) -> &DType {
210        &self.dtype
211    }
212
213    fn slots(&self) -> &[Option<ArrayRef>] {
214        &self.slots
215    }
216
217    fn encoding_id(&self) -> ArrayId {
218        self.vtable.id()
219    }
220
221    fn scalar_at(&self, this: &ArrayRef, index: usize) -> VortexResult<Scalar> {
222        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
223        <V::OperationsVTable as OperationsVTable<V>>::scalar_at(
224            view,
225            index,
226            &mut LEGACY_SESSION.create_execution_ctx(),
227        )
228    }
229
230    fn validity(&self, this: &ArrayRef) -> VortexResult<Validity> {
231        if self.dtype.is_nullable() {
232            let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
233            let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(view)?;
234            if let Validity::Array(array) = &validity {
235                vortex_ensure!(array.len() == self.len, "Validity array length mismatch");
236                vortex_ensure!(
237                    matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
238                    "Validity array is not non-nullable boolean: {}",
239                    self.vtable.id(),
240                );
241            }
242            Ok(validity)
243        } else {
244            Ok(Validity::NonNullable)
245        }
246    }
247
248    fn append_to_builder(
249        &self,
250        this: &ArrayRef,
251        builder: &mut dyn ArrayBuilder,
252        ctx: &mut ExecutionCtx,
253    ) -> VortexResult<()> {
254        if builder.dtype() != &self.dtype {
255            vortex_panic!(
256                "Builder dtype mismatch: expected {}, got {}",
257                self.dtype,
258                builder.dtype(),
259            );
260        }
261        let len = builder.len();
262
263        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
264        V::append_to_builder(view, builder, ctx)?;
265
266        assert_eq!(
267            len + self.len,
268            builder.len(),
269            "Builder length mismatch after writing array for encoding {}",
270            self.vtable.id(),
271        );
272        Ok(())
273    }
274
275    fn statistics(&self) -> &ArrayStats {
276        &self.stats
277    }
278
279    fn children(&self, this: &ArrayRef) -> Vec<ArrayRef> {
280        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
281        (0..V::nchildren(view)).map(|i| V::child(view, i)).collect()
282    }
283
284    fn nchildren(&self, this: &ArrayRef) -> usize {
285        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
286        V::nchildren(view)
287    }
288
289    fn nth_child(&self, this: &ArrayRef, idx: usize) -> Option<ArrayRef> {
290        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
291        (idx < V::nchildren(view)).then(|| V::child(view, idx))
292    }
293
294    fn children_names(&self, this: &ArrayRef) -> Vec<String> {
295        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
296        (0..V::nchildren(view))
297            .map(|i| V::child_name(view, i))
298            .collect()
299    }
300
301    fn named_children(&self, this: &ArrayRef) -> Vec<(String, ArrayRef)> {
302        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
303        (0..V::nchildren(view))
304            .map(|i| (V::child_name(view, i), V::child(view, i)))
305            .collect()
306    }
307
308    fn buffers(&self, this: &ArrayRef) -> Vec<ByteBuffer> {
309        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
310        (0..V::nbuffers(view))
311            .map(|i| V::buffer(view, i).to_host_sync())
312            .collect()
313    }
314
315    fn buffer_handles(&self, this: &ArrayRef) -> Vec<BufferHandle> {
316        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
317        (0..V::nbuffers(view)).map(|i| V::buffer(view, i)).collect()
318    }
319
320    fn buffer_names(&self, this: &ArrayRef) -> Vec<String> {
321        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
322        (0..V::nbuffers(view))
323            .filter_map(|i| V::buffer_name(view, i))
324            .collect()
325    }
326
327    fn named_buffers(&self, this: &ArrayRef) -> Vec<(String, BufferHandle)> {
328        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
329        (0..V::nbuffers(view))
330            .filter_map(|i| V::buffer_name(view, i).map(|name| (name, V::buffer(view, i))))
331            .collect()
332    }
333
334    fn nbuffers(&self, this: &ArrayRef) -> usize {
335        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
336        V::nbuffers(view)
337    }
338
339    fn slot_name(&self, this: &ArrayRef, idx: usize) -> String {
340        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
341        V::slot_name(view, idx)
342    }
343
344    fn metadata(&self, this: &ArrayRef, session: &VortexSession) -> VortexResult<Option<Vec<u8>>> {
345        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
346        V::serialize(view, session)
347    }
348
349    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
350        std::fmt::Display::fmt(&self.data, f)
351    }
352
353    fn dyn_array_hash(&self, state: &mut dyn Hasher, precision: crate::Precision) {
354        let mut wrapper = HasherWrapper(state);
355        self.len.hash(&mut wrapper);
356        self.dtype.hash(&mut wrapper);
357        self.vtable.id().hash(&mut wrapper);
358        self.slots.len().hash(&mut wrapper);
359        for slot in &self.slots {
360            slot.array_hash(&mut wrapper, precision);
361        }
362        self.data.array_hash(&mut wrapper, precision);
363    }
364
365    fn dyn_array_eq(&self, other: &ArrayRef, precision: crate::Precision) -> bool {
366        other
367            .inner()
368            .as_any()
369            .downcast_ref::<Self>()
370            .is_some_and(|other_inner| {
371                self.len == other.len()
372                    && self.dtype == *other.dtype()
373                    && self.vtable.id() == other.encoding_id()
374                    && self.slots.len() == other_inner.slots.len()
375                    && self
376                        .slots
377                        .iter()
378                        .zip(other_inner.slots.iter())
379                        .all(|(slot, other_slot)| slot.array_eq(other_slot, precision))
380                    && self.data.array_eq(&other_inner.data, precision)
381            })
382    }
383
384    fn with_slots(&self, this: ArrayRef, slots: Vec<Option<ArrayRef>>) -> VortexResult<ArrayRef> {
385        let data = self.data.clone();
386        let stats = this.statistics().to_owned();
387        Ok(Array::<V>::try_from_parts(
388            ArrayParts::new(self.vtable.clone(), this.dtype().clone(), this.len(), data)
389                .with_slots(slots),
390        )?
391        .with_stats_set(stats)
392        .into_array())
393    }
394
395    fn reduce(&self, this: &ArrayRef) -> VortexResult<Option<ArrayRef>> {
396        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
397        let Some(reduced) = V::reduce(view)? else {
398            return Ok(None);
399        };
400        vortex_ensure!(
401            reduced.len() == this.len(),
402            "Reduced array length mismatch from {} to {}",
403            this.encoding_id(),
404            reduced.encoding_id()
405        );
406        vortex_ensure!(
407            reduced.dtype() == this.dtype(),
408            "Reduced array dtype mismatch from {} to {}",
409            this.encoding_id(),
410            reduced.encoding_id()
411        );
412        Ok(Some(reduced))
413    }
414
415    fn reduce_parent(
416        &self,
417        this: &ArrayRef,
418        parent: &ArrayRef,
419        child_idx: usize,
420    ) -> VortexResult<Option<ArrayRef>> {
421        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
422        let Some(reduced) = V::reduce_parent(view, parent, child_idx)? else {
423            return Ok(None);
424        };
425
426        vortex_ensure!(
427            reduced.len() == parent.len(),
428            "Reduced array length mismatch from {} to {}",
429            parent.encoding_id(),
430            reduced.encoding_id()
431        );
432        vortex_ensure!(
433            reduced.dtype() == parent.dtype(),
434            "Reduced array dtype mismatch from {} to {}",
435            parent.encoding_id(),
436            reduced.encoding_id()
437        );
438
439        Ok(Some(reduced))
440    }
441
442    fn execute(&self, this: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
443        let len = this.len();
444        let dtype = this.dtype().clone();
445        let stats = this.statistics().to_owned();
446
447        let typed = Array::<V>::try_from_array_ref(this)
448            .map_err(|_| vortex_err!("Failed to downcast array for execute"))
449            .vortex_expect("Failed to downcast array for execute");
450        let result = V::execute(typed, ctx)?;
451
452        if matches!(result.step(), ExecutionStep::Done) {
453            if cfg!(debug_assertions) {
454                vortex_ensure!(
455                    result.array().len() == len,
456                    "Result length mismatch for {:?}",
457                    self.vtable
458                );
459                vortex_ensure!(
460                    result.array().dtype() == &dtype,
461                    "Executed canonical dtype mismatch for {:?}",
462                    self.vtable
463                );
464            }
465
466            result.array().statistics().set_iter(stats.into_iter());
467        }
468
469        Ok(result)
470    }
471
472    fn execute_parent(
473        &self,
474        this: &ArrayRef,
475        parent: &ArrayRef,
476        child_idx: usize,
477        ctx: &mut ExecutionCtx,
478    ) -> VortexResult<Option<ArrayRef>> {
479        let view = unsafe { ArrayView::new_unchecked(this, &self.data) };
480        let Some(result) = V::execute_parent(view, parent, child_idx, ctx)? else {
481            return Ok(None);
482        };
483
484        if cfg!(debug_assertions) {
485            vortex_ensure!(
486                result.len() == parent.len(),
487                "Executed parent canonical length mismatch"
488            );
489            vortex_ensure!(
490                result.dtype() == parent.dtype(),
491                "Executed parent canonical dtype mismatch"
492            );
493        }
494
495        Ok(Some(result))
496    }
497}
498
/// Sized adapter that lets a `&mut dyn Hasher` be handed to `Hash::hash`.
///
/// Only `write` and `finish` are forwarded to the wrapped hasher. The remaining `Hasher`
/// methods keep their provided implementations on this wrapper.
struct HasherWrapper<'a>(&'a mut dyn Hasher);

impl<'a> Hasher for HasherWrapper<'a> {
    /// Feeds `bytes` to the wrapped hasher.
    fn write(&mut self, bytes: &[u8]) {
        Hasher::write(&mut *self.0, bytes);
    }

    /// Returns the wrapped hasher's current hash value.
    fn finish(&self) -> u64 {
        Hasher::finish(&*self.0)
    }
}
511
/// ArrayId is a globally unique name for the array's vtable.
///
/// This is the type returned by `DynArray::encoding_id`; it is an alias for `ArcRef<str>`.
pub type ArrayId = ArcRef<str>;