Skip to main content

vortex_array/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod visitor;
5
6use std::any::Any;
7use std::fmt::Debug;
8use std::fmt::Formatter;
9use std::hash::Hash;
10use std::hash::Hasher;
11use std::ops::Deref;
12use std::ops::Range;
13use std::sync::Arc;
14
15pub use visitor::*;
16use vortex_buffer::ByteBuffer;
17use vortex_error::VortexExpect;
18use vortex_error::VortexResult;
19use vortex_error::vortex_ensure;
20use vortex_error::vortex_err;
21use vortex_error::vortex_panic;
22use vortex_mask::Mask;
23
24use crate::AnyCanonical;
25use crate::ArrayEq;
26use crate::ArrayHash;
27use crate::Canonical;
28use crate::DynArrayEq;
29use crate::DynArrayHash;
30use crate::ExecutionCtx;
31use crate::LEGACY_SESSION;
32use crate::ToCanonical;
33use crate::VortexSessionExecute;
34use crate::aggregate_fn::fns::sum::sum;
35use crate::arrays::Bool;
36use crate::arrays::Constant;
37use crate::arrays::DictArray;
38use crate::arrays::FilterArray;
39use crate::arrays::Null;
40use crate::arrays::Primitive;
41use crate::arrays::ScalarFnVTable;
42use crate::arrays::SliceArray;
43use crate::arrays::VarBin;
44use crate::arrays::VarBinView;
45use crate::buffer::BufferHandle;
46use crate::builders::ArrayBuilder;
47use crate::dtype::DType;
48use crate::dtype::Nullability;
49use crate::expr::stats::Precision;
50use crate::expr::stats::Stat;
51use crate::expr::stats::StatsProviderExt;
52use crate::hash;
53use crate::matcher::Matcher;
54use crate::optimizer::ArrayOptimizer;
55use crate::scalar::Scalar;
56use crate::scalar_fn::ReduceNode;
57use crate::scalar_fn::ReduceNodeRef;
58use crate::scalar_fn::ScalarFnRef;
59use crate::stats::StatsSetRef;
60use crate::validity::Validity;
61use crate::vtable::Array;
62use crate::vtable::ArrayId;
63use crate::vtable::DynVTable;
64use crate::vtable::OperationsVTable;
65use crate::vtable::VTable;
66use crate::vtable::ValidityVTable;
67
/// The public API trait for all Vortex arrays.
///
/// The trait is sealed through `private::Sealed`: within this module that marker is only
/// implemented for `Array<V>`, `ArrayAdapter<V>` and `Arc<dyn DynArray>`.
pub trait DynArray:
    'static
    + private::Sealed
    + Send
    + Sync
    + Debug
    + DynArrayEq
    + DynArrayHash
    + ArrayVisitor
    + ReduceNode
{
    /// Returns the array as a reference to a generic [`Any`] trait object.
    fn as_any(&self) -> &dyn Any;

    /// Returns the array as an `Arc<dyn Any + Send + Sync>`.
    ///
    /// Consumes the `Arc` receiver, which allows the caller to `downcast` it afterwards.
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Returns the array as an [`ArrayRef`].
    fn to_array(&self) -> ArrayRef;

    /// Returns the length of the array.
    fn len(&self) -> usize;

    /// Returns whether the array is empty (has zero rows).
    ///
    /// The default implementation compares [`DynArray::len`] against zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the logical Vortex [`DType`] of the array.
    fn dtype(&self) -> &DType;

    /// Returns the vtable of the array.
    fn vtable(&self) -> &dyn DynVTable;

    /// Returns the encoding ID of the array.
    fn encoding_id(&self) -> ArrayId;

    /// Performs a constant-time slice of the array.
    ///
    /// `range` is a half-open `start..end` row range.
    fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef>;

    /// Wraps the array in a [`FilterArray`] such that it is logically filtered by the given mask.
    fn filter(&self, mask: Mask) -> VortexResult<ArrayRef>;

    /// Wraps the array in a [`DictArray`] such that it is logically taken by the given indices.
    fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef>;

    /// Fetch the scalar at the given index.
    ///
    /// NOTE(review): this was documented as panicking on an out-of-bounds index, but the
    /// `Array<V>` and `ArrayAdapter<V>` implementations in this module guard the index with
    /// `vortex_ensure!` inside a `VortexResult`-returning function, which presumably surfaces
    /// an error instead. Confirm which contract is intended.
    fn scalar_at(&self, index: usize) -> VortexResult<Scalar>;

    /// Returns whether the item at `index` is valid.
    fn is_valid(&self, index: usize) -> VortexResult<bool>;

    /// Returns whether the item at `index` is invalid.
    fn is_invalid(&self, index: usize) -> VortexResult<bool>;

    /// Returns whether all items in the array are valid.
    ///
    /// This is usually cheaper than computing a precise `valid_count`, but may return false
    /// negatives.
    fn all_valid(&self) -> VortexResult<bool>;

    /// Returns whether the array is all invalid.
    ///
    /// This is usually cheaper than computing a precise `invalid_count`, but may return false
    /// negatives.
    fn all_invalid(&self) -> VortexResult<bool>;

    /// Returns the number of valid elements in the array.
    fn valid_count(&self) -> VortexResult<usize>;

    /// Returns the number of invalid elements in the array.
    fn invalid_count(&self) -> VortexResult<usize>;

    /// Returns the [`Validity`] of the array.
    fn validity(&self) -> VortexResult<Validity>;

    /// Returns the canonical validity mask for the array.
    fn validity_mask(&self) -> VortexResult<Mask>;

    /// Returns the canonical representation of the array.
    fn to_canonical(&self) -> VortexResult<Canonical>;

    /// Writes the array into the canonical builder.
    ///
    /// The [`DType`] of the builder must match that of the array.
    fn append_to_builder(
        &self,
        builder: &mut dyn ArrayBuilder,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<()>;

    /// Returns the statistics of the array.
    // TODO(ngates): change how this works. It's weird.
    fn statistics(&self) -> StatsSetRef<'_>;

    /// Replaces the children of the array with the given array references.
    fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef>;
}
169
170impl DynArray for Arc<dyn DynArray> {
171    #[inline]
172    fn as_any(&self) -> &dyn Any {
173        DynArray::as_any(self.as_ref())
174    }
175
176    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
177        self
178    }
179
180    #[inline]
181    fn to_array(&self) -> ArrayRef {
182        self.clone()
183    }
184
185    #[inline]
186    fn len(&self) -> usize {
187        self.as_ref().len()
188    }
189
190    #[inline]
191    fn dtype(&self) -> &DType {
192        self.as_ref().dtype()
193    }
194
195    fn vtable(&self) -> &dyn DynVTable {
196        self.as_ref().vtable()
197    }
198
199    #[inline]
200    fn encoding_id(&self) -> ArrayId {
201        self.as_ref().encoding_id()
202    }
203
204    #[inline]
205    fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef> {
206        self.as_ref().slice(range)
207    }
208
209    fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
210        self.as_ref().filter(mask)
211    }
212
213    fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
214        self.as_ref().take(indices)
215    }
216
217    #[inline]
218    fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
219        self.as_ref().scalar_at(index)
220    }
221
222    #[inline]
223    fn is_valid(&self, index: usize) -> VortexResult<bool> {
224        self.as_ref().is_valid(index)
225    }
226
227    #[inline]
228    fn is_invalid(&self, index: usize) -> VortexResult<bool> {
229        self.as_ref().is_invalid(index)
230    }
231
232    #[inline]
233    fn all_valid(&self) -> VortexResult<bool> {
234        self.as_ref().all_valid()
235    }
236
237    #[inline]
238    fn all_invalid(&self) -> VortexResult<bool> {
239        self.as_ref().all_invalid()
240    }
241
242    #[inline]
243    fn valid_count(&self) -> VortexResult<usize> {
244        self.as_ref().valid_count()
245    }
246
247    #[inline]
248    fn invalid_count(&self) -> VortexResult<usize> {
249        self.as_ref().invalid_count()
250    }
251
252    #[inline]
253    fn validity(&self) -> VortexResult<Validity> {
254        self.as_ref().validity()
255    }
256
257    #[inline]
258    fn validity_mask(&self) -> VortexResult<Mask> {
259        self.as_ref().validity_mask()
260    }
261
262    fn to_canonical(&self) -> VortexResult<Canonical> {
263        self.as_ref().to_canonical()
264    }
265
266    fn append_to_builder(
267        &self,
268        builder: &mut dyn ArrayBuilder,
269        ctx: &mut ExecutionCtx,
270    ) -> VortexResult<()> {
271        self.as_ref().append_to_builder(builder, ctx)
272    }
273
274    fn statistics(&self) -> StatsSetRef<'_> {
275        self.as_ref().statistics()
276    }
277
278    fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef> {
279        self.as_ref().with_children(children)
280    }
281}
282
/// A reference counted pointer to a dynamic [`DynArray`] trait object.
///
/// Cloning an `ArrayRef` clones the `Arc`, not the array behind it.
pub type ArrayRef = Arc<dyn DynArray>;
285
286impl ToOwned for dyn DynArray {
287    type Owned = ArrayRef;
288
289    fn to_owned(&self) -> Self::Owned {
290        self.to_array()
291    }
292}
293
294impl dyn DynArray + '_ {
295    /// Does the array match the given matcher.
296    pub fn is<M: Matcher>(&self) -> bool {
297        M::matches(self)
298    }
299
300    /// Returns the array downcast by the given matcher.
301    pub fn as_<M: Matcher>(&self) -> M::Match<'_> {
302        self.as_opt::<M>().vortex_expect("Failed to downcast")
303    }
304
305    /// Returns the array downcast by the given matcher.
306    pub fn as_opt<M: Matcher>(&self) -> Option<M::Match<'_>> {
307        M::try_match(self)
308    }
309
310    /// Returns the array downcast to the given `V::Array` as an owned object.
311    pub fn try_into<V: VTable>(self: Arc<Self>) -> Result<V::Array, Arc<Self>> {
312        if !self.is::<V>() {
313            return Err(self);
314        }
315        let any_arc = self.as_any_arc();
316        let typed: Arc<Array<V>> = any_arc
317            .downcast::<Array<V>>()
318            .map_err(|_| vortex_err!("failed to downcast"))
319            .vortex_expect("Failed to downcast");
320        Ok(match Arc::try_unwrap(typed) {
321            Ok(array) => array.into_inner(),
322            Err(arc) => arc.deref().inner().clone(),
323        })
324    }
325
326    pub fn as_constant(&self) -> Option<Scalar> {
327        self.as_opt::<Constant>().map(|a| a.scalar().clone())
328    }
329
330    /// Total size of the array in bytes, including all children and buffers.
331    pub fn nbytes(&self) -> u64 {
332        let mut nbytes = 0;
333        for array in self.depth_first_traversal() {
334            for buffer in array.buffers() {
335                nbytes += buffer.len() as u64;
336            }
337        }
338        nbytes
339    }
340
341    /// Returns whether this array is an arrow encoding.
342    pub fn is_arrow(&self) -> bool {
343        self.is::<Null>()
344            || self.is::<Bool>()
345            || self.is::<Primitive>()
346            || self.is::<VarBin>()
347            || self.is::<VarBinView>()
348    }
349
350    /// Whether the array is of a canonical encoding.
351    pub fn is_canonical(&self) -> bool {
352        self.is::<AnyCanonical>()
353    }
354
355    /// Returns a new array with the child at `child_idx` replaced by `replacement`.
356    pub fn with_child(&self, child_idx: usize, replacement: ArrayRef) -> VortexResult<ArrayRef> {
357        let mut children: Vec<ArrayRef> = self.children();
358        vortex_ensure!(
359            child_idx < children.len(),
360            "child index {} out of bounds for array with {} children",
361            child_idx,
362            children.len()
363        );
364        children[child_idx] = replacement;
365        self.with_children(children)
366    }
367}
368
/// Trait for converting a type into a Vortex [`ArrayRef`].
pub trait IntoArray {
    /// Consumes `self` and returns it as an [`ArrayRef`].
    fn into_array(self) -> ArrayRef;
}
373
/// An [`ArrayRef`] is already in its target form, so the conversion is the identity.
impl IntoArray for ArrayRef {
    fn into_array(self) -> ArrayRef {
        self
    }
}
379
/// Private module holding the marker trait that seals [`DynArray`].
mod private {
    use super::*;

    /// Marker supertrait of `DynArray`; it lives in a private module and is implemented
    /// only for the types listed below.
    pub trait Sealed {}

    impl<V: VTable> Sealed for Array<V> {}
    impl<V: VTable> Sealed for ArrayAdapter<V> {}
    impl Sealed for Arc<dyn DynArray> {}
}
389
390// =============================================================================
391// New path: DynArray and supporting trait impls for Array<V>
392// =============================================================================
393
394/// DynArray implementation for [`Array<V>`].
395///
396/// This is self-contained: identity methods use `Array<V>`'s own fields (dtype, len, stats),
397/// while data-access methods delegate to VTable methods on the inner `V::Array`.
398impl<V: VTable> DynArray for Array<V> {
399    fn as_any(&self) -> &dyn Any {
400        self
401    }
402
403    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
404        self
405    }
406
407    fn to_array(&self) -> ArrayRef {
408        Arc::new(self.clone())
409    }
410
411    fn len(&self) -> usize {
412        self.len
413    }
414
415    fn dtype(&self) -> &DType {
416        &self.dtype
417    }
418
419    fn vtable(&self) -> &dyn DynVTable {
420        self.typed_vtable()
421    }
422
423    fn encoding_id(&self) -> ArrayId {
424        self.typed_vtable().id()
425    }
426
427    fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef> {
428        let start = range.start;
429        let stop = range.end;
430
431        if start == 0 && stop == self.len {
432            return Ok(self.to_array());
433        }
434
435        vortex_ensure!(
436            start <= self.len,
437            "OutOfBounds: start {start} > length {}",
438            self.len
439        );
440        vortex_ensure!(
441            stop <= self.len,
442            "OutOfBounds: stop {stop} > length {}",
443            self.len
444        );
445
446        vortex_ensure!(start <= stop, "start ({start}) must be <= stop ({stop})");
447
448        if start == stop {
449            return Ok(Canonical::empty(&self.dtype).into_array());
450        }
451
452        let sliced = SliceArray::try_new(self.to_array(), range)?
453            .into_array()
454            .optimize()?;
455
456        // Propagate some stats from the original array to the sliced array.
457        if !sliced.is::<Constant>() {
458            self.statistics().with_iter(|iter| {
459                sliced.statistics().inherit(iter.filter(|(stat, value)| {
460                    matches!(
461                        stat,
462                        Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
463                    ) && value.as_ref().as_exact().is_some_and(|v| {
464                        Scalar::try_new(DType::Bool(Nullability::NonNullable), Some(v.clone()))
465                            .vortex_expect("A stat that was expected to be a boolean stat was not")
466                            .as_bool()
467                            .value()
468                            .unwrap_or_default()
469                    })
470                }));
471            });
472        }
473
474        Ok(sliced)
475    }
476
477    fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
478        FilterArray::try_new(self.to_array(), mask)?
479            .into_array()
480            .optimize()
481    }
482
483    fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
484        DictArray::try_new(indices, self.to_array())?
485            .into_array()
486            .optimize()
487    }
488
489    fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
490        vortex_ensure!(index < self.len, OutOfBounds: index, 0, self.len);
491        if self.is_invalid(index)? {
492            return Ok(Scalar::null(self.dtype.clone()));
493        }
494        let scalar = <V::OperationsVTable as OperationsVTable<V>>::scalar_at(
495            &self.array,
496            index,
497            &mut LEGACY_SESSION.create_execution_ctx(),
498        )?;
499        vortex_ensure!(&self.dtype == scalar.dtype(), "Scalar dtype mismatch");
500        Ok(scalar)
501    }
502
503    fn is_valid(&self, index: usize) -> VortexResult<bool> {
504        vortex_ensure!(index < self.len, OutOfBounds: index, 0, self.len);
505        match self.validity()? {
506            Validity::NonNullable | Validity::AllValid => Ok(true),
507            Validity::AllInvalid => Ok(false),
508            Validity::Array(a) => a
509                .scalar_at(index)?
510                .as_bool()
511                .value()
512                .ok_or_else(|| vortex_err!("validity value at index {} is null", index)),
513        }
514    }
515
516    fn is_invalid(&self, index: usize) -> VortexResult<bool> {
517        Ok(!self.is_valid(index)?)
518    }
519
520    fn all_valid(&self) -> VortexResult<bool> {
521        match self.validity()? {
522            Validity::NonNullable | Validity::AllValid => Ok(true),
523            Validity::AllInvalid => Ok(false),
524            Validity::Array(a) => Ok(a.statistics().compute_min::<bool>().unwrap_or(false)),
525        }
526    }
527
528    fn all_invalid(&self) -> VortexResult<bool> {
529        match self.validity()? {
530            Validity::NonNullable | Validity::AllValid => Ok(false),
531            Validity::AllInvalid => Ok(true),
532            Validity::Array(a) => Ok(!a.statistics().compute_max::<bool>().unwrap_or(true)),
533        }
534    }
535
536    fn valid_count(&self) -> VortexResult<usize> {
537        if let Some(Precision::Exact(invalid_count)) =
538            self.statistics().get_as::<usize>(Stat::NullCount)
539        {
540            return Ok(self.len - invalid_count);
541        }
542
543        let count = match self.validity()? {
544            Validity::NonNullable | Validity::AllValid => self.len,
545            Validity::AllInvalid => 0,
546            Validity::Array(a) => {
547                let mut ctx = LEGACY_SESSION.create_execution_ctx();
548                let array_sum = sum(&a, &mut ctx)?;
549                array_sum
550                    .as_primitive()
551                    .as_::<usize>()
552                    .ok_or_else(|| vortex_err!("sum of validity array is null"))?
553            }
554        };
555        vortex_ensure!(count <= self.len, "Valid count exceeds array length");
556
557        self.statistics()
558            .set(Stat::NullCount, Precision::exact(self.len - count));
559
560        Ok(count)
561    }
562
563    fn invalid_count(&self) -> VortexResult<usize> {
564        Ok(self.len - self.valid_count()?)
565    }
566
567    fn validity(&self) -> VortexResult<Validity> {
568        if self.dtype.is_nullable() {
569            let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(&self.array)?;
570            if let Validity::Array(array) = &validity {
571                vortex_ensure!(array.len() == self.len, "Validity array length mismatch");
572                vortex_ensure!(
573                    matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
574                    "Validity array is not non-nullable boolean: {}",
575                    self.typed_vtable().id(),
576                );
577            }
578            Ok(validity)
579        } else {
580            Ok(Validity::NonNullable)
581        }
582    }
583
584    fn validity_mask(&self) -> VortexResult<Mask> {
585        match self.validity()? {
586            Validity::NonNullable | Validity::AllValid => Ok(Mask::new_true(self.len)),
587            Validity::AllInvalid => Ok(Mask::new_false(self.len)),
588            Validity::Array(a) => Ok(a.to_bool().to_mask()),
589        }
590    }
591
592    fn to_canonical(&self) -> VortexResult<Canonical> {
593        self.to_array()
594            .execute(&mut LEGACY_SESSION.create_execution_ctx())
595    }
596
597    fn append_to_builder(
598        &self,
599        builder: &mut dyn ArrayBuilder,
600        ctx: &mut ExecutionCtx,
601    ) -> VortexResult<()> {
602        if builder.dtype() != &self.dtype {
603            vortex_panic!(
604                "Builder dtype mismatch: expected {}, got {}",
605                self.dtype,
606                builder.dtype(),
607            );
608        }
609        let len = builder.len();
610
611        V::append_to_builder(&self.array, builder, ctx)?;
612
613        assert_eq!(
614            len + self.len,
615            builder.len(),
616            "Builder length mismatch after writing array for encoding {}",
617            self.typed_vtable().id(),
618        );
619        Ok(())
620    }
621
622    fn statistics(&self) -> StatsSetRef<'_> {
623        self.stats.to_ref(self)
624    }
625
626    fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef> {
627        let mut inner = self.array.clone();
628        V::with_children(&mut inner, children)?;
629        // SAFETY: with_children preserves dtype and len.
630        Ok(unsafe {
631            Array::new_unchecked(
632                self.typed_vtable().clone(),
633                self.dtype.clone(),
634                self.len,
635                inner,
636                self.stats.clone(),
637            )
638        }
639        .into_array())
640    }
641}
642
643impl<V: VTable> ArrayHash for Array<V> {
644    fn array_hash<H: Hasher>(&self, state: &mut H, precision: hash::Precision) {
645        self.typed_vtable().id().hash(state);
646        V::array_hash(&self.array, state, precision);
647    }
648}
649
650impl<V: VTable> ArrayEq for Array<V> {
651    fn array_eq(&self, other: &Self, precision: hash::Precision) -> bool {
652        V::array_eq(&self.array, &other.array, precision)
653    }
654}
655
656impl<V: VTable> ArrayVisitor for Array<V> {
657    fn children(&self) -> Vec<ArrayRef> {
658        (0..V::nchildren(&self.array))
659            .map(|i| V::child(&self.array, i))
660            .collect()
661    }
662
663    fn nchildren(&self) -> usize {
664        V::nchildren(&self.array)
665    }
666
667    fn nth_child(&self, idx: usize) -> Option<ArrayRef> {
668        (idx < V::nchildren(&self.array)).then(|| V::child(&self.array, idx))
669    }
670
671    fn children_names(&self) -> Vec<String> {
672        (0..V::nchildren(&self.array))
673            .map(|i| V::child_name(&self.array, i))
674            .collect()
675    }
676
677    fn named_children(&self) -> Vec<(String, ArrayRef)> {
678        (0..V::nchildren(&self.array))
679            .map(|i| (V::child_name(&self.array, i), V::child(&self.array, i)))
680            .collect()
681    }
682
683    fn buffers(&self) -> Vec<ByteBuffer> {
684        (0..V::nbuffers(&self.array))
685            .map(|i| V::buffer(&self.array, i).to_host_sync())
686            .collect()
687    }
688
689    fn buffer_handles(&self) -> Vec<BufferHandle> {
690        (0..V::nbuffers(&self.array))
691            .map(|i| V::buffer(&self.array, i))
692            .collect()
693    }
694
695    fn buffer_names(&self) -> Vec<String> {
696        (0..V::nbuffers(&self.array))
697            .filter_map(|i| V::buffer_name(&self.array, i))
698            .collect()
699    }
700
701    fn named_buffers(&self) -> Vec<(String, BufferHandle)> {
702        (0..V::nbuffers(&self.array))
703            .filter_map(|i| {
704                V::buffer_name(&self.array, i).map(|name| (name, V::buffer(&self.array, i)))
705            })
706            .collect()
707    }
708
709    fn nbuffers(&self) -> usize {
710        V::nbuffers(&self.array)
711    }
712
713    fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
714        V::serialize(V::metadata(&self.array)?)
715    }
716
717    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
718        match V::metadata(&self.array) {
719            Err(e) => write!(f, "<serde error: {e}>"),
720            Ok(metadata) => Debug::fmt(&metadata, f),
721        }
722    }
723
724    fn is_host(&self) -> bool {
725        for array in self.depth_first_traversal() {
726            if !array.buffer_handles().iter().all(BufferHandle::is_on_host) {
727                return false;
728            }
729        }
730        true
731    }
732}
733
734impl<V: VTable> ReduceNode for Array<V> {
735    fn as_any(&self) -> &dyn Any {
736        self
737    }
738
739    fn node_dtype(&self) -> VortexResult<DType> {
740        Ok(self.dtype.clone())
741    }
742
743    fn scalar_fn(&self) -> Option<&ScalarFnRef> {
744        // Access as_opt via inner's Deref to dyn DynArray.
745        (*self.array)
746            .as_opt::<ScalarFnVTable>()
747            .map(|a| a.scalar_fn())
748    }
749
750    fn child(&self, idx: usize) -> ReduceNodeRef {
751        ArrayVisitor::nth_child(self, idx)
752            .unwrap_or_else(|| vortex_panic!("Child index out of bounds: {}", idx))
753    }
754
755    fn child_count(&self) -> usize {
756        ArrayVisitor::nchildren(self)
757    }
758}
759
760// =============================================================================
761// Legacy path: ArrayAdapter<V>
762// =============================================================================
763
/// Adapter struct used to lift the [`VTable`] trait into an object-safe [`DynArray`]
/// implementation.
///
/// Since this is a single-field tuple struct with `repr(transparent)`, we are able to turn
/// un-adapted array structs into `dyn DynArray` using some cheeky casting inside
/// [`std::ops::Deref`] and [`AsRef`]. See the `vtable!` macro for more details.
#[repr(transparent)]
pub struct ArrayAdapter<V: VTable>(V::Array);
772
773impl<V: VTable> ArrayAdapter<V> {
774    /// Provide a reference to the underlying array held within the adapter.
775    pub fn as_inner(&self) -> &V::Array {
776        &self.0
777    }
778
779    /// Consume the adapter and return the underlying array.
780    pub fn into_inner(self) -> V::Array {
781        self.0
782    }
783}
784
785impl<V: VTable> Debug for ArrayAdapter<V> {
786    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
787        self.0.fmt(f)
788    }
789}
790
791impl<V: VTable> ReduceNode for ArrayAdapter<V> {
792    fn as_any(&self) -> &dyn Any {
793        self
794    }
795
796    fn node_dtype(&self) -> VortexResult<DType> {
797        Ok(V::dtype(&self.0).clone())
798    }
799
800    fn scalar_fn(&self) -> Option<&ScalarFnRef> {
801        self.0.as_opt::<ScalarFnVTable>().map(|a| a.scalar_fn())
802    }
803
804    fn child(&self, idx: usize) -> ReduceNodeRef {
805        self.nth_child(idx)
806            .unwrap_or_else(|| vortex_panic!("Child index out of bounds: {}", idx))
807    }
808
809    fn child_count(&self) -> usize {
810        self.nchildren()
811    }
812}
813
814impl<V: VTable> DynArray for ArrayAdapter<V> {
815    fn as_any(&self) -> &dyn Any {
816        self
817    }
818
819    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
820        self
821    }
822
823    fn to_array(&self) -> ArrayRef {
824        self.0.clone().into_array()
825    }
826
827    fn len(&self) -> usize {
828        V::len(&self.0)
829    }
830
831    fn dtype(&self) -> &DType {
832        V::dtype(&self.0)
833    }
834
835    fn vtable(&self) -> &dyn DynVTable {
836        V::vtable(self.as_inner())
837    }
838
839    fn encoding_id(&self) -> ArrayId {
840        V::vtable(&self.0).id()
841    }
842
843    fn slice(&self, range: Range<usize>) -> VortexResult<ArrayRef> {
844        let start = range.start;
845        let stop = range.end;
846
847        if start == 0 && stop == self.len() {
848            return Ok(self.to_array());
849        }
850
851        vortex_ensure!(
852            start <= self.len(),
853            "OutOfBounds: start {start} > length {}",
854            self.len()
855        );
856        vortex_ensure!(
857            stop <= self.len(),
858            "OutOfBounds: stop {stop} > length {}",
859            self.len()
860        );
861
862        vortex_ensure!(start <= stop, "start ({start}) must be <= stop ({stop})");
863
864        if start == stop {
865            return Ok(Canonical::empty(self.dtype()).into_array());
866        }
867
868        let sliced = SliceArray::try_new(self.to_array(), range)?
869            .into_array()
870            .optimize()?;
871
872        // Propagate some stats from the original array to the sliced array.
873        if !sliced.is::<Constant>() {
874            self.statistics().with_iter(|iter| {
875                sliced.statistics().inherit(iter.filter(|(stat, value)| {
876                    matches!(
877                        stat,
878                        Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
879                    ) && value.as_ref().as_exact().is_some_and(|v| {
880                        Scalar::try_new(DType::Bool(Nullability::NonNullable), Some(v.clone()))
881                            .vortex_expect("A stat that was expected to be a boolean stat was not")
882                            .as_bool()
883                            .value()
884                            .unwrap_or_default()
885                    })
886                }));
887            });
888        }
889
890        Ok(sliced)
891    }
892
893    fn filter(&self, mask: Mask) -> VortexResult<ArrayRef> {
894        FilterArray::try_new(self.to_array(), mask)?
895            .into_array()
896            .optimize()
897    }
898
899    fn take(&self, indices: ArrayRef) -> VortexResult<ArrayRef> {
900        DictArray::try_new(indices, self.to_array())?
901            .into_array()
902            .optimize()
903    }
904
905    fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
906        vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
907        if self.is_invalid(index)? {
908            return Ok(Scalar::null(self.dtype().clone()));
909        }
910        let mut ctx = LEGACY_SESSION.create_execution_ctx();
911        let scalar =
912            <V::OperationsVTable as OperationsVTable<V>>::scalar_at(&self.0, index, &mut ctx)?;
913        vortex_ensure!(self.dtype() == scalar.dtype(), "Scalar dtype mismatch");
914        Ok(scalar)
915    }
916
917    fn is_valid(&self, index: usize) -> VortexResult<bool> {
918        vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len());
919        match self.validity()? {
920            Validity::NonNullable | Validity::AllValid => Ok(true),
921            Validity::AllInvalid => Ok(false),
922            Validity::Array(a) => a
923                .scalar_at(index)?
924                .as_bool()
925                .value()
926                .ok_or_else(|| vortex_err!("validity value at index {} is null", index)),
927        }
928    }
929
930    fn is_invalid(&self, index: usize) -> VortexResult<bool> {
931        Ok(!self.is_valid(index)?)
932    }
933
934    fn all_valid(&self) -> VortexResult<bool> {
935        match self.validity()? {
936            Validity::NonNullable | Validity::AllValid => Ok(true),
937            Validity::AllInvalid => Ok(false),
938            Validity::Array(a) => Ok(a.statistics().compute_min::<bool>().unwrap_or(false)),
939        }
940    }
941
942    fn all_invalid(&self) -> VortexResult<bool> {
943        match self.validity()? {
944            Validity::NonNullable | Validity::AllValid => Ok(false),
945            Validity::AllInvalid => Ok(true),
946            Validity::Array(a) => Ok(!a.statistics().compute_max::<bool>().unwrap_or(true)),
947        }
948    }
949
950    // TODO(ngates): deprecate this function since it requires compute.
951    fn valid_count(&self) -> VortexResult<usize> {
952        if let Some(Precision::Exact(invalid_count)) =
953            self.statistics().get_as::<usize>(Stat::NullCount)
954        {
955            return Ok(self.len() - invalid_count);
956        }
957
958        let count = match self.validity()? {
959            Validity::NonNullable | Validity::AllValid => self.len(),
960            Validity::AllInvalid => 0,
961            Validity::Array(a) => {
962                let mut ctx = LEGACY_SESSION.create_execution_ctx();
963                let array_sum = sum(&a, &mut ctx)?;
964                array_sum
965                    .as_primitive()
966                    .as_::<usize>()
967                    .ok_or_else(|| vortex_err!("sum of validity array is null"))?
968            }
969        };
970        vortex_ensure!(count <= self.len(), "Valid count exceeds array length");
971
972        self.statistics()
973            .set(Stat::NullCount, Precision::exact(self.len() - count));
974
975        Ok(count)
976    }
977
978    fn invalid_count(&self) -> VortexResult<usize> {
979        Ok(self.len() - self.valid_count()?)
980    }
981
982    fn validity(&self) -> VortexResult<Validity> {
983        if self.dtype().is_nullable() {
984            let validity = <V::ValidityVTable as ValidityVTable<V>>::validity(&self.0)?;
985            if let Validity::Array(array) = &validity {
986                vortex_ensure!(array.len() == self.len(), "Validity array length mismatch");
987                vortex_ensure!(
988                    matches!(array.dtype(), DType::Bool(Nullability::NonNullable)),
989                    "Validity array is not non-nullable boolean: {}",
990                    self.encoding_id(),
991                );
992            }
993            Ok(validity)
994        } else {
995            Ok(Validity::NonNullable)
996        }
997    }
998
999    fn validity_mask(&self) -> VortexResult<Mask> {
1000        match self.validity()? {
1001            Validity::NonNullable | Validity::AllValid => Ok(Mask::new_true(self.len())),
1002            Validity::AllInvalid => Ok(Mask::new_false(self.len())),
1003            Validity::Array(a) => Ok(a.to_bool().to_mask()),
1004        }
1005    }
1006
1007    fn to_canonical(&self) -> VortexResult<Canonical> {
1008        self.to_array()
1009            .execute(&mut LEGACY_SESSION.create_execution_ctx())
1010    }
1011
1012    fn append_to_builder(
1013        &self,
1014        builder: &mut dyn ArrayBuilder,
1015        ctx: &mut ExecutionCtx,
1016    ) -> VortexResult<()> {
1017        if builder.dtype() != self.dtype() {
1018            vortex_panic!(
1019                "Builder dtype mismatch: expected {}, got {}",
1020                self.dtype(),
1021                builder.dtype(),
1022            );
1023        }
1024        let len = builder.len();
1025
1026        V::append_to_builder(&self.0, builder, ctx)?;
1027
1028        assert_eq!(
1029            len + self.len(),
1030            builder.len(),
1031            "Builder length mismatch after writing array for encoding {}",
1032            self.encoding_id(),
1033        );
1034        Ok(())
1035    }
1036
1037    fn statistics(&self) -> StatsSetRef<'_> {
1038        V::stats(&self.0)
1039    }
1040
1041    fn with_children(&self, children: Vec<ArrayRef>) -> VortexResult<ArrayRef> {
1042        let mut this = self.0.clone();
1043        V::with_children(&mut this, children)?;
1044        Ok(this.into_array())
1045    }
1046}
1047
1048impl<V: VTable> ArrayHash for ArrayAdapter<V> {
1049    fn array_hash<H: Hasher>(&self, state: &mut H, precision: hash::Precision) {
1050        self.0.encoding_id().hash(state);
1051        V::array_hash(&self.0, state, precision);
1052    }
1053}
1054
1055impl<V: VTable> ArrayEq for ArrayAdapter<V> {
1056    fn array_eq(&self, other: &Self, precision: hash::Precision) -> bool {
1057        V::array_eq(&self.0, &other.0, precision)
1058    }
1059}
1060
1061impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
1062    fn children(&self) -> Vec<ArrayRef> {
1063        (0..V::nchildren(&self.0))
1064            .map(|i| V::child(&self.0, i))
1065            .collect()
1066    }
1067
1068    fn nchildren(&self) -> usize {
1069        V::nchildren(&self.0)
1070    }
1071
1072    fn nth_child(&self, idx: usize) -> Option<ArrayRef> {
1073        (idx < V::nchildren(&self.0)).then(|| V::child(&self.0, idx))
1074    }
1075
1076    fn children_names(&self) -> Vec<String> {
1077        (0..V::nchildren(&self.0))
1078            .map(|i| V::child_name(&self.0, i))
1079            .collect()
1080    }
1081
1082    fn named_children(&self) -> Vec<(String, ArrayRef)> {
1083        (0..V::nchildren(&self.0))
1084            .map(|i| (V::child_name(&self.0, i), V::child(&self.0, i)))
1085            .collect()
1086    }
1087
1088    fn buffers(&self) -> Vec<ByteBuffer> {
1089        (0..V::nbuffers(&self.0))
1090            .map(|i| V::buffer(&self.0, i).to_host_sync())
1091            .collect()
1092    }
1093
1094    fn buffer_handles(&self) -> Vec<BufferHandle> {
1095        (0..V::nbuffers(&self.0))
1096            .map(|i| V::buffer(&self.0, i))
1097            .collect()
1098    }
1099
1100    fn buffer_names(&self) -> Vec<String> {
1101        (0..V::nbuffers(&self.0))
1102            .filter_map(|i| V::buffer_name(&self.0, i))
1103            .collect()
1104    }
1105
1106    fn named_buffers(&self) -> Vec<(String, BufferHandle)> {
1107        (0..V::nbuffers(&self.0))
1108            .filter_map(|i| V::buffer_name(&self.0, i).map(|name| (name, V::buffer(&self.0, i))))
1109            .collect()
1110    }
1111
1112    fn nbuffers(&self) -> usize {
1113        V::nbuffers(&self.0)
1114    }
1115
1116    fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
1117        V::serialize(V::metadata(&self.0)?)
1118    }
1119
1120    fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
1121        match V::metadata(&self.0) {
1122            Err(e) => write!(f, "<serde error: {e}>"),
1123            Ok(metadata) => Debug::fmt(&metadata, f),
1124        }
1125    }
1126
1127    fn is_host(&self) -> bool {
1128        for array in self.depth_first_traversal() {
1129            if !array.buffer_handles().iter().all(BufferHandle::is_on_host) {
1130                return false;
1131            }
1132        }
1133
1134        true
1135    }
1136}
1137
1138/// Implement a matcher for a specific VTable type.
1139///
1140/// During the migration, this tries both `Array<V>` (new path) and `ArrayAdapter<V>`
1141/// (legacy path). Returns `&V::Array` for backward compatibility.
1142impl<V: VTable> Matcher for V {
1143    type Match<'a> = &'a V::Array;
1144
1145    fn matches(array: &dyn DynArray) -> bool {
1146        DynArray::as_any(array).is::<Array<V>>() || DynArray::as_any(array).is::<ArrayAdapter<V>>()
1147    }
1148
1149    fn try_match<'a>(array: &'a dyn DynArray) -> Option<Self::Match<'a>> {
1150        // Try new Array<V> first.
1151        if let Some(typed) = DynArray::as_any(array).downcast_ref::<Array<V>>() {
1152            return Some(typed.inner());
1153        }
1154        // Fall back to legacy ArrayAdapter<V>.
1155        DynArray::as_any(array)
1156            .downcast_ref::<ArrayAdapter<V>>()
1157            .map(|adapter| adapter.as_inner())
1158    }
1159}