vortex_array/array/
mod.rs

1mod canonical;
2mod convert;
3mod implementation;
4mod statistics;
5mod validity;
6mod variants;
7mod visitor;
8
9use std::any::Any;
10use std::fmt::{Debug, Display, Formatter};
11use std::sync::Arc;
12
13pub use canonical::*;
14pub use convert::*;
15pub use implementation::*;
16pub use statistics::*;
17pub use validity::*;
18pub use variants::*;
19pub use visitor::*;
20use vortex_dtype::DType;
21use vortex_error::{VortexExpect, VortexResult};
22use vortex_mask::Mask;
23
24use crate::arrays::{
25    BoolEncoding, ExtensionEncoding, ListEncoding, NullEncoding, PrimitiveEncoding, StructEncoding,
26    VarBinEncoding, VarBinViewEncoding,
27};
28use crate::builders::ArrayBuilder;
29use crate::compute::{ComputeFn, InvocationArgs, Output};
30use crate::stats::StatsSetRef;
31use crate::vtable::{EncodingVTable, VTableRef};
32use crate::{Canonical, EncodingId};
33
34/// The base trait for all Vortex arrays.
35///
36/// Users should invoke functions on this trait. Implementations should implement the corresponding
37/// function on the `_Impl` traits, e.g. [`ArrayValidityImpl`]. The functions here dispatch to the
38/// implementations, while validating pre- and post-conditions.
39pub trait Array: Send + Sync + Debug + ArrayStatistics + ArrayVariants + ArrayVisitor {
40    /// Returns the array as a reference to a generic [`Any`] trait object.
41    fn as_any(&self) -> &dyn Any;
42
43    /// Returns the array as an [`Arc`] reference to a generic [`Any`] trait object.
44    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
45
46    /// Returns the array as an [`ArrayRef`].
47    fn to_array(&self) -> ArrayRef;
48
49    /// Converts the array into an [`ArrayRef`].
50    fn into_array(self) -> ArrayRef
51    where
52        Self: Sized;
53
54    /// Returns the length of the array.
55    fn len(&self) -> usize;
56
57    /// Returns whether the array is empty (has zero rows).
58    fn is_empty(&self) -> bool {
59        self.len() == 0
60    }
61
62    /// Returns the logical Vortex [`DType`] of the array.
63    fn dtype(&self) -> &DType;
64
65    /// Returns the encoding of the array.
66    fn encoding(&self) -> EncodingId;
67
68    /// Returns the encoding VTable.
69    fn vtable(&self) -> VTableRef;
70
71    /// Returns whether the array is of the given encoding.
72    fn is_encoding(&self, encoding: EncodingId) -> bool {
73        self.encoding() == encoding
74    }
75
76    /// Returns whether this array is an arrow encoding.
77    // TODO(ngates): this shouldn't live here.
78    fn is_arrow(&self) -> bool {
79        self.is_encoding(NullEncoding.id())
80            || self.is_encoding(BoolEncoding.id())
81            || self.is_encoding(PrimitiveEncoding.id())
82            || self.is_encoding(VarBinEncoding.id())
83            || self.is_encoding(VarBinViewEncoding.id())
84    }
85
86    /// Whether the array is of a canonical encoding.
87    // TODO(ngates): this shouldn't live here.
88    fn is_canonical(&self) -> bool {
89        self.is_encoding(NullEncoding.id())
90            || self.is_encoding(BoolEncoding.id())
91            || self.is_encoding(PrimitiveEncoding.id())
92            || self.is_encoding(StructEncoding.id())
93            || self.is_encoding(ListEncoding.id())
94            || self.is_encoding(VarBinViewEncoding.id())
95            || self.is_encoding(ExtensionEncoding.id())
96    }
97
98    /// Returns whether the item at `index` is valid.
99    fn is_valid(&self, index: usize) -> VortexResult<bool>;
100
101    /// Returns whether the item at `index` is invalid.
102    fn is_invalid(&self, index: usize) -> VortexResult<bool>;
103
104    /// Returns whether all items in the array are valid.
105    ///
106    /// This is usually cheaper than computing a precise `valid_count`.
107    fn all_valid(&self) -> VortexResult<bool>;
108
109    /// Returns whether the array is all invalid.
110    ///
111    /// This is usually cheaper than computing a precise `invalid_count`.
112    fn all_invalid(&self) -> VortexResult<bool>;
113
114    /// Returns the number of valid elements in the array.
115    fn valid_count(&self) -> VortexResult<usize>;
116
117    /// Returns the number of invalid elements in the array.
118    fn invalid_count(&self) -> VortexResult<usize>;
119
120    /// Returns the canonical validity mask for the array.
121    fn validity_mask(&self) -> VortexResult<Mask>;
122
123    /// Returns the canonical representation of the array.
124    fn to_canonical(&self) -> VortexResult<Canonical>;
125
126    /// Writes the array into the canonical builder.
127    ///
128    /// The [`DType`] of the builder must match that of the array.
129    fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()>;
130
131    /// Returns the statistics of the array.
132    // TODO(ngates): change how this works. It's weird.
133    fn statistics(&self) -> StatsSetRef<'_>;
134
135    /// Replaces the children of the array with the given array references.
136    fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef>;
137
138    /// Optionally invoke a kernel for the given compute function.
139    ///
140    /// These encoding-specific kernels are independent of kernels registered directly with
141    /// compute functions using [`ComputeFn::register_kernel`], and are attempted only if none of
142    /// the function-specific kernels returns a result.
143    ///
144    /// This allows encodings the opportunity to generically implement many compute functions
145    /// that share some property, for example [`ComputeFn::is_elementwise`], without prior
146    /// knowledge of the function itself, while still allowing users to override the implementation
147    /// of compute functions for built-in encodings. For an example, see the implementation for
148    /// chunked arrays.
149    ///
150    /// The first input in the [`InvocationArgs`] is always the array itself.
151    ///
152    /// Warning: do not call `compute_fn.invoke(args)` directly, as this will result in a recursive
153    /// call.
154    fn invoke(&self, compute_fn: &ComputeFn, args: &InvocationArgs)
155    -> VortexResult<Option<Output>>;
156}
157
158impl Array for Arc<dyn Array> {
159    fn as_any(&self) -> &dyn Any {
160        self.as_ref().as_any()
161    }
162
163    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
164        self
165    }
166
167    fn to_array(&self) -> ArrayRef {
168        self.clone()
169    }
170
171    fn into_array(self) -> ArrayRef {
172        self
173    }
174
175    fn len(&self) -> usize {
176        self.as_ref().len()
177    }
178
179    fn dtype(&self) -> &DType {
180        self.as_ref().dtype()
181    }
182
183    fn encoding(&self) -> EncodingId {
184        self.as_ref().encoding()
185    }
186
187    fn vtable(&self) -> VTableRef {
188        self.as_ref().vtable()
189    }
190
191    fn is_valid(&self, index: usize) -> VortexResult<bool> {
192        self.as_ref().is_valid(index)
193    }
194
195    fn is_invalid(&self, index: usize) -> VortexResult<bool> {
196        self.as_ref().is_invalid(index)
197    }
198
199    fn all_valid(&self) -> VortexResult<bool> {
200        self.as_ref().all_valid()
201    }
202
203    fn all_invalid(&self) -> VortexResult<bool> {
204        self.as_ref().all_invalid()
205    }
206
207    fn valid_count(&self) -> VortexResult<usize> {
208        self.as_ref().valid_count()
209    }
210
211    fn invalid_count(&self) -> VortexResult<usize> {
212        self.as_ref().invalid_count()
213    }
214
215    fn validity_mask(&self) -> VortexResult<Mask> {
216        self.as_ref().validity_mask()
217    }
218
219    fn to_canonical(&self) -> VortexResult<Canonical> {
220        self.as_ref().to_canonical()
221    }
222
223    fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
224        self.as_ref().append_to_builder(builder)
225    }
226
227    fn statistics(&self) -> StatsSetRef<'_> {
228        self.as_ref().statistics()
229    }
230
231    fn with_children(&self, children: &[ArrayRef]) -> VortexResult<ArrayRef> {
232        self.as_ref().with_children(children)
233    }
234
235    fn invoke(
236        &self,
237        compute_fn: &ComputeFn,
238        args: &InvocationArgs,
239    ) -> VortexResult<Option<Output>> {
240        self.as_ref().invoke(compute_fn, args)
241    }
242}
243
244/// A reference counted pointer to a dynamic [`Array`] trait object.
245pub type ArrayRef = Arc<dyn Array>;
246
247impl ToOwned for dyn Array {
248    type Owned = ArrayRef;
249
250    fn to_owned(&self) -> Self::Owned {
251        self.to_array()
252    }
253}
254
255impl<A: Array + Clone + 'static> TryFromArrayRef for A {
256    fn try_from_array(array: ArrayRef) -> Result<Self, ArrayRef> {
257        let fallback = array.clone();
258        if let Ok(array) = array.as_any_arc().downcast::<A>() {
259            // manually drop the fallback value so `Arc::unwrap_or_clone` doesn't always have to clone
260            drop(fallback);
261            Ok(Arc::unwrap_or_clone(array))
262        } else {
263            Err(fallback)
264        }
265    }
266}
267
268impl<A: Array + Clone + 'static> TryFromArrayRef for Arc<A> {
269    fn try_from_array(array: ArrayRef) -> Result<Self, ArrayRef> {
270        let fallback = array.clone();
271        array.as_any_arc().downcast::<A>().map_err(|_| fallback)
272    }
273}
274
275pub trait ArrayExt: Array {
276    /// Returns the array downcast to the given `A`.
277    fn as_<A: Array + 'static>(&self) -> &A {
278        self.as_any()
279            .downcast_ref::<A>()
280            .vortex_expect("Failed to downcast")
281    }
282
283    /// Returns the array downcast to the given `A`.
284    fn as_opt<A: Array + 'static>(&self) -> Option<&A> {
285        self.as_any().downcast_ref::<A>()
286    }
287
288    /// Is self an array with encoding `A`.
289    fn is<A: Array + 'static>(&self) -> bool {
290        self.as_opt::<A>().is_some()
291    }
292}
293
294impl<A: Array + ?Sized> ArrayExt for A {}
295
296impl Display for dyn Array {
297    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
298        write!(
299            f,
300            "{}({}, len={})",
301            self.encoding(),
302            self.dtype(),
303            self.len()
304        )
305    }
306}
307
/// Generates `impl TryFrom<ArrayRef> for $Array`.
///
/// The generated conversion downcasts the reference to `$Array`, taking the value out of the
/// `Arc` (or cloning it when the `Arc` is shared). When the reference holds a different type it
/// yields a `VortexError` reading "Cannot downcast to <type name>".
#[macro_export]
macro_rules! try_from_array_ref {
    ($Array:ty) => {
        impl TryFrom<$crate::ArrayRef> for $Array {
            type Error = vortex_error::VortexError;

            fn try_from(value: $crate::ArrayRef) -> Result<Self, Self::Error> {
                match value.as_any_arc().downcast::<Self>() {
                    Ok(typed) => Ok(::std::sync::Arc::unwrap_or_clone(typed)),
                    Err(_) => Err(vortex_error::vortex_err!(
                        "Cannot downcast to {}",
                        std::any::type_name::<Self>()
                    )),
                }
            }
        }
    };
}