vortex_array/array/
mod.rs

1mod canonical;
2mod convert;
3mod implementation;
4mod statistics;
5mod validity;
6mod variants;
7mod visitor;
8
9use std::any::{Any, type_name};
10use std::fmt::{Debug, Display, Formatter};
11use std::sync::Arc;
12
13pub use canonical::*;
14pub use convert::*;
15pub use implementation::*;
16pub use statistics::*;
17pub use validity::*;
18pub use variants::*;
19pub use visitor::*;
20use vortex_dtype::DType;
21use vortex_error::{VortexExpect, VortexResult, vortex_err};
22use vortex_mask::Mask;
23
24use crate::arrays::{
25    BoolEncoding, ExtensionEncoding, ListEncoding, NullEncoding, PrimitiveEncoding, StructEncoding,
26    VarBinEncoding, VarBinViewEncoding,
27};
28use crate::builders::ArrayBuilder;
29use crate::stats::StatsSetRef;
30use crate::vtable::{EncodingVTable, VTableRef};
31use crate::{Canonical, EncodingId};
32
33/// The base trait for all Vortex arrays.
34///
35/// Users should invoke functions on this trait. Implementations should implement the corresponding
36/// function on the `_Impl` traits, e.g. [`ArrayValidityImpl`]. The functions here dispatch to the
37/// implementations, while validating pre- and post-conditions.
38pub trait Array: Send + Sync + Debug + ArrayStatistics + ArrayVariants + ArrayVisitor {
39    /// Returns the array as a reference to a generic [`Any`] trait object.
40    fn as_any(&self) -> &dyn Any;
41
42    /// Returns the array as an [`Arc`] reference to a generic [`Any`] trait object.
43    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
44
45    /// Returns the array as an [`ArrayRef`].
46    fn to_array(&self) -> ArrayRef;
47
48    /// Converts the array into an [`ArrayRef`].
49    fn into_array(self) -> ArrayRef
50    where
51        Self: Sized;
52
53    /// Returns the length of the array.
54    fn len(&self) -> usize;
55
56    /// Returns whether the array is empty (has zero rows).
57    fn is_empty(&self) -> bool {
58        self.len() == 0
59    }
60
61    /// Returns the logical Vortex [`DType`] of the array.
62    fn dtype(&self) -> &DType;
63
64    /// Returns the encoding of the array.
65    fn encoding(&self) -> EncodingId;
66
67    /// Returns the encoding VTable.
68    fn vtable(&self) -> VTableRef;
69
70    /// Returns whether the array is of the given encoding.
71    fn is_encoding(&self, encoding: EncodingId) -> bool {
72        self.encoding() == encoding
73    }
74
75    /// Returns whether this array is an arrow encoding.
76    // TODO(ngates): this shouldn't live here.
77    fn is_arrow(&self) -> bool {
78        self.is_encoding(NullEncoding.id())
79            || self.is_encoding(BoolEncoding.id())
80            || self.is_encoding(PrimitiveEncoding.id())
81            || self.is_encoding(VarBinEncoding.id())
82            || self.is_encoding(VarBinViewEncoding.id())
83    }
84
85    /// Whether the array is of a canonical encoding.
86    // TODO(ngates): this shouldn't live here.
87    fn is_canonical(&self) -> bool {
88        self.is_encoding(NullEncoding.id())
89            || self.is_encoding(BoolEncoding.id())
90            || self.is_encoding(PrimitiveEncoding.id())
91            || self.is_encoding(StructEncoding.id())
92            || self.is_encoding(ListEncoding.id())
93            || self.is_encoding(VarBinViewEncoding.id())
94            || self.is_encoding(ExtensionEncoding.id())
95    }
96
97    /// Returns whether the item at `index` is valid.
98    fn is_valid(&self, index: usize) -> VortexResult<bool>;
99
100    /// Returns whether the item at `index` is invalid.
101    fn is_invalid(&self, index: usize) -> VortexResult<bool>;
102
103    /// Returns whether all items in the array are valid.
104    ///
105    /// This is usually cheaper than computing a precise `valid_count`.
106    fn all_valid(&self) -> VortexResult<bool>;
107
108    /// Returns whether the array is all invalid.
109    ///
110    /// This is usually cheaper than computing a precise `invalid_count`.
111    fn all_invalid(&self) -> VortexResult<bool>;
112
113    /// Returns the number of valid elements in the array.
114    fn valid_count(&self) -> VortexResult<usize>;
115
116    /// Returns the number of invalid elements in the array.
117    fn invalid_count(&self) -> VortexResult<usize>;
118
119    /// Returns the canonical validity mask for the array.
120    fn validity_mask(&self) -> VortexResult<Mask>;
121
122    /// Returns the canonical representation of the array.
123    fn to_canonical(&self) -> VortexResult<Canonical>;
124
125    /// Writes the array into the canonical builder.
126    ///
127    /// The [`DType`] of the builder must match that of the array.
128    fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()>;
129
130    /// Returns the statistics of the array.
131    // TODO(ngates): change how this works. It's weird.
132    fn statistics(&self) -> StatsSetRef<'_>;
133}
134
135impl Array for Arc<dyn Array> {
136    fn as_any(&self) -> &dyn Any {
137        self.as_ref().as_any()
138    }
139
140    fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
141        self
142    }
143
144    fn to_array(&self) -> ArrayRef {
145        self.clone()
146    }
147
148    fn into_array(self) -> ArrayRef {
149        self
150    }
151
152    fn len(&self) -> usize {
153        self.as_ref().len()
154    }
155
156    fn dtype(&self) -> &DType {
157        self.as_ref().dtype()
158    }
159
160    fn encoding(&self) -> EncodingId {
161        self.as_ref().encoding()
162    }
163
164    fn vtable(&self) -> VTableRef {
165        self.as_ref().vtable()
166    }
167
168    fn is_valid(&self, index: usize) -> VortexResult<bool> {
169        self.as_ref().is_valid(index)
170    }
171
172    fn is_invalid(&self, index: usize) -> VortexResult<bool> {
173        self.as_ref().is_invalid(index)
174    }
175
176    fn all_valid(&self) -> VortexResult<bool> {
177        self.as_ref().all_valid()
178    }
179
180    fn all_invalid(&self) -> VortexResult<bool> {
181        self.as_ref().all_invalid()
182    }
183
184    fn valid_count(&self) -> VortexResult<usize> {
185        self.as_ref().valid_count()
186    }
187
188    fn invalid_count(&self) -> VortexResult<usize> {
189        self.as_ref().invalid_count()
190    }
191
192    fn validity_mask(&self) -> VortexResult<Mask> {
193        self.as_ref().validity_mask()
194    }
195
196    fn to_canonical(&self) -> VortexResult<Canonical> {
197        self.as_ref().to_canonical()
198    }
199
200    fn append_to_builder(&self, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
201        self.as_ref().append_to_builder(builder)
202    }
203
204    fn statistics(&self) -> StatsSetRef<'_> {
205        self.as_ref().statistics()
206    }
207}
208
/// A reference counted pointer to a dynamic [`Array`] trait object.
///
/// Because this is an [`Arc`], cloning an `ArrayRef` clones the pointer rather than the array
/// it points to.
pub type ArrayRef = Arc<dyn Array>;
211
212impl ToOwned for dyn Array {
213    type Owned = ArrayRef;
214
215    fn to_owned(&self) -> Self::Owned {
216        self.to_array()
217    }
218}
219
220impl<A: Array + Clone + 'static> TryFromArrayRef for A {
221    fn try_from_array(array: ArrayRef) -> VortexResult<Self> {
222        Ok(Arc::unwrap_or_clone(
223            array
224                .as_any_arc()
225                .downcast::<A>()
226                .map_err(|_| vortex_err!("Cannot downcast to {}", type_name::<A>()))?,
227        ))
228    }
229}
230
231impl<A: Array + Clone + 'static> TryFromArrayRef for Arc<A> {
232    fn try_from_array(array: ArrayRef) -> VortexResult<Self> {
233        array
234            .as_any_arc()
235            .downcast::<A>()
236            .map_err(|_| vortex_err!("Cannot downcast to {}", type_name::<A>()))
237    }
238}
239
240pub trait ArrayExt: Array {
241    /// Returns the array downcast to the given `A`.
242    fn as_<A: Array + 'static>(&self) -> &A {
243        self.as_any()
244            .downcast_ref::<A>()
245            .vortex_expect("Failed to downcast")
246    }
247
248    /// Returns the array downcast to the given `A`.
249    fn as_opt<A: Array + 'static>(&self) -> Option<&A> {
250        self.as_any().downcast_ref::<A>()
251    }
252}
253
/// Blanket implementation: every type implementing [`Array`], sized or not (the `?Sized` bound
/// admits `dyn Array`), receives the [`ArrayExt`] helpers with their default bodies.
impl<A: Array + ?Sized> ArrayExt for A {}
255
256impl Display for dyn Array {
257    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
258        write!(
259            f,
260            "{}({}, len={})",
261            self.encoding(),
262            self.dtype(),
263            self.len()
264        )
265    }
266}
267
/// Implements `TryFrom<ArrayRef>` for the concrete array type `$Array`.
///
/// The generated conversion downcasts the array reference to `$Array`. On success the value is
/// moved out of the `Arc` when it is the only strong reference and cloned otherwise, so `$Array`
/// must implement `Clone`. On failure a "Cannot downcast to ..." error naming the target type
/// is returned.
///
/// All standard-library items are referenced through absolute `::std` paths, so the expansion
/// is unaffected by names declared at the call site (for example a local `Result` alias). The
/// `vortex_error` paths are resolved at the call site and must be nameable there.
#[macro_export]
macro_rules! try_from_array_ref {
    ($Array:ty) => {
        impl ::std::convert::TryFrom<$crate::ArrayRef> for $Array {
            type Error = vortex_error::VortexError;

            fn try_from(value: $crate::ArrayRef) -> ::std::result::Result<Self, Self::Error> {
                ::std::result::Result::Ok(::std::sync::Arc::unwrap_or_clone(
                    value.as_any_arc().downcast::<Self>().map_err(|_| {
                        vortex_error::vortex_err!(
                            "Cannot downcast to {}",
                            ::std::any::type_name::<Self>()
                        )
                    })?,
                ))
            }
        }
    };
}