vortex_array/arrow/
array.rs

1use std::fmt::Debug;
2
3use arrow_array::ArrayRef as ArrowArrayRef;
4use vortex_dtype::arrow::FromArrowType;
5use vortex_dtype::{DType, Nullability};
6use vortex_error::{VortexResult, vortex_bail};
7use vortex_mask::Mask;
8use vortex_scalar::Scalar;
9
10use crate::arrow::FromArrowArray;
11use crate::stats::{ArrayStats, StatsSetRef};
12use crate::vtable::{
13    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityVTable,
14    VisitorVTable,
15};
16use crate::{
17    Array, ArrayBufferVisitor, ArrayChildVisitor, ArrayRef, Canonical, EncodingId, EncodingRef,
18    IntoArray, vtable,
19};
20
// Invokes the crate's `vtable!` macro for the `Arrow` encoding.
// NOTE(review): presumably this is what declares `ArrowVTable` (used by the impls in this
// file) and the glue between `ArrowArray` and the generic array types — confirm against the
// macro definition.
vtable!(Arrow);
22
/// Top-level vtable wiring for the Arrow encoding.
///
/// Ties together the array type ([`ArrowArray`]), the encoding marker ([`ArrowEncoding`]) and
/// the per-capability vtables.
impl VTable for ArrowVTable {
    type Array = ArrowArray;
    type Encoding = ArrowEncoding;
    // These capabilities are implemented on `ArrowVTable` itself, elsewhere in this file.
    type ArrayVTable = Self;
    type CanonicalVTable = Self;
    type OperationsVTable = Self;
    type ValidityVTable = Self;
    type VisitorVTable = Self;
    // Compute, encode and serde are declared as `NotSupported` for this encoding.
    type ComputeVTable = NotSupported;
    type EncodeVTable = NotSupported;
    type SerdeVTable = NotSupported;

    /// Returns the encoding identifier, `"vortex.arrow"`.
    fn id(_encoding: &Self::Encoding) -> EncodingId {
        EncodingId::new_ref("vortex.arrow")
    }

    /// Returns an [`EncodingRef`] built from the `ArrowEncoding` unit value; the array
    /// argument is not inspected.
    fn encoding(_array: &Self::Array) -> EncodingRef {
        EncodingRef::new_ref(ArrowEncoding.as_ref())
    }
}
43
/// Encoding marker for [`ArrowArray`], the Vortex array that wraps an in-memory Arrow array.
///
/// This is a unit type: it carries no state and is used as the `Encoding` associated type of
/// `ArrowVTable`, identified by the id `"vortex.arrow"`.
// TODO(ngates): consider having each Arrow encoding be a separate encoding ID.
#[derive(Clone, Debug)]
pub struct ArrowEncoding;
48
/// A Vortex array that wraps an in-memory Arrow array.
#[derive(Clone, Debug)]
pub struct ArrowArray {
    /// The wrapped Arrow array.
    inner: ArrowArrayRef,
    /// Vortex dtype, derived in `new` from the Arrow data type and the requested nullability.
    dtype: DType,
    /// Statistics holder for this array; initialised to its default value on construction.
    stats_set: ArrayStats,
}
55
56impl ArrowArray {
57    pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
58        let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
59        Self {
60            inner: arrow_array,
61            dtype,
62            stats_set: Default::default(),
63        }
64    }
65
66    pub fn inner(&self) -> &ArrowArrayRef {
67        &self.inner
68    }
69}
70
/// Basic array metadata accessors for [`ArrowArray`].
impl ArrayVTable<ArrowVTable> for ArrowVTable {
    /// Number of elements, as reported by the wrapped Arrow array.
    fn len(array: &ArrowArray) -> usize {
        array.inner.len()
    }

    /// The Vortex dtype stored on the array at construction time.
    fn dtype(array: &ArrowArray) -> &DType {
        &array.dtype
    }

    /// A reference view over the array's statistics, bound to this array.
    fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
        array.stats_set.to_ref(array.as_ref())
    }
}
84
85impl CanonicalVTable<ArrowVTable> for ArrowVTable {
86    fn canonicalize(array: &ArrowArray) -> VortexResult<Canonical> {
87        ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable()).to_canonical()
88    }
89}
90
91impl OperationsVTable<ArrowVTable> for ArrowVTable {
92    fn slice(array: &ArrowArray, start: usize, stop: usize) -> VortexResult<ArrayRef> {
93        let inner = array.inner.slice(start, stop - start);
94        let new_array = ArrowArray {
95            inner,
96            dtype: array.dtype.clone(),
97            stats_set: Default::default(),
98        };
99        Ok(new_array.into_array())
100    }
101
102    fn scalar_at(_array: &ArrowArray, _index: usize) -> VortexResult<Scalar> {
103        vortex_bail!("Not supported")
104    }
105}
106
107impl ValidityVTable<ArrowVTable> for ArrowVTable {
108    fn is_valid(array: &ArrowArray, index: usize) -> VortexResult<bool> {
109        Ok(array.inner.is_valid(index))
110    }
111
112    fn all_valid(array: &ArrowArray) -> VortexResult<bool> {
113        Ok(array.inner.logical_null_count() == 0)
114    }
115
116    fn all_invalid(array: &ArrowArray) -> VortexResult<bool> {
117        Ok(array.inner.logical_null_count() == array.inner.len())
118    }
119
120    fn validity_mask(array: &ArrowArray) -> VortexResult<Mask> {
121        Ok(array
122            .inner
123            .logical_nulls()
124            .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone()))
125            .unwrap_or_else(|| Mask::new_true(array.inner.len())))
126    }
127}
128
/// Visitor hooks for [`ArrowArray`].
///
/// Both hooks are intentionally empty: nothing is reported to the buffer visitor or the child
/// visitor for this encoding.
impl VisitorVTable<ArrowVTable> for ArrowVTable {
    /// No-op: the visitor is never called.
    fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}

    /// No-op: the visitor is never called.
    fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
}