vortex_array/arrow/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use arrow_array::ArrayRef as ArrowArrayRef;
7use vortex_dtype::arrow::FromArrowType;
8use vortex_dtype::{DType, Nullability};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11use vortex_scalar::Scalar;
12
13use crate::arrow::FromArrowArray;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::vtable::{
16    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityVTable,
17    VisitorVTable,
18};
19use crate::{
20    Array, ArrayBufferVisitor, ArrayChildVisitor, ArrayRef, Canonical, EncodingId, EncodingRef,
21    IntoArray, vtable,
22};
23
24vtable!(Arrow);
25
26impl VTable for ArrowVTable {
27    type Array = ArrowArray;
28    type Encoding = ArrowEncoding;
29    type ArrayVTable = Self;
30    type CanonicalVTable = Self;
31    type OperationsVTable = Self;
32    type ValidityVTable = Self;
33    type VisitorVTable = Self;
34    type ComputeVTable = NotSupported;
35    type EncodeVTable = NotSupported;
36    type PipelineVTable = NotSupported;
37    type SerdeVTable = NotSupported;
38
39    fn id(_encoding: &Self::Encoding) -> EncodingId {
40        EncodingId::new_ref("vortex.arrow")
41    }
42
43    fn encoding(_array: &Self::Array) -> EncodingRef {
44        EncodingRef::new_ref(ArrowEncoding.as_ref())
45    }
46}
47
48/// A Vortex array that wraps an in-memory Arrow array.
49// TODO(ngates): consider having each Arrow encoding be a separate encoding ID.
50#[derive(Clone, Debug)]
51pub struct ArrowEncoding;
52
53#[derive(Clone, Debug)]
54pub struct ArrowArray {
55    inner: ArrowArrayRef,
56    dtype: DType,
57    stats_set: ArrayStats,
58}
59
60impl ArrowArray {
61    pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
62        let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
63        Self {
64            inner: arrow_array,
65            dtype,
66            stats_set: Default::default(),
67        }
68    }
69
70    pub fn inner(&self) -> &ArrowArrayRef {
71        &self.inner
72    }
73}
74
75impl ArrayVTable<ArrowVTable> for ArrowVTable {
76    fn len(array: &ArrowArray) -> usize {
77        array.inner.len()
78    }
79
80    fn dtype(array: &ArrowArray) -> &DType {
81        &array.dtype
82    }
83
84    fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
85        array.stats_set.to_ref(array.as_ref())
86    }
87}
88
89impl CanonicalVTable<ArrowVTable> for ArrowVTable {
90    fn canonicalize(array: &ArrowArray) -> VortexResult<Canonical> {
91        ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable()).to_canonical()
92    }
93}
94
95impl OperationsVTable<ArrowVTable> for ArrowVTable {
96    fn slice(array: &ArrowArray, start: usize, stop: usize) -> ArrayRef {
97        let inner = array.inner.slice(start, stop - start);
98        let new_array = ArrowArray {
99            inner,
100            dtype: array.dtype.clone(),
101            stats_set: Default::default(),
102        };
103        new_array.into_array()
104    }
105
106    fn scalar_at(_array: &ArrowArray, _index: usize) -> Scalar {
107        vortex_panic!("Not supported")
108    }
109}
110
111impl ValidityVTable<ArrowVTable> for ArrowVTable {
112    fn is_valid(array: &ArrowArray, index: usize) -> VortexResult<bool> {
113        Ok(array.inner.is_valid(index))
114    }
115
116    fn all_valid(array: &ArrowArray) -> VortexResult<bool> {
117        Ok(array.inner.logical_null_count() == 0)
118    }
119
120    fn all_invalid(array: &ArrowArray) -> VortexResult<bool> {
121        Ok(array.inner.logical_null_count() == array.inner.len())
122    }
123
124    fn validity_mask(array: &ArrowArray) -> VortexResult<Mask> {
125        Ok(array
126            .inner
127            .logical_nulls()
128            .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone()))
129            .unwrap_or_else(|| Mask::new_true(array.inner.len())))
130    }
131}
132
133impl VisitorVTable<ArrowVTable> for ArrowVTable {
134    fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}
135
136    fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
137}