vortex_array/arrow/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5
6use arrow_array::ArrayRef as ArrowArrayRef;
7use vortex_dtype::arrow::FromArrowType;
8use vortex_dtype::{DType, Nullability};
9use vortex_error::{VortexResult, vortex_panic};
10use vortex_mask::Mask;
11use vortex_scalar::Scalar;
12
13use crate::arrow::FromArrowArray;
14use crate::stats::{ArrayStats, StatsSetRef};
15use crate::vtable::{
16    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityVTable,
17    VisitorVTable,
18};
19use crate::{
20    Array, ArrayBufferVisitor, ArrayChildVisitor, ArrayRef, Canonical, EncodingId, EncodingRef,
21    IntoArray, vtable,
22};
23
24vtable!(Arrow);
25
26impl VTable for ArrowVTable {
27    type Array = ArrowArray;
28    type Encoding = ArrowEncoding;
29    type ArrayVTable = Self;
30    type CanonicalVTable = Self;
31    type OperationsVTable = Self;
32    type ValidityVTable = Self;
33    type VisitorVTable = Self;
34    type ComputeVTable = NotSupported;
35    type EncodeVTable = NotSupported;
36    type SerdeVTable = NotSupported;
37
38    fn id(_encoding: &Self::Encoding) -> EncodingId {
39        EncodingId::new_ref("vortex.arrow")
40    }
41
42    fn encoding(_array: &Self::Array) -> EncodingRef {
43        EncodingRef::new_ref(ArrowEncoding.as_ref())
44    }
45}
46
/// Encoding marker for Vortex arrays that wrap an in-memory Arrow array.
///
/// This is a stateless unit struct; it is the `Encoding` type of
/// `ArrowVTable`.
// TODO(ngates): consider having each Arrow encoding be a separate encoding ID.
#[derive(Debug, Clone)]
pub struct ArrowEncoding;
51
52#[derive(Clone, Debug)]
53pub struct ArrowArray {
54    inner: ArrowArrayRef,
55    dtype: DType,
56    stats_set: ArrayStats,
57}
58
59impl ArrowArray {
60    pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
61        let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
62        Self {
63            inner: arrow_array,
64            dtype,
65            stats_set: Default::default(),
66        }
67    }
68
69    pub fn inner(&self) -> &ArrowArrayRef {
70        &self.inner
71    }
72}
73
74impl ArrayVTable<ArrowVTable> for ArrowVTable {
75    fn len(array: &ArrowArray) -> usize {
76        array.inner.len()
77    }
78
79    fn dtype(array: &ArrowArray) -> &DType {
80        &array.dtype
81    }
82
83    fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
84        array.stats_set.to_ref(array.as_ref())
85    }
86}
87
88impl CanonicalVTable<ArrowVTable> for ArrowVTable {
89    fn canonicalize(array: &ArrowArray) -> VortexResult<Canonical> {
90        ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable()).to_canonical()
91    }
92}
93
94impl OperationsVTable<ArrowVTable> for ArrowVTable {
95    fn slice(array: &ArrowArray, start: usize, stop: usize) -> ArrayRef {
96        let inner = array.inner.slice(start, stop - start);
97        let new_array = ArrowArray {
98            inner,
99            dtype: array.dtype.clone(),
100            stats_set: Default::default(),
101        };
102        new_array.into_array()
103    }
104
105    fn scalar_at(_array: &ArrowArray, _index: usize) -> Scalar {
106        vortex_panic!("Not supported")
107    }
108}
109
110impl ValidityVTable<ArrowVTable> for ArrowVTable {
111    fn is_valid(array: &ArrowArray, index: usize) -> VortexResult<bool> {
112        Ok(array.inner.is_valid(index))
113    }
114
115    fn all_valid(array: &ArrowArray) -> VortexResult<bool> {
116        Ok(array.inner.logical_null_count() == 0)
117    }
118
119    fn all_invalid(array: &ArrowArray) -> VortexResult<bool> {
120        Ok(array.inner.logical_null_count() == array.inner.len())
121    }
122
123    fn validity_mask(array: &ArrowArray) -> VortexResult<Mask> {
124        Ok(array
125            .inner
126            .logical_nulls()
127            .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone()))
128            .unwrap_or_else(|| Mask::new_true(array.inner.len())))
129    }
130}
131
132impl VisitorVTable<ArrowVTable> for ArrowVTable {
133    fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}
134
135    fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
136}