vortex_array/arrow/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::ops::Range;
6
7use arrow_array::ArrayRef as ArrowArrayRef;
8use vortex_dtype::arrow::FromArrowType;
9use vortex_dtype::{DType, Nullability};
10use vortex_error::{VortexResult, vortex_panic};
11use vortex_mask::Mask;
12use vortex_scalar::Scalar;
13
14use crate::arrow::FromArrowArray;
15use crate::stats::{ArrayStats, StatsSetRef};
16use crate::vtable::{
17    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityVTable,
18    VisitorVTable,
19};
20use crate::{
21    Array, ArrayBufferVisitor, ArrayChildVisitor, ArrayRef, Canonical, EncodingId, EncodingRef,
22    IntoArray, vtable,
23};
24
// Invokes the crate's `vtable!` macro for the `Arrow` encoding.
// NOTE(review): presumably this is what declares `ArrowVTable`, which the impls
// below rely on but which is never defined by hand here — confirm against the
// macro definition.
vtable!(Arrow);
26
27impl VTable for ArrowVTable {
28    type Array = ArrowArray;
29    type Encoding = ArrowEncoding;
30    type ArrayVTable = Self;
31    type CanonicalVTable = Self;
32    type OperationsVTable = Self;
33    type ValidityVTable = Self;
34    type VisitorVTable = Self;
35    type ComputeVTable = NotSupported;
36    type EncodeVTable = NotSupported;
37    type PipelineVTable = NotSupported;
38    type SerdeVTable = NotSupported;
39
40    fn id(_encoding: &Self::Encoding) -> EncodingId {
41        EncodingId::new_ref("vortex.arrow")
42    }
43
44    fn encoding(_array: &Self::Array) -> EncodingRef {
45        EncodingRef::new_ref(ArrowEncoding.as_ref())
46    }
47}
48
/// Encoding marker for [`ArrowArray`], the Vortex array that wraps an in-memory
/// Arrow array.
///
/// This is a unit struct; it is reported under the encoding ID `vortex.arrow`.
// TODO(ngates): consider having each Arrow encoding be a separate encoding ID.
#[derive(Debug, Clone)]
pub struct ArrowEncoding;
53
54#[derive(Clone, Debug)]
55pub struct ArrowArray {
56    inner: ArrowArrayRef,
57    dtype: DType,
58    stats_set: ArrayStats,
59}
60
61impl ArrowArray {
62    pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
63        let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
64        Self {
65            inner: arrow_array,
66            dtype,
67            stats_set: Default::default(),
68        }
69    }
70
71    pub fn inner(&self) -> &ArrowArrayRef {
72        &self.inner
73    }
74}
75
76impl ArrayVTable<ArrowVTable> for ArrowVTable {
77    fn len(array: &ArrowArray) -> usize {
78        array.inner.len()
79    }
80
81    fn dtype(array: &ArrowArray) -> &DType {
82        &array.dtype
83    }
84
85    fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
86        array.stats_set.to_ref(array.as_ref())
87    }
88}
89
90impl CanonicalVTable<ArrowVTable> for ArrowVTable {
91    fn canonicalize(array: &ArrowArray) -> VortexResult<Canonical> {
92        ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable()).to_canonical()
93    }
94}
95
96impl OperationsVTable<ArrowVTable> for ArrowVTable {
97    fn slice(array: &ArrowArray, range: Range<usize>) -> ArrayRef {
98        let inner = array.inner.slice(range.start, range.len());
99        let new_array = ArrowArray {
100            inner,
101            dtype: array.dtype.clone(),
102            stats_set: Default::default(),
103        };
104        new_array.into_array()
105    }
106
107    fn scalar_at(_array: &ArrowArray, _index: usize) -> Scalar {
108        vortex_panic!("Not supported")
109    }
110}
111
112impl ValidityVTable<ArrowVTable> for ArrowVTable {
113    fn is_valid(array: &ArrowArray, index: usize) -> bool {
114        array.inner.is_valid(index)
115    }
116
117    fn all_valid(array: &ArrowArray) -> bool {
118        array.inner.logical_null_count() == 0
119    }
120
121    fn all_invalid(array: &ArrowArray) -> bool {
122        array.inner.logical_null_count() == array.inner.len()
123    }
124
125    fn validity_mask(array: &ArrowArray) -> Mask {
126        array
127            .inner
128            .logical_nulls()
129            .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone()))
130            .unwrap_or_else(|| Mask::new_true(array.inner.len()))
131    }
132}
133
134impl VisitorVTable<ArrowVTable> for ArrowVTable {
135    fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}
136
137    fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
138}