vortex_array/arrow/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::hash::Hash;
6use std::ops::Range;
7
8use arrow_array::ArrayRef as ArrowArrayRef;
9use vortex_buffer::BitBuffer;
10use vortex_dtype::DType;
11use vortex_dtype::Nullability;
12use vortex_dtype::arrow::FromArrowType;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_panic;
17use vortex_mask::Mask;
18use vortex_scalar::Scalar;
19
20use crate::Array;
21use crate::ArrayBufferVisitor;
22use crate::ArrayChildVisitor;
23use crate::ArrayRef;
24use crate::Canonical;
25use crate::EmptyMetadata;
26use crate::IntoArray;
27use crate::Precision;
28use crate::arrays::BoolArray;
29use crate::arrow::FromArrowArray;
30use crate::buffer::BufferHandle;
31use crate::serde::ArrayChildren;
32use crate::stats::ArrayStats;
33use crate::stats::StatsSetRef;
34use crate::validity::Validity;
35use crate::vtable;
36use crate::vtable::ArrayId;
37use crate::vtable::ArrayVTable;
38use crate::vtable::ArrayVTableExt;
39use crate::vtable::BaseArrayVTable;
40use crate::vtable::CanonicalVTable;
41use crate::vtable::NotSupported;
42use crate::vtable::OperationsVTable;
43use crate::vtable::VTable;
44use crate::vtable::ValidityVTable;
45use crate::vtable::VisitorVTable;
46
// Invokes the crate's `vtable!` macro for the `Arrow` encoding.
// NOTE(review): presumably generates the glue tying `ArrowVTable` to `ArrowArray`
// (e.g. conversions used by `as_ref()` / `into_array()` below) — confirm against the macro.
vtable!(Arrow);
48
49impl VTable for ArrowVTable {
50    type Array = ArrowArray;
51
52    type Metadata = EmptyMetadata;
53
54    type ArrayVTable = Self;
55    type CanonicalVTable = Self;
56    type OperationsVTable = Self;
57    type ValidityVTable = Self;
58    type VisitorVTable = Self;
59    type ComputeVTable = NotSupported;
60    type EncodeVTable = NotSupported;
61
62    fn id(&self) -> ArrayId {
63        ArrayId::new_ref("vortex.arrow")
64    }
65
66    fn encoding(_array: &Self::Array) -> ArrayVTable {
67        ArrowVTable.as_vtable()
68    }
69
70    fn metadata(_array: &Self::Array) -> VortexResult<Self::Metadata> {
71        Ok(EmptyMetadata)
72    }
73
74    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
75        Ok(None)
76    }
77
78    fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
79        Ok(EmptyMetadata)
80    }
81
82    fn build(
83        &self,
84        _dtype: &DType,
85        _len: usize,
86        _metadata: &Self::Metadata,
87        _buffers: &[BufferHandle],
88        _children: &dyn ArrayChildren,
89    ) -> VortexResult<Self::Array> {
90        vortex_bail!("ArrowArray cannot be deserialized")
91    }
92
93    fn with_children(_array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
94        vortex_ensure!(
95            children.is_empty(),
96            "ArrowArray has no children, got {}",
97            children.len()
98        );
99        Ok(())
100    }
101}
102
/// Encoding vtable (id `vortex.arrow`) for [`ArrowArray`], the Vortex array that wraps an
/// in-memory Arrow array.
// TODO(ngates): consider having each Arrow encoding be a separate encoding ID.
#[derive(Debug)]
pub struct ArrowVTable;
107
/// A Vortex array that wraps an in-memory Arrow array.
#[derive(Clone, Debug)]
pub struct ArrowArray {
    /// The wrapped Arrow array.
    inner: ArrowArrayRef,
    /// Vortex dtype, derived in `new` from the Arrow data type and the caller-supplied
    /// nullability.
    dtype: DType,
    /// Statistics holder; initialised to its default value when the array is created.
    stats_set: ArrayStats,
}
114
115impl ArrowArray {
116    pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
117        let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
118        Self {
119            inner: arrow_array,
120            dtype,
121            stats_set: Default::default(),
122        }
123    }
124
125    pub fn inner(&self) -> &ArrowArrayRef {
126        &self.inner
127    }
128}
129
130impl BaseArrayVTable<ArrowVTable> for ArrowVTable {
131    fn len(array: &ArrowArray) -> usize {
132        array.inner.len()
133    }
134
135    fn dtype(array: &ArrowArray) -> &DType {
136        &array.dtype
137    }
138
139    fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
140        array.stats_set.to_ref(array.as_ref())
141    }
142
143    fn array_hash<H: std::hash::Hasher>(array: &ArrowArray, state: &mut H, _precision: Precision) {
144        array.dtype.hash(state);
145        // Hash based on pointer to the inner Arrow array since Arrow doesn't support hashing.
146        std::sync::Arc::as_ptr(&array.inner).hash(state);
147    }
148
149    fn array_eq(array: &ArrowArray, other: &ArrowArray, _precision: Precision) -> bool {
150        array.dtype == other.dtype && std::sync::Arc::ptr_eq(&array.inner, &other.inner)
151    }
152}
153
154impl CanonicalVTable<ArrowVTable> for ArrowVTable {
155    fn canonicalize(array: &ArrowArray) -> Canonical {
156        ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable()).to_canonical()
157    }
158}
159
160impl OperationsVTable<ArrowVTable> for ArrowVTable {
161    fn slice(array: &ArrowArray, range: Range<usize>) -> ArrayRef {
162        let inner = array.inner.slice(range.start, range.len());
163        let new_array = ArrowArray {
164            inner,
165            dtype: array.dtype.clone(),
166            stats_set: Default::default(),
167        };
168        new_array.into_array()
169    }
170
171    fn scalar_at(_array: &ArrowArray, _index: usize) -> Scalar {
172        vortex_panic!("Not supported")
173    }
174}
175
176impl ValidityVTable<ArrowVTable> for ArrowVTable {
177    fn is_valid(array: &ArrowArray, index: usize) -> bool {
178        array.inner.is_valid(index)
179    }
180
181    fn all_valid(array: &ArrowArray) -> bool {
182        array.inner.logical_null_count() == 0
183    }
184
185    fn all_invalid(array: &ArrowArray) -> bool {
186        array.inner.logical_null_count() == array.inner.len()
187    }
188
189    fn validity(array: &ArrowArray) -> VortexResult<Validity> {
190        Ok(match array.inner.logical_nulls() {
191            None => Validity::AllValid,
192            Some(null_buffer) => match null_buffer.null_count() {
193                0 => Validity::AllValid,
194                n if n == array.inner.len() => Validity::AllInvalid,
195                _ => Validity::Array(
196                    BoolArray::new(
197                        BitBuffer::from(null_buffer.inner().clone()),
198                        Validity::NonNullable,
199                    )
200                    .into_array(),
201                ),
202            },
203        })
204    }
205
206    fn validity_mask(array: &ArrowArray) -> Mask {
207        array
208            .inner
209            .logical_nulls()
210            .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone().into()))
211            .unwrap_or_else(|| Mask::new_true(array.inner.len()))
212    }
213}
214
/// Visitor hooks for [`ArrowArray`]. Both are no-ops: nothing is reported to either visitor.
impl VisitorVTable<ArrowVTable> for ArrowVTable {
    /// Reports no buffers; the wrapped Arrow array's memory is not exposed to `_visitor`.
    fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}

    /// Reports no children, matching `with_children`, which only accepts an empty list.
    fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
}
219}