vortex_array/arrow/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::hash::Hash;
6use std::ops::Range;
7
8use arrow_array::ArrayRef as ArrowArrayRef;
9use vortex_buffer::BufferHandle;
10use vortex_dtype::DType;
11use vortex_dtype::Nullability;
12use vortex_dtype::arrow::FromArrowType;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_panic;
16use vortex_mask::Mask;
17use vortex_scalar::Scalar;
18
19use crate::Array;
20use crate::ArrayBufferVisitor;
21use crate::ArrayChildVisitor;
22use crate::ArrayRef;
23use crate::Canonical;
24use crate::EmptyMetadata;
25use crate::IntoArray;
26use crate::Precision;
27use crate::arrow::FromArrowArray;
28use crate::serde::ArrayChildren;
29use crate::stats::ArrayStats;
30use crate::stats::StatsSetRef;
31use crate::vtable;
32use crate::vtable::ArrayId;
33use crate::vtable::ArrayVTable;
34use crate::vtable::ArrayVTableExt;
35use crate::vtable::BaseArrayVTable;
36use crate::vtable::CanonicalVTable;
37use crate::vtable::NotSupported;
38use crate::vtable::OperationsVTable;
39use crate::vtable::VTable;
40use crate::vtable::ValidityVTable;
41use crate::vtable::VisitorVTable;
42
// Invokes the crate's `vtable!` macro for the `Arrow` encoding.
// NOTE(review): the macro is defined in `crate::vtable`, outside this file; it presumably
// generates the glue between `ArrowArray` and `ArrowVTable` (e.g. the `as_ref()` /
// `into_array()` conversions used further down) — confirm against the macro definition.
vtable!(Arrow);
44
45impl VTable for ArrowVTable {
46    type Array = ArrowArray;
47
48    type Metadata = EmptyMetadata;
49
50    type ArrayVTable = Self;
51    type CanonicalVTable = Self;
52    type OperationsVTable = Self;
53    type ValidityVTable = Self;
54    type VisitorVTable = Self;
55    type ComputeVTable = NotSupported;
56    type EncodeVTable = NotSupported;
57
58    fn id(&self) -> ArrayId {
59        ArrayId::new_ref("vortex.arrow")
60    }
61
62    fn encoding(_array: &Self::Array) -> ArrayVTable {
63        ArrowVTable.as_vtable()
64    }
65
66    fn metadata(_array: &Self::Array) -> VortexResult<Self::Metadata> {
67        Ok(EmptyMetadata)
68    }
69
70    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
71        Ok(None)
72    }
73
74    fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
75        Ok(EmptyMetadata)
76    }
77
78    fn build(
79        &self,
80        _dtype: &DType,
81        _len: usize,
82        _metadata: &Self::Metadata,
83        _buffers: &[BufferHandle],
84        _children: &dyn ArrayChildren,
85    ) -> VortexResult<Self::Array> {
86        vortex_bail!("ArrowArray cannot be deserialized")
87    }
88}
89
/// The vtable for [`ArrowArray`], the Vortex array that wraps an in-memory Arrow array.
///
/// This is a zero-sized marker type; the behaviour lives in the vtable trait
/// implementations for it in this file.
// TODO(ngates): consider having each Arrow encoding be a separate encoding ID.
#[derive(Debug)]
pub struct ArrowVTable;
94
/// A Vortex array that wraps an in-memory Arrow array.
#[derive(Clone, Debug)]
pub struct ArrowArray {
    // The wrapped Arrow array.
    inner: ArrowArrayRef,
    // Vortex dtype; `ArrowArray::new` derives it from the Arrow data type and the
    // caller-supplied nullability.
    dtype: DType,
    // Statistics attached to this array; default-initialised by `new` and by `slice`.
    stats_set: ArrayStats,
}
101
102impl ArrowArray {
103    pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
104        let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
105        Self {
106            inner: arrow_array,
107            dtype,
108            stats_set: Default::default(),
109        }
110    }
111
112    pub fn inner(&self) -> &ArrowArrayRef {
113        &self.inner
114    }
115}
116
117impl BaseArrayVTable<ArrowVTable> for ArrowVTable {
118    fn len(array: &ArrowArray) -> usize {
119        array.inner.len()
120    }
121
122    fn dtype(array: &ArrowArray) -> &DType {
123        &array.dtype
124    }
125
126    fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
127        array.stats_set.to_ref(array.as_ref())
128    }
129
130    fn array_hash<H: std::hash::Hasher>(array: &ArrowArray, state: &mut H, _precision: Precision) {
131        array.dtype.hash(state);
132        // Hash based on pointer to the inner Arrow array since Arrow doesn't support hashing.
133        std::sync::Arc::as_ptr(&array.inner).hash(state);
134    }
135
136    fn array_eq(array: &ArrowArray, other: &ArrowArray, _precision: Precision) -> bool {
137        array.dtype == other.dtype && std::sync::Arc::ptr_eq(&array.inner, &other.inner)
138    }
139}
140
141impl CanonicalVTable<ArrowVTable> for ArrowVTable {
142    fn canonicalize(array: &ArrowArray) -> Canonical {
143        ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable()).to_canonical()
144    }
145}
146
147impl OperationsVTable<ArrowVTable> for ArrowVTable {
148    fn slice(array: &ArrowArray, range: Range<usize>) -> ArrayRef {
149        let inner = array.inner.slice(range.start, range.len());
150        let new_array = ArrowArray {
151            inner,
152            dtype: array.dtype.clone(),
153            stats_set: Default::default(),
154        };
155        new_array.into_array()
156    }
157
158    fn scalar_at(_array: &ArrowArray, _index: usize) -> Scalar {
159        vortex_panic!("Not supported")
160    }
161}
162
163impl ValidityVTable<ArrowVTable> for ArrowVTable {
164    fn is_valid(array: &ArrowArray, index: usize) -> bool {
165        array.inner.is_valid(index)
166    }
167
168    fn all_valid(array: &ArrowArray) -> bool {
169        array.inner.logical_null_count() == 0
170    }
171
172    fn all_invalid(array: &ArrowArray) -> bool {
173        array.inner.logical_null_count() == array.inner.len()
174    }
175
176    fn validity_mask(array: &ArrowArray) -> Mask {
177        array
178            .inner
179            .logical_nulls()
180            .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone().into()))
181            .unwrap_or_else(|| Mask::new_true(array.inner.len()))
182    }
183}
184
185impl VisitorVTable<ArrowVTable> for ArrowVTable {
186    fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}
187
188    fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
189}