vortex_array/arrow/
array.rs1use std::fmt::Debug;
5use std::hash::Hash;
6use std::ops::Range;
7
8use arrow_array::ArrayRef as ArrowArrayRef;
9use vortex_buffer::BitBuffer;
10use vortex_dtype::DType;
11use vortex_dtype::Nullability;
12use vortex_dtype::arrow::FromArrowType;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_panic;
17use vortex_mask::Mask;
18use vortex_scalar::Scalar;
19
20use crate::Array;
21use crate::ArrayBufferVisitor;
22use crate::ArrayChildVisitor;
23use crate::ArrayRef;
24use crate::Canonical;
25use crate::EmptyMetadata;
26use crate::IntoArray;
27use crate::Precision;
28use crate::arrays::BoolArray;
29use crate::arrow::FromArrowArray;
30use crate::buffer::BufferHandle;
31use crate::serde::ArrayChildren;
32use crate::stats::ArrayStats;
33use crate::stats::StatsSetRef;
34use crate::validity::Validity;
35use crate::vtable;
36use crate::vtable::ArrayId;
37use crate::vtable::ArrayVTable;
38use crate::vtable::ArrayVTableExt;
39use crate::vtable::BaseArrayVTable;
40use crate::vtable::CanonicalVTable;
41use crate::vtable::NotSupported;
42use crate::vtable::OperationsVTable;
43use crate::vtable::VTable;
44use crate::vtable::ValidityVTable;
45use crate::vtable::VisitorVTable;
46
// Invokes the crate's `vtable!` macro for the `Arrow` encoding.
// NOTE(review): the macro body is not visible in this file; presumably it
// generates the glue that ties `ArrowVTable` and `ArrowArray` together
// (e.g. `as_ref()` / `into_array()` used further down) — confirm in `crate::vtable`.
47vtable!(Arrow);
48
49impl VTable for ArrowVTable {
50 type Array = ArrowArray;
51
52 type Metadata = EmptyMetadata;
53
54 type ArrayVTable = Self;
55 type CanonicalVTable = Self;
56 type OperationsVTable = Self;
57 type ValidityVTable = Self;
58 type VisitorVTable = Self;
59 type ComputeVTable = NotSupported;
60 type EncodeVTable = NotSupported;
61
62 fn id(&self) -> ArrayId {
63 ArrayId::new_ref("vortex.arrow")
64 }
65
66 fn encoding(_array: &Self::Array) -> ArrayVTable {
67 ArrowVTable.as_vtable()
68 }
69
70 fn metadata(_array: &Self::Array) -> VortexResult<Self::Metadata> {
71 Ok(EmptyMetadata)
72 }
73
74 fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
75 Ok(None)
76 }
77
78 fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
79 Ok(EmptyMetadata)
80 }
81
82 fn build(
83 &self,
84 _dtype: &DType,
85 _len: usize,
86 _metadata: &Self::Metadata,
87 _buffers: &[BufferHandle],
88 _children: &dyn ArrayChildren,
89 ) -> VortexResult<Self::Array> {
90 vortex_bail!("ArrowArray cannot be deserialized")
91 }
92
93 fn with_children(_array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
94 vortex_ensure!(
95 children.is_empty(),
96 "ArrowArray has no children, got {}",
97 children.len()
98 );
99 Ok(())
100 }
101}
102
/// Zero-sized marker type that carries the vtable implementations for
/// [`ArrowArray`] (encoding id `"vortex.arrow"`).
#[derive(Debug)]
pub struct ArrowVTable;
107
108#[derive(Clone, Debug)]
109pub struct ArrowArray {
110 inner: ArrowArrayRef,
111 dtype: DType,
112 stats_set: ArrayStats,
113}
114
115impl ArrowArray {
116 pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
117 let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
118 Self {
119 inner: arrow_array,
120 dtype,
121 stats_set: Default::default(),
122 }
123 }
124
125 pub fn inner(&self) -> &ArrowArrayRef {
126 &self.inner
127 }
128}
129
130impl BaseArrayVTable<ArrowVTable> for ArrowVTable {
131 fn len(array: &ArrowArray) -> usize {
132 array.inner.len()
133 }
134
135 fn dtype(array: &ArrowArray) -> &DType {
136 &array.dtype
137 }
138
139 fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
140 array.stats_set.to_ref(array.as_ref())
141 }
142
143 fn array_hash<H: std::hash::Hasher>(array: &ArrowArray, state: &mut H, _precision: Precision) {
144 array.dtype.hash(state);
145 std::sync::Arc::as_ptr(&array.inner).hash(state);
147 }
148
149 fn array_eq(array: &ArrowArray, other: &ArrowArray, _precision: Precision) -> bool {
150 array.dtype == other.dtype && std::sync::Arc::ptr_eq(&array.inner, &other.inner)
151 }
152}
153
154impl CanonicalVTable<ArrowVTable> for ArrowVTable {
155 fn canonicalize(array: &ArrowArray) -> Canonical {
156 ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable()).to_canonical()
157 }
158}
159
160impl OperationsVTable<ArrowVTable> for ArrowVTable {
161 fn slice(array: &ArrowArray, range: Range<usize>) -> ArrayRef {
162 let inner = array.inner.slice(range.start, range.len());
163 let new_array = ArrowArray {
164 inner,
165 dtype: array.dtype.clone(),
166 stats_set: Default::default(),
167 };
168 new_array.into_array()
169 }
170
171 fn scalar_at(_array: &ArrowArray, _index: usize) -> Scalar {
172 vortex_panic!("Not supported")
173 }
174}
175
176impl ValidityVTable<ArrowVTable> for ArrowVTable {
177 fn is_valid(array: &ArrowArray, index: usize) -> bool {
178 array.inner.is_valid(index)
179 }
180
181 fn all_valid(array: &ArrowArray) -> bool {
182 array.inner.logical_null_count() == 0
183 }
184
185 fn all_invalid(array: &ArrowArray) -> bool {
186 array.inner.logical_null_count() == array.inner.len()
187 }
188
189 fn validity(array: &ArrowArray) -> VortexResult<Validity> {
190 Ok(match array.inner.logical_nulls() {
191 None => Validity::AllValid,
192 Some(null_buffer) => match null_buffer.null_count() {
193 0 => Validity::AllValid,
194 n if n == array.inner.len() => Validity::AllInvalid,
195 _ => Validity::Array(
196 BoolArray::new(
197 BitBuffer::from(null_buffer.inner().clone()),
198 Validity::NonNullable,
199 )
200 .into_array(),
201 ),
202 },
203 })
204 }
205
206 fn validity_mask(array: &ArrowArray) -> Mask {
207 array
208 .inner
209 .logical_nulls()
210 .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone().into()))
211 .unwrap_or_else(|| Mask::new_true(array.inner.len()))
212 }
213}
214
215impl VisitorVTable<ArrowVTable> for ArrowVTable {
216 fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}
217
218 fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
219}