vortex_array/arrow/
array.rs1use std::fmt::Debug;
5use std::hash::Hash;
6use std::ops::Range;
7
8use arrow_array::ArrayRef as ArrowArrayRef;
9use vortex_buffer::BufferHandle;
10use vortex_dtype::DType;
11use vortex_dtype::Nullability;
12use vortex_dtype::arrow::FromArrowType;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_panic;
16use vortex_mask::Mask;
17use vortex_scalar::Scalar;
18
19use crate::Array;
20use crate::ArrayBufferVisitor;
21use crate::ArrayChildVisitor;
22use crate::ArrayRef;
23use crate::Canonical;
24use crate::EmptyMetadata;
25use crate::IntoArray;
26use crate::Precision;
27use crate::arrow::FromArrowArray;
28use crate::serde::ArrayChildren;
29use crate::stats::ArrayStats;
30use crate::stats::StatsSetRef;
31use crate::vtable;
32use crate::vtable::ArrayId;
33use crate::vtable::ArrayVTable;
34use crate::vtable::ArrayVTableExt;
35use crate::vtable::BaseArrayVTable;
36use crate::vtable::CanonicalVTable;
37use crate::vtable::NotSupported;
38use crate::vtable::OperationsVTable;
39use crate::vtable::VTable;
40use crate::vtable::ValidityVTable;
41use crate::vtable::VisitorVTable;
42
43vtable!(Arrow);
44
45impl VTable for ArrowVTable {
46 type Array = ArrowArray;
47
48 type Metadata = EmptyMetadata;
49
50 type ArrayVTable = Self;
51 type CanonicalVTable = Self;
52 type OperationsVTable = Self;
53 type ValidityVTable = Self;
54 type VisitorVTable = Self;
55 type ComputeVTable = NotSupported;
56 type EncodeVTable = NotSupported;
57
58 fn id(&self) -> ArrayId {
59 ArrayId::new_ref("vortex.arrow")
60 }
61
62 fn encoding(_array: &Self::Array) -> ArrayVTable {
63 ArrowVTable.as_vtable()
64 }
65
66 fn metadata(_array: &Self::Array) -> VortexResult<Self::Metadata> {
67 Ok(EmptyMetadata)
68 }
69
70 fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
71 Ok(None)
72 }
73
74 fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
75 Ok(EmptyMetadata)
76 }
77
78 fn build(
79 &self,
80 _dtype: &DType,
81 _len: usize,
82 _metadata: &Self::Metadata,
83 _buffers: &[BufferHandle],
84 _children: &dyn ArrayChildren,
85 ) -> VortexResult<Self::Array> {
86 vortex_bail!("ArrowArray cannot be deserialized")
87 }
88}
89
/// Unit type on which the vtable traits for [`ArrowArray`] are implemented.
#[derive(Debug)]
pub struct ArrowVTable;
94
95#[derive(Clone, Debug)]
96pub struct ArrowArray {
97 inner: ArrowArrayRef,
98 dtype: DType,
99 stats_set: ArrayStats,
100}
101
102impl ArrowArray {
103 pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
104 let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
105 Self {
106 inner: arrow_array,
107 dtype,
108 stats_set: Default::default(),
109 }
110 }
111
112 pub fn inner(&self) -> &ArrowArrayRef {
113 &self.inner
114 }
115}
116
117impl BaseArrayVTable<ArrowVTable> for ArrowVTable {
118 fn len(array: &ArrowArray) -> usize {
119 array.inner.len()
120 }
121
122 fn dtype(array: &ArrowArray) -> &DType {
123 &array.dtype
124 }
125
126 fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
127 array.stats_set.to_ref(array.as_ref())
128 }
129
130 fn array_hash<H: std::hash::Hasher>(array: &ArrowArray, state: &mut H, _precision: Precision) {
131 array.dtype.hash(state);
132 std::sync::Arc::as_ptr(&array.inner).hash(state);
134 }
135
136 fn array_eq(array: &ArrowArray, other: &ArrowArray, _precision: Precision) -> bool {
137 array.dtype == other.dtype && std::sync::Arc::ptr_eq(&array.inner, &other.inner)
138 }
139}
140
141impl CanonicalVTable<ArrowVTable> for ArrowVTable {
142 fn canonicalize(array: &ArrowArray) -> Canonical {
143 ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable()).to_canonical()
144 }
145}
146
147impl OperationsVTable<ArrowVTable> for ArrowVTable {
148 fn slice(array: &ArrowArray, range: Range<usize>) -> ArrayRef {
149 let inner = array.inner.slice(range.start, range.len());
150 let new_array = ArrowArray {
151 inner,
152 dtype: array.dtype.clone(),
153 stats_set: Default::default(),
154 };
155 new_array.into_array()
156 }
157
158 fn scalar_at(_array: &ArrowArray, _index: usize) -> Scalar {
159 vortex_panic!("Not supported")
160 }
161}
162
163impl ValidityVTable<ArrowVTable> for ArrowVTable {
164 fn is_valid(array: &ArrowArray, index: usize) -> bool {
165 array.inner.is_valid(index)
166 }
167
168 fn all_valid(array: &ArrowArray) -> bool {
169 array.inner.logical_null_count() == 0
170 }
171
172 fn all_invalid(array: &ArrowArray) -> bool {
173 array.inner.logical_null_count() == array.inner.len()
174 }
175
176 fn validity_mask(array: &ArrowArray) -> Mask {
177 array
178 .inner
179 .logical_nulls()
180 .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone().into()))
181 .unwrap_or_else(|| Mask::new_true(array.inner.len()))
182 }
183}
184
185impl VisitorVTable<ArrowVTable> for ArrowVTable {
186 fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}
187
188 fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
189}