Skip to main content

vortex_array/arrow/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::hash::Hash;
6
7use arrow_array::ArrayRef as ArrowArrayRef;
8use vortex_buffer::BitBuffer;
9use vortex_dtype::DType;
10use vortex_dtype::Nullability;
11use vortex_dtype::arrow::FromArrowType;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14use vortex_error::vortex_ensure;
15use vortex_session::VortexSession;
16
17use crate::ArrayBufferVisitor;
18use crate::ArrayChildVisitor;
19use crate::ArrayRef;
20use crate::EmptyMetadata;
21use crate::ExecutionCtx;
22use crate::IntoArray;
23use crate::Precision;
24use crate::arrays::BoolArray;
25use crate::arrow::FromArrowArray;
26use crate::buffer::BufferHandle;
27use crate::scalar::Scalar;
28use crate::serde::ArrayChildren;
29use crate::stats::ArrayStats;
30use crate::stats::StatsSetRef;
31use crate::validity::Validity;
32use crate::vtable;
33use crate::vtable::ArrayId;
34use crate::vtable::BaseArrayVTable;
35use crate::vtable::OperationsVTable;
36use crate::vtable::VTable;
37use crate::vtable::ValidityVTable;
38use crate::vtable::VisitorVTable;
39
// Invokes the crate's `vtable!` macro for the `Arrow` encoding.
// NOTE(review): presumably this generates the glue between `ArrowArray` and `ArrowVTable`
// (the macro body is not visible from this file) — confirm against the macro definition.
vtable!(Arrow);
41
42impl VTable for ArrowVTable {
43    type Array = ArrowArray;
44
45    type Metadata = EmptyMetadata;
46
47    type ArrayVTable = Self;
48    type OperationsVTable = Self;
49    type ValidityVTable = Self;
50    type VisitorVTable = Self;
51
52    fn id(_array: &Self::Array) -> ArrayId {
53        ArrowVTable::ID
54    }
55
56    fn metadata(_array: &Self::Array) -> VortexResult<Self::Metadata> {
57        Ok(EmptyMetadata)
58    }
59
60    fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
61        Ok(None)
62    }
63
64    fn deserialize(
65        _bytes: &[u8],
66        _dtype: &DType,
67        _len: usize,
68        _buffers: &[BufferHandle],
69        _session: &VortexSession,
70    ) -> VortexResult<Self::Metadata> {
71        Ok(EmptyMetadata)
72    }
73
74    fn build(
75        _dtype: &DType,
76        _len: usize,
77        _metadata: &Self::Metadata,
78        _buffers: &[BufferHandle],
79        _children: &dyn ArrayChildren,
80    ) -> VortexResult<Self::Array> {
81        vortex_bail!("ArrowArray cannot be deserialized")
82    }
83
84    fn with_children(_array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
85        vortex_ensure!(
86            children.is_empty(),
87            "ArrowArray has no children, got {}",
88            children.len()
89        );
90        Ok(())
91    }
92
93    fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
94        ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable())
95    }
96}
97
/// Vtable marker type for the Vortex encoding that wraps an in-memory Arrow array.
///
/// The wrapped data lives in `ArrowArray`; this unit struct only carries the vtable trait
/// implementations.
// TODO(ngates): consider having each Arrow encoding be a separate encoding ID.
#[derive(Debug)]
pub struct ArrowVTable;
102
103impl ArrowVTable {
104    pub const ID: ArrayId = ArrayId::new_ref("vortex.arrow");
105}
106
107#[derive(Clone, Debug)]
108pub struct ArrowArray {
109    inner: ArrowArrayRef,
110    dtype: DType,
111    stats_set: ArrayStats,
112}
113
114impl ArrowArray {
115    pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
116        let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
117        Self {
118            inner: arrow_array,
119            dtype,
120            stats_set: Default::default(),
121        }
122    }
123
124    pub fn inner(&self) -> &ArrowArrayRef {
125        &self.inner
126    }
127}
128
129impl BaseArrayVTable<ArrowVTable> for ArrowVTable {
130    fn len(array: &ArrowArray) -> usize {
131        array.inner.len()
132    }
133
134    fn dtype(array: &ArrowArray) -> &DType {
135        &array.dtype
136    }
137
138    fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
139        array.stats_set.to_ref(array.as_ref())
140    }
141
142    fn array_hash<H: std::hash::Hasher>(array: &ArrowArray, state: &mut H, _precision: Precision) {
143        array.dtype.hash(state);
144        // Hash based on pointer to the inner Arrow array since Arrow doesn't support hashing.
145        std::sync::Arc::as_ptr(&array.inner).hash(state);
146    }
147
148    fn array_eq(array: &ArrowArray, other: &ArrowArray, _precision: Precision) -> bool {
149        array.dtype == other.dtype && std::sync::Arc::ptr_eq(&array.inner, &other.inner)
150    }
151}
152
153impl OperationsVTable<ArrowVTable> for ArrowVTable {
154    fn scalar_at(_array: &ArrowArray, _index: usize) -> VortexResult<Scalar> {
155        vortex_bail!("ArrowArray does not support scalar_at")
156    }
157}
158
159impl ValidityVTable<ArrowVTable> for ArrowVTable {
160    fn validity(array: &ArrowArray) -> VortexResult<Validity> {
161        Ok(match array.inner.logical_nulls() {
162            None => Validity::AllValid,
163            Some(null_buffer) => match null_buffer.null_count() {
164                0 => Validity::AllValid,
165                n if n == array.inner.len() => Validity::AllInvalid,
166                _ => Validity::Array(
167                    BoolArray::new(
168                        BitBuffer::from(null_buffer.inner().clone()),
169                        Validity::NonNullable,
170                    )
171                    .into_array(),
172                ),
173            },
174        })
175    }
176}
177
178impl VisitorVTable<ArrowVTable> for ArrowVTable {
179    fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}
180
181    fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
182
183    fn nchildren(_array: &ArrowArray) -> usize {
184        0
185    }
186
187    fn nth_child(_array: &ArrowArray, _idx: usize) -> Option<ArrayRef> {
188        None
189    }
190}