vortex_array/arrow/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::hash::Hash;
6use std::ops::Range;
7
8use arrow_array::ArrayRef as ArrowArrayRef;
9use vortex_dtype::arrow::FromArrowType;
10use vortex_dtype::{DType, Nullability};
11use vortex_error::vortex_panic;
12use vortex_mask::Mask;
13use vortex_scalar::Scalar;
14
15use crate::arrow::FromArrowArray;
16use crate::stats::{ArrayStats, StatsSetRef};
17use crate::vtable::{
18    ArrayVTable, CanonicalVTable, NotSupported, OperationsVTable, VTable, ValidityVTable,
19    VisitorVTable,
20};
21use crate::{
22    Array, ArrayBufferVisitor, ArrayChildVisitor, ArrayRef, Canonical, EncodingId, EncodingRef,
23    IntoArray, Precision, vtable,
24};
25
// Invokes the crate-level `vtable!` macro for the `Arrow` encoding.
// NOTE(review): presumably this declares `ArrowVTable` and the related glue used by the
// impls below — confirm against the macro definition.
vtable!(Arrow);
27
28impl VTable for ArrowVTable {
29    type Array = ArrowArray;
30    type Encoding = ArrowEncoding;
31    type ArrayVTable = Self;
32    type CanonicalVTable = Self;
33    type OperationsVTable = Self;
34    type ValidityVTable = Self;
35    type VisitorVTable = Self;
36    type ComputeVTable = NotSupported;
37    type EncodeVTable = NotSupported;
38    type OperatorVTable = NotSupported;
39    type SerdeVTable = NotSupported;
40
41    fn id(_encoding: &Self::Encoding) -> EncodingId {
42        EncodingId::new_ref("vortex.arrow")
43    }
44
45    fn encoding(_array: &Self::Array) -> EncodingRef {
46        EncodingRef::new_ref(ArrowEncoding.as_ref())
47    }
48}
49
/// The encoding marker for [`ArrowArray`], a Vortex array that wraps an in-memory Arrow array.
///
/// All wrapped Arrow arrays currently share the single `vortex.arrow` encoding ID.
// TODO(ngates): consider having each Arrow encoding be a separate encoding ID.
#[derive(Clone, Debug)]
pub struct ArrowEncoding;
54
/// A Vortex array that wraps an in-memory Arrow array.
#[derive(Clone, Debug)]
pub struct ArrowArray {
    /// The wrapped Arrow array.
    inner: ArrowArrayRef,
    /// The Vortex dtype of this array; `ArrowArray::new` derives it from the Arrow data type
    /// and the requested nullability.
    dtype: DType,
    /// Statistics for this array, initialized with `Default::default()` on construction.
    stats_set: ArrayStats,
}
61
62impl ArrowArray {
63    pub fn new(arrow_array: ArrowArrayRef, nullability: Nullability) -> Self {
64        let dtype = DType::from_arrow((arrow_array.data_type(), nullability));
65        Self {
66            inner: arrow_array,
67            dtype,
68            stats_set: Default::default(),
69        }
70    }
71
72    pub fn inner(&self) -> &ArrowArrayRef {
73        &self.inner
74    }
75}
76
77impl ArrayVTable<ArrowVTable> for ArrowVTable {
78    fn len(array: &ArrowArray) -> usize {
79        array.inner.len()
80    }
81
82    fn dtype(array: &ArrowArray) -> &DType {
83        &array.dtype
84    }
85
86    fn stats(array: &ArrowArray) -> StatsSetRef<'_> {
87        array.stats_set.to_ref(array.as_ref())
88    }
89
90    fn array_hash<H: std::hash::Hasher>(array: &ArrowArray, state: &mut H, _precision: Precision) {
91        array.dtype.hash(state);
92        // Hash based on pointer to the inner Arrow array since Arrow doesn't support hashing.
93        std::sync::Arc::as_ptr(&array.inner).hash(state);
94    }
95
96    fn array_eq(array: &ArrowArray, other: &ArrowArray, _precision: Precision) -> bool {
97        array.dtype == other.dtype && std::sync::Arc::ptr_eq(&array.inner, &other.inner)
98    }
99}
100
101impl CanonicalVTable<ArrowVTable> for ArrowVTable {
102    fn canonicalize(array: &ArrowArray) -> Canonical {
103        ArrayRef::from_arrow(array.inner.as_ref(), array.dtype.is_nullable()).to_canonical()
104    }
105}
106
107impl OperationsVTable<ArrowVTable> for ArrowVTable {
108    fn slice(array: &ArrowArray, range: Range<usize>) -> ArrayRef {
109        let inner = array.inner.slice(range.start, range.len());
110        let new_array = ArrowArray {
111            inner,
112            dtype: array.dtype.clone(),
113            stats_set: Default::default(),
114        };
115        new_array.into_array()
116    }
117
118    fn scalar_at(_array: &ArrowArray, _index: usize) -> Scalar {
119        vortex_panic!("Not supported")
120    }
121}
122
123impl ValidityVTable<ArrowVTable> for ArrowVTable {
124    fn is_valid(array: &ArrowArray, index: usize) -> bool {
125        array.inner.is_valid(index)
126    }
127
128    fn all_valid(array: &ArrowArray) -> bool {
129        array.inner.logical_null_count() == 0
130    }
131
132    fn all_invalid(array: &ArrowArray) -> bool {
133        array.inner.logical_null_count() == array.inner.len()
134    }
135
136    fn validity_mask(array: &ArrowArray) -> Mask {
137        array
138            .inner
139            .logical_nulls()
140            .map(|null_buffer| Mask::from_buffer(null_buffer.inner().clone().into()))
141            .unwrap_or_else(|| Mask::new_true(array.inner.len()))
142    }
143}
144
impl VisitorVTable<ArrowVTable> for ArrowVTable {
    /// Intentionally a no-op: this encoding reports no buffers to the visitor.
    fn visit_buffers(_array: &ArrowArray, _visitor: &mut dyn ArrayBufferVisitor) {}

    /// Intentionally a no-op: this encoding reports no child arrays to the visitor.
    fn visit_children(_array: &ArrowArray, _visitor: &mut dyn ArrayChildVisitor) {}
}
149}