vortex_compute/arrow/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Conversion logic between Vortex vector types and Arrow types.
5
6use arrow_array::Array;
7use arrow_array::BooleanArray;
8use arrow_array::FixedSizeListArray;
9use arrow_array::GenericByteViewArray;
10use arrow_array::NullArray;
11use arrow_array::PrimitiveArray;
12use arrow_array::StructArray;
13use arrow_array::types::Decimal32Type;
14use arrow_array::types::Decimal64Type;
15use arrow_array::types::Decimal128Type;
16use arrow_array::types::Decimal256Type;
17use arrow_array::types::Float16Type;
18use arrow_array::types::Float32Type;
19use arrow_array::types::Float64Type;
20use arrow_array::types::Int8Type;
21use arrow_array::types::Int16Type;
22use arrow_array::types::Int32Type;
23use arrow_array::types::Int64Type;
24use arrow_array::types::StringViewType;
25use arrow_array::types::UInt8Type;
26use arrow_array::types::UInt16Type;
27use arrow_array::types::UInt32Type;
28use arrow_array::types::UInt64Type;
29use arrow_schema::DataType;
30use vortex_error::VortexExpect;
31use vortex_error::VortexResult;
32use vortex_error::vortex_bail;
33use vortex_vector::Datum;
34use vortex_vector::ScalarOps;
35use vortex_vector::Vector;
36use vortex_vector::VectorMutOps;
37
38mod binaryview;
39mod bool;
40mod decimal;
41mod fixed_size_list;
42mod list;
43mod null;
44mod primitive;
45mod struct_;
46mod vector;
47
48/// Trait for converting Vortex vector objects into Arrow.
49pub trait IntoArrow {
50    /// The output Arrow type.
51    type Output;
52
53    /// Convert the Vortex vector object into an Arrow object.
54    fn into_arrow(self) -> VortexResult<Self::Output>;
55}
56
57/// Trait for converting Arrow objects into Vortex vector objects.
58pub trait IntoVector {
59    /// The output Vortex vector type.
60    type Output;
61
62    /// Convert the Arrow object into a Vortex vector object.
63    fn into_vector(self) -> VortexResult<Self::Output>;
64}
65
66impl IntoArrow for Datum {
67    type Output = Box<dyn arrow_array::Datum>;
68
69    fn into_arrow(self) -> VortexResult<Self::Output> {
70        match self {
71            Datum::Scalar(s) => Ok(Box::new(arrow_array::Scalar::new(
72                s.repeat(1).freeze().into_arrow()?,
73            ))),
74            Datum::Vector(v) => Ok(Box::new(v.into_arrow()?)),
75        }
76    }
77}
78
79impl IntoVector for &dyn Array {
80    type Output = Vector;
81
82    #[allow(clippy::unwrap_used)]
83    fn into_vector(self) -> VortexResult<Self::Output> {
84        // The downcast_ref calls below are guaranteed to succeed because we match on data_type()
85        // first and each branch only attempts to downcast to the corresponding Arrow type.
86        match self.data_type() {
87            DataType::Null => self
88                .as_any()
89                .downcast_ref::<NullArray>()
90                .vortex_expect("NullArray downcast")
91                .into_vector()
92                .map(Vector::from),
93            DataType::Boolean => self
94                .as_any()
95                .downcast_ref::<BooleanArray>()
96                .vortex_expect("BooleanArray downcast")
97                .into_vector()
98                .map(Vector::from),
99            DataType::Int8 => self
100                .as_any()
101                .downcast_ref::<PrimitiveArray<Int8Type>>()
102                .vortex_expect("Int8Array downcast")
103                .into_vector()
104                .map(Vector::from),
105            DataType::Int16 => self
106                .as_any()
107                .downcast_ref::<PrimitiveArray<Int16Type>>()
108                .vortex_expect("Int16Array downcast")
109                .into_vector()
110                .map(Vector::from),
111            DataType::Int32 => self
112                .as_any()
113                .downcast_ref::<PrimitiveArray<Int32Type>>()
114                .vortex_expect("Int32Array downcast")
115                .into_vector()
116                .map(Vector::from),
117            DataType::Int64 => self
118                .as_any()
119                .downcast_ref::<PrimitiveArray<Int64Type>>()
120                .vortex_expect("Int64Array downcast")
121                .into_vector()
122                .map(Vector::from),
123            DataType::UInt8 => self
124                .as_any()
125                .downcast_ref::<PrimitiveArray<UInt8Type>>()
126                .vortex_expect("UInt8Array downcast")
127                .into_vector()
128                .map(Vector::from),
129            DataType::UInt16 => self
130                .as_any()
131                .downcast_ref::<PrimitiveArray<UInt16Type>>()
132                .vortex_expect("UInt16Array downcast")
133                .into_vector()
134                .map(Vector::from),
135            DataType::UInt32 => self
136                .as_any()
137                .downcast_ref::<PrimitiveArray<UInt32Type>>()
138                .vortex_expect("UInt32Array downcast")
139                .into_vector()
140                .map(Vector::from),
141            DataType::UInt64 => self
142                .as_any()
143                .downcast_ref::<PrimitiveArray<UInt64Type>>()
144                .vortex_expect("UInt64Array downcast")
145                .into_vector()
146                .map(Vector::from),
147            DataType::Float16 => self
148                .as_any()
149                .downcast_ref::<PrimitiveArray<Float16Type>>()
150                .vortex_expect("Float16Array downcast")
151                .into_vector()
152                .map(Vector::from),
153            DataType::Float32 => self
154                .as_any()
155                .downcast_ref::<PrimitiveArray<Float32Type>>()
156                .vortex_expect("Float32Array downcast")
157                .into_vector()
158                .map(Vector::from),
159            DataType::Float64 => self
160                .as_any()
161                .downcast_ref::<PrimitiveArray<Float64Type>>()
162                .vortex_expect("Float64Array downcast")
163                .into_vector()
164                .map(Vector::from),
165            DataType::Timestamp(..)
166            | DataType::Date32
167            | DataType::Date64
168            | DataType::Time32(_)
169            | DataType::Time64(_)
170            | DataType::Duration(_)
171            | DataType::Interval(_) => {
172                vortex_bail!("Temporal types not yet supported: {}", self.data_type())
173            }
174            DataType::Binary | DataType::LargeBinary | DataType::FixedSizeBinary(_) => {
175                vortex_bail!("Binary types not yet supported: {}", self.data_type())
176            }
177            DataType::BinaryView => {
178                vortex_bail!("BinaryView not yet supported: {}", self.data_type())
179            }
180            DataType::Utf8 | DataType::LargeUtf8 => {
181                vortex_bail!("Utf8/LargeUtf8 not yet supported: {}", self.data_type())
182            }
183            DataType::Utf8View => self
184                .as_any()
185                .downcast_ref::<GenericByteViewArray<StringViewType>>()
186                .vortex_expect("StringViewArray downcast")
187                .into_vector()
188                .map(Vector::from),
189            DataType::List(_)
190            | DataType::ListView(_)
191            | DataType::LargeList(_)
192            | DataType::LargeListView(_) => {
193                vortex_bail!("List types not yet supported: {}", self.data_type())
194            }
195            DataType::FixedSizeList(..) => self
196                .as_any()
197                .downcast_ref::<FixedSizeListArray>()
198                .vortex_expect("FixedSizeListArray downcast")
199                .into_vector()
200                .map(Vector::from),
201            DataType::Struct(_) => self
202                .as_any()
203                .downcast_ref::<StructArray>()
204                .vortex_expect("StructArray downcast")
205                .into_vector()
206                .map(Vector::from),
207            DataType::Union(..) => {
208                vortex_bail!("Union type not supported: {}", self.data_type())
209            }
210            DataType::Dictionary(..) => {
211                vortex_bail!("Dictionary type not supported: {}", self.data_type())
212            }
213            DataType::Decimal32(..) => self
214                .as_any()
215                .downcast_ref::<PrimitiveArray<Decimal32Type>>()
216                .vortex_expect("Decimal32Array downcast")
217                .into_vector()
218                .map(Vector::from),
219            DataType::Decimal64(..) => self
220                .as_any()
221                .downcast_ref::<PrimitiveArray<Decimal64Type>>()
222                .vortex_expect("Decimal64Array downcast")
223                .into_vector()
224                .map(Vector::from),
225            DataType::Decimal128(..) => self
226                .as_any()
227                .downcast_ref::<PrimitiveArray<Decimal128Type>>()
228                .vortex_expect("Decimal128Array downcast")
229                .into_vector()
230                .map(Vector::from),
231            DataType::Decimal256(..) => self
232                .as_any()
233                .downcast_ref::<PrimitiveArray<Decimal256Type>>()
234                .vortex_expect("Decimal256Array downcast")
235                .into_vector()
236                .map(Vector::from),
237            DataType::Map(..) => {
238                vortex_bail!("Map type not supported: {}", self.data_type())
239            }
240            DataType::RunEndEncoded(..) => {
241                vortex_bail!("RunEndEncoded type not supported: {}", self.data_type())
242            }
243        }
244    }
245}
246
247/// Converts an Arrow [`NullBuffer`](arrow_buffer::NullBuffer) to a Vortex [`Mask`](vortex_mask::Mask).
248pub(crate) fn nulls_to_mask(
249    nulls: Option<&arrow_buffer::NullBuffer>,
250    len: usize,
251) -> vortex_mask::Mask {
252    use vortex_buffer::BitBuffer;
253    use vortex_mask::Mask;
254
255    match nulls {
256        None => Mask::AllTrue(len),
257        Some(nulls) => {
258            let inner = nulls.inner();
259            // Arrow stores validity as "1 = valid, 0 = null" which matches our Mask semantics
260            let bit_buffer = BitBuffer::from(inner.clone());
261            Mask::from_buffer(bit_buffer)
262        }
263    }
264}