vortex_array/arrow/executor/
mod.rs1#![expect(
5 deprecated,
6 reason = "This module defines and implements a deprecated trait `ArrowArrayExecutor`"
7)]
8
9pub mod bool;
10mod byte;
11pub mod byte_view;
12mod decimal;
13mod dictionary;
14mod fixed_size_list;
15mod list;
16mod list_view;
17pub mod null;
18pub mod primitive;
19mod run_end;
20mod struct_;
21mod temporal;
22mod validity;
23
24use arrow_array::ArrayRef as ArrowArrayRef;
25use arrow_array::RecordBatch;
26use arrow_array::cast::AsArray;
27use arrow_array::types::*;
28use arrow_schema::DataType;
29use arrow_schema::Field;
30use arrow_schema::FieldRef;
31use arrow_schema::Schema;
32use itertools::Itertools;
33use vortex_array::dtype::arrow::to_data_type_naive;
34use vortex_error::VortexResult;
35use vortex_error::vortex_bail;
36use vortex_error::vortex_ensure;
37
38use crate::ArrayRef;
39use crate::arrays::List;
40use crate::arrays::VarBin;
41use crate::arrays::list::ListArrayExt;
42use crate::arrays::varbin::VarBinArrayExt;
43use crate::arrow::executor::bool::to_arrow_bool;
44use crate::arrow::executor::byte::to_arrow_byte_array;
45use crate::arrow::executor::byte_view::to_arrow_byte_view;
46use crate::arrow::executor::decimal::to_arrow_decimal;
47use crate::arrow::executor::dictionary::to_arrow_dictionary;
48use crate::arrow::executor::fixed_size_list::to_arrow_fixed_list;
49use crate::arrow::executor::list::to_arrow_list;
50use crate::arrow::executor::list_view::to_arrow_list_view;
51use crate::arrow::executor::null::to_arrow_null;
52use crate::arrow::executor::primitive::to_arrow_primitive;
53use crate::arrow::executor::run_end::to_arrow_run_end;
54use crate::arrow::executor::struct_::to_arrow_struct;
55use crate::arrow::executor::temporal::to_arrow_date;
56use crate::arrow::executor::temporal::to_arrow_time;
57use crate::arrow::executor::temporal::to_arrow_timestamp;
58use crate::arrow::session::ArrowSessionExt;
59use crate::dtype::DType;
60use crate::dtype::PType;
61use crate::executor::ExecutionCtx;
62
63#[deprecated(note = "Use an `ArrowSession` to perform conversions to/from Arrow arrays")]
65pub trait ArrowArrayExecutor: Sized {
66 #[deprecated(note = "Use an `ArrowSession` to perform conversions to/from Arrow arrays")]
71 fn execute_arrow(
72 self,
73 data_type: Option<&DataType>,
74 ctx: &mut ExecutionCtx,
75 ) -> VortexResult<ArrowArrayRef>;
76
77 #[deprecated(note = "Use an `ArrowSession` to perform conversions to/from Arrow arrays")]
79 fn execute_record_batch(
80 self,
81 schema: &Schema,
82 ctx: &mut ExecutionCtx,
83 ) -> VortexResult<RecordBatch> {
84 let array = self.execute_arrow(Some(&DataType::Struct(schema.fields.clone())), ctx)?;
85 Ok(RecordBatch::from(array.as_struct()))
86 }
87
88 #[deprecated(note = "Use an `ArrowSession` to perform conversions to/from Arrow arrays")]
90 fn execute_record_batches(
91 self,
92 schema: &Schema,
93 ctx: &mut ExecutionCtx,
94 ) -> VortexResult<Vec<RecordBatch>>;
95}
96
97#[expect(deprecated, reason = "backward compatibility")]
98impl ArrowArrayExecutor for ArrayRef {
99 fn execute_arrow(
100 self,
101 data_type: Option<&DataType>,
102 ctx: &mut ExecutionCtx,
103 ) -> VortexResult<ArrowArrayRef> {
104 let target = data_type.map(|dt| Field::new("", dt.clone(), self.dtype().is_nullable()));
105 let session = ctx.session().clone();
106 session.arrow().execute_arrow(self, target.as_ref(), ctx)
107 }
108
109 fn execute_record_batches(
110 self,
111 schema: &Schema,
112 ctx: &mut ExecutionCtx,
113 ) -> VortexResult<Vec<RecordBatch>> {
114 self.to_array_iterator()
115 .map(|a| a?.execute_record_batch(schema, ctx))
116 .try_collect()
117 }
118}
119
120pub(crate) fn execute_arrow_naive(
126 array: ArrayRef,
127 data_type: Option<&DataType>,
128 ctx: &mut ExecutionCtx,
129) -> VortexResult<ArrowArrayRef> {
130 let len = array.len();
131
132 let resolved_type: DataType = match data_type {
133 Some(dt) => dt.clone(),
134 None => infer_nearest_arrow_type(&array)?,
135 };
136
137 let arrow = match &resolved_type {
138 DataType::Null => to_arrow_null(array, ctx),
139 DataType::Boolean => to_arrow_bool(array, ctx),
140 DataType::Int8 => to_arrow_primitive::<Int8Type>(array, ctx),
141 DataType::Int16 => to_arrow_primitive::<Int16Type>(array, ctx),
142 DataType::Int32 => to_arrow_primitive::<Int32Type>(array, ctx),
143 DataType::Int64 => to_arrow_primitive::<Int64Type>(array, ctx),
144 DataType::UInt8 => to_arrow_primitive::<UInt8Type>(array, ctx),
145 DataType::UInt16 => to_arrow_primitive::<UInt16Type>(array, ctx),
146 DataType::UInt32 => to_arrow_primitive::<UInt32Type>(array, ctx),
147 DataType::UInt64 => to_arrow_primitive::<UInt64Type>(array, ctx),
148 DataType::Float16 => to_arrow_primitive::<Float16Type>(array, ctx),
149 DataType::Float32 => to_arrow_primitive::<Float32Type>(array, ctx),
150 DataType::Float64 => to_arrow_primitive::<Float64Type>(array, ctx),
151 DataType::Binary => to_arrow_byte_array::<BinaryType>(array, ctx),
152 DataType::LargeBinary => to_arrow_byte_array::<LargeBinaryType>(array, ctx),
153 DataType::Utf8 => to_arrow_byte_array::<Utf8Type>(array, ctx),
154 DataType::LargeUtf8 => to_arrow_byte_array::<LargeUtf8Type>(array, ctx),
155 DataType::BinaryView => to_arrow_byte_view::<BinaryViewType>(array, ctx),
156 DataType::Utf8View => to_arrow_byte_view::<StringViewType>(array, ctx),
157 DataType::List(elements_field) => to_arrow_list::<i32>(array, elements_field, ctx),
159 DataType::LargeList(elements_field) => to_arrow_list::<i64>(array, elements_field, ctx),
161 DataType::FixedSizeList(elements_field, list_size) => {
163 to_arrow_fixed_list(array, *list_size, elements_field, ctx)
164 }
165 DataType::ListView(elements_field) => to_arrow_list_view::<i32>(array, elements_field, ctx),
167 DataType::LargeListView(elements_field) => {
169 to_arrow_list_view::<i64>(array, elements_field, ctx)
170 }
171 DataType::Struct(fields) => {
172 let fields = if data_type.is_none() {
173 None
174 } else {
175 Some(fields)
176 };
177 to_arrow_struct(array, fields, ctx)
178 }
179 DataType::Dictionary(codes_type, values_type) => {
181 to_arrow_dictionary(array, codes_type, values_type, ctx)
182 }
183 dt @ DataType::Decimal32(..) => to_arrow_decimal(array, dt, ctx),
184 dt @ DataType::Decimal64(..) => to_arrow_decimal(array, dt, ctx),
185 dt @ DataType::Decimal128(..) => to_arrow_decimal(array, dt, ctx),
186 dt @ DataType::Decimal256(..) => to_arrow_decimal(array, dt, ctx),
187 DataType::RunEndEncoded(ends_type, values_type) => {
189 to_arrow_run_end(array, ends_type.data_type(), values_type, ctx)
190 }
191 dt @ (DataType::Date32 | DataType::Date64) => to_arrow_date(array, dt, ctx),
192 dt @ (DataType::Time32(_) | DataType::Time64(_)) => to_arrow_time(array, dt, ctx),
193 dt @ DataType::Timestamp(..) => to_arrow_timestamp(array, dt, ctx),
194 DataType::FixedSizeBinary(_)
195 | DataType::Map(..)
196 | DataType::Duration(_)
197 | DataType::Interval(_)
198 | DataType::Union(..) => {
199 vortex_bail!("Conversion to Arrow type {resolved_type} is not supported");
200 }
201 }?;
202
203 vortex_ensure!(
204 arrow.len() == len,
205 "Arrow array length does not match Vortex array length after conversion to {:?}",
206 arrow
207 );
208
209 Ok(arrow)
210}
211
212fn infer_nearest_arrow_type(array: &ArrayRef) -> VortexResult<DataType> {
219 if let Some(varbin) = array.as_opt::<VarBin>() {
221 let offsets_ptype = PType::try_from(varbin.offsets().dtype())?;
222 let use_large = matches!(offsets_ptype, PType::I64 | PType::U64);
223
224 return Ok(match (varbin.dtype(), use_large) {
225 (DType::Utf8(_), false) => DataType::Utf8,
226 (DType::Utf8(_), true) => DataType::LargeUtf8,
227 (DType::Binary(_), false) => DataType::Binary,
228 (DType::Binary(_), true) => DataType::LargeBinary,
229 _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
230 });
231 }
232
233 if let Some(list) = array.as_opt::<List>() {
235 let offsets_ptype = PType::try_from(list.offsets().dtype())?;
236 let use_large = matches!(offsets_ptype, PType::I64 | PType::U64);
237 let elem_dtype = infer_nearest_arrow_type(list.elements())?;
239 let field = FieldRef::new(Field::new_list_field(
240 elem_dtype,
241 list.elements().dtype().is_nullable(),
242 ));
243
244 return Ok(if use_large {
245 DataType::LargeList(field)
246 } else {
247 DataType::List(field)
248 });
249 }
250
251 to_data_type_naive(array.dtype())
253}