#![expect(
deprecated,
reason = "This module defines and implements a deprecated trait `ArrowArrayExecutor`"
)]
pub mod bool;
mod byte;
pub mod byte_view;
mod decimal;
mod dictionary;
mod fixed_size_list;
mod list;
mod list_view;
pub mod null;
pub mod primitive;
mod run_end;
mod struct_;
mod temporal;
mod validity;
use arrow_array::ArrayRef as ArrowArrayRef;
use arrow_array::RecordBatch;
use arrow_array::cast::AsArray;
use arrow_array::types::*;
use arrow_schema::DataType;
use arrow_schema::Field;
use arrow_schema::FieldRef;
use arrow_schema::Schema;
use itertools::Itertools;
use vortex_array::dtype::arrow::to_data_type_naive;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_ensure;
use crate::ArrayRef;
use crate::arrays::List;
use crate::arrays::VarBin;
use crate::arrays::list::ListArrayExt;
use crate::arrays::varbin::VarBinArrayExt;
use crate::arrow::executor::bool::to_arrow_bool;
use crate::arrow::executor::byte::to_arrow_byte_array;
use crate::arrow::executor::byte_view::to_arrow_byte_view;
use crate::arrow::executor::decimal::to_arrow_decimal;
use crate::arrow::executor::dictionary::to_arrow_dictionary;
use crate::arrow::executor::fixed_size_list::to_arrow_fixed_list;
use crate::arrow::executor::list::to_arrow_list;
use crate::arrow::executor::list_view::to_arrow_list_view;
use crate::arrow::executor::null::to_arrow_null;
use crate::arrow::executor::primitive::to_arrow_primitive;
use crate::arrow::executor::run_end::to_arrow_run_end;
use crate::arrow::executor::struct_::to_arrow_struct;
use crate::arrow::executor::temporal::to_arrow_date;
use crate::arrow::executor::temporal::to_arrow_time;
use crate::arrow::executor::temporal::to_arrow_timestamp;
use crate::arrow::session::ArrowSessionExt;
use crate::dtype::DType;
use crate::dtype::PType;
use crate::executor::ExecutionCtx;
/// Legacy entry points for turning a value into Arrow arrays and record batches.
///
/// Deprecated: use an `ArrowSession` to perform conversions to/from Arrow arrays.
#[deprecated(note = "Use an `ArrowSession` to perform conversions to/from Arrow arrays")]
pub trait ArrowArrayExecutor: Sized {
    /// Consumes `self` and produces an Arrow array.
    ///
    /// `data_type` is the requested Arrow type, if any. How `None` is resolved is left to the
    /// implementor.
    #[deprecated(note = "Use an `ArrowSession` to perform conversions to/from Arrow arrays")]
    fn execute_arrow(
        self,
        data_type: Option<&DataType>,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<ArrowArrayRef>;

    /// Consumes `self` and produces a single [`RecordBatch`].
    ///
    /// The default implementation requests a `DataType::Struct` built from the fields of
    /// `schema` through `execute_arrow`, then converts the resulting struct array into a
    /// record batch.
    ///
    /// NOTE(review): `as_struct()` and `RecordBatch::from` are documented in arrow-rs to panic
    /// (non-struct input / struct with top-level nulls respectively) — confirm callers cannot
    /// reach those cases.
    #[deprecated(note = "Use an `ArrowSession` to perform conversions to/from Arrow arrays")]
    fn execute_record_batch(
        self,
        schema: &Schema,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<RecordBatch> {
        let struct_type = DataType::Struct(schema.fields.clone());
        let converted = self.execute_arrow(Some(&struct_type), ctx)?;
        let batch = RecordBatch::from(converted.as_struct());
        Ok(batch)
    }

    /// Consumes `self` and produces a list of [`RecordBatch`]es for `schema`.
    ///
    /// There is no default implementation; how `self` is split into batches is decided by the
    /// implementor.
    #[deprecated(note = "Use an `ArrowSession` to perform conversions to/from Arrow arrays")]
    fn execute_record_batches(
        self,
        schema: &Schema,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Vec<RecordBatch>>;
}
/// Backward-compatible implementation that forwards to the Arrow session stored on the
/// execution context.
#[expect(deprecated, reason = "backward compatibility")]
impl ArrowArrayExecutor for ArrayRef {
    /// Wraps the requested `data_type` (if any) in an unnamed `Field` whose nullability is taken
    /// from this array's dtype, then delegates to the session's `execute_arrow`.
    fn execute_arrow(
        self,
        data_type: Option<&DataType>,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<ArrowArrayRef> {
        let target_field = data_type
            .cloned()
            .map(|requested| Field::new("", requested, self.dtype().is_nullable()));
        // Clone the session out of the context so `ctx` can still be handed on mutably below.
        let session = ctx.session().clone();
        session
            .arrow()
            .execute_arrow(self, target_field.as_ref(), ctx)
    }

    /// Converts every item yielded by `to_array_iterator()` into its own record batch, stopping
    /// at the first error.
    fn execute_record_batches(
        self,
        schema: &Schema,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Vec<RecordBatch>> {
        let chunks = self.to_array_iterator();
        chunks
            .map(|chunk| {
                let chunk = chunk?;
                chunk.execute_record_batch(schema, ctx)
            })
            .try_collect()
    }
}
pub(crate) fn execute_arrow_naive(
array: ArrayRef,
data_type: Option<&DataType>,
ctx: &mut ExecutionCtx,
) -> VortexResult<ArrowArrayRef> {
let len = array.len();
let resolved_type: DataType = match data_type {
Some(dt) => dt.clone(),
None => infer_nearest_arrow_type(&array)?,
};
let arrow = match &resolved_type {
DataType::Null => to_arrow_null(array, ctx),
DataType::Boolean => to_arrow_bool(array, ctx),
DataType::Int8 => to_arrow_primitive::<Int8Type>(array, ctx),
DataType::Int16 => to_arrow_primitive::<Int16Type>(array, ctx),
DataType::Int32 => to_arrow_primitive::<Int32Type>(array, ctx),
DataType::Int64 => to_arrow_primitive::<Int64Type>(array, ctx),
DataType::UInt8 => to_arrow_primitive::<UInt8Type>(array, ctx),
DataType::UInt16 => to_arrow_primitive::<UInt16Type>(array, ctx),
DataType::UInt32 => to_arrow_primitive::<UInt32Type>(array, ctx),
DataType::UInt64 => to_arrow_primitive::<UInt64Type>(array, ctx),
DataType::Float16 => to_arrow_primitive::<Float16Type>(array, ctx),
DataType::Float32 => to_arrow_primitive::<Float32Type>(array, ctx),
DataType::Float64 => to_arrow_primitive::<Float64Type>(array, ctx),
DataType::Binary => to_arrow_byte_array::<BinaryType>(array, ctx),
DataType::LargeBinary => to_arrow_byte_array::<LargeBinaryType>(array, ctx),
DataType::Utf8 => to_arrow_byte_array::<Utf8Type>(array, ctx),
DataType::LargeUtf8 => to_arrow_byte_array::<LargeUtf8Type>(array, ctx),
DataType::BinaryView => to_arrow_byte_view::<BinaryViewType>(array, ctx),
DataType::Utf8View => to_arrow_byte_view::<StringViewType>(array, ctx),
DataType::List(elements_field) => to_arrow_list::<i32>(array, elements_field, ctx),
DataType::LargeList(elements_field) => to_arrow_list::<i64>(array, elements_field, ctx),
DataType::FixedSizeList(elements_field, list_size) => {
to_arrow_fixed_list(array, *list_size, elements_field, ctx)
}
DataType::ListView(elements_field) => to_arrow_list_view::<i32>(array, elements_field, ctx),
DataType::LargeListView(elements_field) => {
to_arrow_list_view::<i64>(array, elements_field, ctx)
}
DataType::Struct(fields) => {
let fields = if data_type.is_none() {
None
} else {
Some(fields)
};
to_arrow_struct(array, fields, ctx)
}
DataType::Dictionary(codes_type, values_type) => {
to_arrow_dictionary(array, codes_type, values_type, ctx)
}
dt @ DataType::Decimal32(..) => to_arrow_decimal(array, dt, ctx),
dt @ DataType::Decimal64(..) => to_arrow_decimal(array, dt, ctx),
dt @ DataType::Decimal128(..) => to_arrow_decimal(array, dt, ctx),
dt @ DataType::Decimal256(..) => to_arrow_decimal(array, dt, ctx),
DataType::RunEndEncoded(ends_type, values_type) => {
to_arrow_run_end(array, ends_type.data_type(), values_type, ctx)
}
dt @ (DataType::Date32 | DataType::Date64) => to_arrow_date(array, dt, ctx),
dt @ (DataType::Time32(_) | DataType::Time64(_)) => to_arrow_time(array, dt, ctx),
dt @ DataType::Timestamp(..) => to_arrow_timestamp(array, dt, ctx),
DataType::FixedSizeBinary(_)
| DataType::Map(..)
| DataType::Duration(_)
| DataType::Interval(_)
| DataType::Union(..) => {
vortex_bail!("Conversion to Arrow type {resolved_type} is not supported");
}
}?;
vortex_ensure!(
arrow.len() == len,
"Arrow array length does not match Vortex array length after conversion to {:?}",
arrow
);
Ok(arrow)
}
/// Picks an Arrow `DataType` for `array` when the caller did not request one.
///
/// - `VarBin` arrays map to `Utf8`/`Binary`, or to the `Large*` variant when the offsets are
///   `I64` or `U64`.
/// - `List` arrays map to `List`/`LargeList` by the same offset rule, with the element type
///   inferred recursively and element nullability taken from the elements' dtype.
/// - Everything else falls back to `to_data_type_naive` on the array's dtype.
fn infer_nearest_arrow_type(array: &ArrayRef) -> VortexResult<DataType> {
    if let Some(varbin) = array.as_opt::<VarBin>() {
        let wide_offsets = matches!(
            PType::try_from(varbin.offsets().dtype())?,
            PType::I64 | PType::U64
        );
        let inferred = match varbin.dtype() {
            DType::Utf8(_) if wide_offsets => DataType::LargeUtf8,
            DType::Utf8(_) => DataType::Utf8,
            DType::Binary(_) if wide_offsets => DataType::LargeBinary,
            DType::Binary(_) => DataType::Binary,
            _ => unreachable!("VarBinArray must have Utf8 or Binary dtype"),
        };
        return Ok(inferred);
    }

    if let Some(list) = array.as_opt::<List>() {
        let wide_offsets = matches!(
            PType::try_from(list.offsets().dtype())?,
            PType::I64 | PType::U64
        );
        let elements = list.elements();
        let element_type = infer_nearest_arrow_type(elements)?;
        let element_field = FieldRef::new(Field::new_list_field(
            element_type,
            elements.dtype().is_nullable(),
        ));
        let list_type = if wide_offsets {
            DataType::LargeList(element_field)
        } else {
            DataType::List(element_field)
        };
        return Ok(list_type);
    }

    // No encoding-specific hint available: derive the type from the logical dtype alone.
    to_data_type_naive(array.dtype())
}