// vortex_array/array/varbin/compute/to_arrow.rs
use std::sync::Arc;
use arrow_array::{ArrayRef, GenericBinaryArray, GenericStringArray, OffsetSizeTrait};
use arrow_schema::DataType;
use vortex_dtype::{DType, NativePType, Nullability, PType};
use vortex_error::{vortex_bail, VortexResult};
use crate::array::{VarBinArray, VarBinEncoding};
use crate::compute::{try_cast, ToArrowFn};
use crate::IntoArrayVariant;
impl ToArrowFn<VarBinArray> for VarBinEncoding {
    /// Picks the Arrow data type this array converts to most directly.
    ///
    /// Arrays whose offsets are stored as `I64` or `U64` map to the "large"
    /// Arrow variants (`LargeUtf8` / `LargeBinary`); every other offset width
    /// maps to the regular `Utf8` / `Binary` types.
    ///
    /// # Errors
    ///
    /// Fails if the offsets dtype cannot be converted to a `PType`, or if the
    /// array dtype is neither `Utf8` nor `Binary`.
    fn preferred_arrow_data_type(&self, array: &VarBinArray) -> VortexResult<Option<DataType>> {
        // The offsets ptype is resolved first so that its error takes precedence
        // over the unsupported-dtype error below.
        let wide_offsets = matches!(
            PType::try_from(array.offsets().dtype())?,
            PType::I64 | PType::U64
        );

        let preferred = match (array.dtype(), wide_offsets) {
            (DType::Utf8(_), true) => DataType::LargeUtf8,
            (DType::Utf8(_), false) => DataType::Utf8,
            (DType::Binary(_), true) => DataType::LargeBinary,
            (DType::Binary(_), false) => DataType::Binary,
            _ => vortex_bail!("Unsupported DType"),
        };
        Ok(Some(preferred))
    }

    /// Converts `array` into an Arrow array of the requested `data_type`.
    ///
    /// * `Binary` / `Utf8` targets are built with 32-bit offsets.
    /// * `LargeBinary` / `LargeUtf8` targets are built with 64-bit offsets.
    /// * `BinaryView`, `FixedSizeBinary` and `Utf8View` targets yield `Ok(None)`
    ///   (presumably so the caller can fall back to another conversion path;
    ///   confirm against the `ToArrowFn` contract).
    ///
    /// If the array that was built does not already have exactly the requested
    /// type (for example a string array when `Binary` was asked for), it is
    /// passed through `arrow_cast::cast`.
    ///
    /// # Errors
    ///
    /// Fails for any other target type, or if the conversion or the final
    /// Arrow cast fails.
    fn to_arrow(
        &self,
        array: &VarBinArray,
        data_type: &DataType,
    ) -> VortexResult<Option<ArrayRef>> {
        let converted = match data_type {
            DataType::Binary | DataType::Utf8 => varbin_to_arrow::<i32>(array)?,
            DataType::LargeBinary | DataType::LargeUtf8 => varbin_to_arrow::<i64>(array)?,
            DataType::BinaryView | DataType::FixedSizeBinary(_) | DataType::Utf8View => {
                return Ok(None);
            }
            _ => vortex_bail!("Unsupported data type: {data_type}"),
        };

        // Nothing more to do when the produced array already has the requested type.
        if converted.data_type() == data_type {
            return Ok(Some(converted));
        }

        let casted = arrow_cast::cast(converted.as_ref(), data_type)?;
        Ok(Some(casted))
    }
}
/// Builds an Arrow string or binary array with offset type `O` from a `VarBinArray`.
///
/// The offsets are first cast to a non-nullable primitive array of `O`, then
/// combined with the array's bytes and validity to form either a
/// `GenericStringArray<O>` (for `Utf8`) or a `GenericBinaryArray<O>` (for `Binary`).
///
/// # Errors
///
/// Fails if the offsets cannot be cast to `O` or canonicalized, if the logical
/// validity cannot be computed, or if the array dtype is neither `Utf8` nor
/// `Binary`.
pub(crate) fn varbin_to_arrow<O: NativePType + OffsetSizeTrait>(
    varbin_array: &VarBinArray,
) -> VortexResult<ArrayRef> {
    let offsets_dtype = DType::Primitive(O::PTYPE, Nullability::NonNullable);
    let offsets = try_cast(varbin_array.offsets(), &offsets_dtype)?
        .into_primitive()
        .map_err(|err| err.with_context("Failed to canonicalize offsets"))?;

    let nulls = varbin_array.logical_validity()?.to_null_buffer();
    let data = varbin_array.bytes();

    // The buffer conversions stay inside the arms so that no conversion work
    // happens on the unsupported-dtype path.
    let arrow_array: ArrayRef = match varbin_array.dtype() {
        DType::Utf8(_) => {
            // SAFETY: `new_unchecked` performs no validation. This assumes the
            // VarBinArray guarantees monotonically increasing, in-bounds offsets
            // and valid UTF-8 contents for a Utf8 dtype (TODO confirm the invariant).
            let strings = unsafe {
                GenericStringArray::new_unchecked(
                    offsets.buffer::<O>().into_arrow_offset_buffer(),
                    data.into_arrow_buffer(),
                    nulls,
                )
            };
            Arc::new(strings)
        }
        DType::Binary(_) => {
            // SAFETY: `new_unchecked` performs no validation. This assumes the
            // VarBinArray guarantees monotonically increasing, in-bounds offsets
            // (TODO confirm the invariant).
            let binary = unsafe {
                GenericBinaryArray::new_unchecked(
                    offsets.buffer::<O>().into_arrow_offset_buffer(),
                    data.into_arrow_buffer(),
                    nulls,
                )
            };
            Arc::new(binary)
        }
        _ => vortex_bail!(
            "expected utf8 or binary instead of {}",
            varbin_array.dtype()
        ),
    };

    Ok(arrow_array)
}