vortex_array/arrays/varbin/compute/
to_arrow.rs1use std::sync::Arc;
2
3use arrow_array::{ArrayRef, GenericBinaryArray, GenericStringArray, OffsetSizeTrait};
4use arrow_schema::DataType;
5use vortex_dtype::{DType, NativePType, Nullability, PType};
6use vortex_error::{VortexResult, vortex_bail};
7
8use crate::arrays::{VarBinArray, VarBinEncoding};
9use crate::compute::{ToArrowFn, cast};
10use crate::{Array, ToCanonical};
11
12impl ToArrowFn<&VarBinArray> for VarBinEncoding {
13 fn preferred_arrow_data_type(&self, array: &VarBinArray) -> VortexResult<Option<DataType>> {
14 let offsets_ptype = PType::try_from(array.offsets().dtype())?;
15 Ok(Some(match array.dtype() {
16 DType::Utf8(_) => match offsets_ptype {
17 PType::I64 | PType::U64 => DataType::LargeUtf8,
18 _ => DataType::Utf8,
19 },
20 DType::Binary(_) => match offsets_ptype {
21 PType::I64 | PType::U64 => DataType::LargeBinary,
22 _ => DataType::Binary,
23 },
24 _ => vortex_bail!("Unsupported DType"),
25 }))
26 }
27
28 fn to_arrow(
29 &self,
30 array: &VarBinArray,
31 data_type: &DataType,
32 ) -> VortexResult<Option<ArrayRef>> {
33 let array_ref = match data_type {
34 DataType::FixedSizeBinary(_) => {
35 return Ok(None);
37 }
38 DataType::BinaryView | DataType::Utf8View => Ok(arrow_cast::cast(
39 &*varbin_to_arrow::<i32>(array)?,
40 data_type,
41 )?),
42 DataType::Binary | DataType::Utf8 => {
43 varbin_to_arrow::<i32>(array)
45 }
46 DataType::LargeBinary | DataType::LargeUtf8 => {
47 varbin_to_arrow::<i64>(array)
49 }
50 _ => {
51 vortex_bail!("Unsupported data type: {data_type}")
53 }
54 }?;
55
56 Ok(Some(if array_ref.data_type() != data_type {
57 arrow_cast::cast(array_ref.as_ref(), data_type)?
58 } else {
59 array_ref
60 }))
61 }
62}
63
64pub(crate) fn varbin_to_arrow<O: NativePType + OffsetSizeTrait>(
66 varbin_array: &VarBinArray,
67) -> VortexResult<ArrayRef> {
68 let offsets = cast(
69 varbin_array.offsets(),
70 &DType::Primitive(O::PTYPE, Nullability::NonNullable),
71 )?
72 .to_primitive()
73 .map_err(|err| err.with_context("Failed to canonicalize offsets"))?;
74
75 let nulls = varbin_array.validity_mask()?.to_null_buffer();
76 let data = varbin_array.bytes().clone();
77
78 Ok(match varbin_array.dtype() {
80 DType::Binary(_) => Arc::new(unsafe {
81 GenericBinaryArray::new_unchecked(
82 offsets.buffer::<O>().into_arrow_offset_buffer(),
83 data.into_arrow_buffer(),
84 nulls,
85 )
86 }),
87 DType::Utf8(_) => Arc::new(unsafe {
88 GenericStringArray::new_unchecked(
89 offsets.buffer::<O>().into_arrow_offset_buffer(),
90 data.into_arrow_buffer(),
91 nulls,
92 )
93 }),
94 _ => vortex_bail!(
95 "expected utf8 or binary instead of {}",
96 varbin_array.dtype()
97 ),
98 })
99}