vortex_array/arrays/varbin/compute/
to_arrow.rs1use std::sync::Arc;
2
3use arrow_array::{ArrayRef, GenericBinaryArray, GenericStringArray, OffsetSizeTrait};
4use arrow_schema::DataType;
5use vortex_dtype::{DType, NativePType, Nullability, PType};
6use vortex_error::{VortexResult, vortex_bail};
7
8use crate::arrays::{VarBinArray, VarBinEncoding};
9use crate::compute::{ToArrowFn, try_cast};
10use crate::{Array, ToCanonical};
11
12impl ToArrowFn<&VarBinArray> for VarBinEncoding {
13 fn preferred_arrow_data_type(&self, array: &VarBinArray) -> VortexResult<Option<DataType>> {
14 let offsets_ptype = PType::try_from(array.offsets().dtype())?;
15 Ok(Some(match array.dtype() {
16 DType::Utf8(_) => match offsets_ptype {
17 PType::I64 | PType::U64 => DataType::LargeUtf8,
18 _ => DataType::Utf8,
19 },
20 DType::Binary(_) => match offsets_ptype {
21 PType::I64 | PType::U64 => DataType::LargeBinary,
22 _ => DataType::Binary,
23 },
24 _ => vortex_bail!("Unsupported DType"),
25 }))
26 }
27
28 fn to_arrow(
29 &self,
30 array: &VarBinArray,
31 data_type: &DataType,
32 ) -> VortexResult<Option<ArrayRef>> {
33 let array_ref = match data_type {
34 DataType::BinaryView | DataType::FixedSizeBinary(_) | DataType::Utf8View => {
35 return Ok(None);
37 }
38 DataType::Binary | DataType::Utf8 => {
39 varbin_to_arrow::<i32>(array)
41 }
42 DataType::LargeBinary | DataType::LargeUtf8 => {
43 varbin_to_arrow::<i64>(array)
45 }
46 _ => {
47 vortex_bail!("Unsupported data type: {data_type}")
49 }
50 }?;
51
52 Ok(Some(if array_ref.data_type() != data_type {
53 arrow_cast::cast(array_ref.as_ref(), data_type)?
54 } else {
55 array_ref
56 }))
57 }
58}
59
60pub(crate) fn varbin_to_arrow<O: NativePType + OffsetSizeTrait>(
62 varbin_array: &VarBinArray,
63) -> VortexResult<ArrayRef> {
64 let offsets = try_cast(
65 varbin_array.offsets(),
66 &DType::Primitive(O::PTYPE, Nullability::NonNullable),
67 )?
68 .to_primitive()
69 .map_err(|err| err.with_context("Failed to canonicalize offsets"))?;
70
71 let nulls = varbin_array.validity_mask()?.to_null_buffer();
72 let data = varbin_array.bytes().clone();
73
74 Ok(match varbin_array.dtype() {
76 DType::Binary(_) => Arc::new(unsafe {
77 GenericBinaryArray::new_unchecked(
78 offsets.buffer::<O>().into_arrow_offset_buffer(),
79 data.into_arrow_buffer(),
80 nulls,
81 )
82 }),
83 DType::Utf8(_) => Arc::new(unsafe {
84 GenericStringArray::new_unchecked(
85 offsets.buffer::<O>().into_arrow_offset_buffer(),
86 data.into_arrow_buffer(),
87 nulls,
88 )
89 }),
90 _ => vortex_bail!(
91 "expected utf8 or binary instead of {}",
92 varbin_array.dtype()
93 ),
94 })
95}