vortex_array/arrow/compute/to_arrow/
varbin.rs1use std::sync::Arc;
2
3use arrow_array::{
4 ArrayRef as ArrowArrayRef, GenericBinaryArray, GenericStringArray, OffsetSizeTrait,
5};
6use arrow_schema::DataType;
7use vortex_dtype::{DType, NativePType, Nullability, PType};
8use vortex_error::{VortexResult, vortex_bail};
9
10use crate::arrays::{VarBinArray, VarBinVTable};
11use crate::arrow::compute::{ToArrowKernel, ToArrowKernelAdapter};
12use crate::compute::cast;
13use crate::{Array, ToCanonical, register_kernel};
14
15impl ToArrowKernel for VarBinVTable {
16 fn to_arrow(
17 &self,
18 array: &VarBinArray,
19 arrow_type: Option<&DataType>,
20 ) -> VortexResult<Option<ArrowArrayRef>> {
21 let offsets_ptype = PType::try_from(array.offsets().dtype())?;
22
23 match arrow_type {
24 None => match array.dtype() {
26 DType::Binary(_) => match offsets_ptype {
27 PType::I64 | PType::U64 => to_arrow::<i64>(array),
28 _ => to_arrow::<i32>(array),
29 },
30 DType::Utf8(_) => match offsets_ptype {
31 PType::I64 | PType::U64 => to_arrow::<i64>(array),
32 _ => to_arrow::<i32>(array),
33 },
34 _ => unreachable!("Unsupported DType"),
35 },
36 Some(DataType::Binary) if array.dtype().is_binary() => to_arrow::<i32>(array),
38 Some(DataType::LargeBinary) if array.dtype().is_binary() => to_arrow::<i64>(array),
39 Some(DataType::Utf8) if array.dtype().is_utf8() => to_arrow::<i32>(array),
40 Some(DataType::LargeUtf8) if array.dtype().is_utf8() => to_arrow::<i64>(array),
41 Some(DataType::BinaryView) | Some(DataType::Utf8View) => {
43 return Ok(None);
44 }
45 Some(_) => {
47 vortex_bail!("Cannot convert VarBin to Arrow type {arrow_type:?}");
48 }
49 }
50 .map(Some)
51 }
52}
53
// Registers the `VarBinVTable` to-Arrow kernel: the vtable is wrapped in a
// `ToArrowKernelAdapter`, lifted, and handed to the `register_kernel!` macro.
54register_kernel!(ToArrowKernelAdapter(VarBinVTable).lift());
55
56fn to_arrow<O: NativePType + OffsetSizeTrait>(array: &VarBinArray) -> VortexResult<ArrowArrayRef> {
57 let offsets = cast(
58 array.offsets(),
59 &DType::Primitive(O::PTYPE, Nullability::NonNullable),
60 )?
61 .to_primitive()
62 .map_err(|err| err.with_context("Failed to canonicalize offsets"))?;
63
64 let nulls = array.validity_mask()?.to_null_buffer();
65 let data = array.bytes().clone();
66
67 Ok(match array.dtype() {
69 DType::Binary(_) => Arc::new(unsafe {
70 GenericBinaryArray::new_unchecked(
71 offsets.buffer::<O>().into_arrow_offset_buffer(),
72 data.into_arrow_buffer(),
73 nulls,
74 )
75 }),
76 DType::Utf8(_) => Arc::new(unsafe {
77 GenericStringArray::new_unchecked(
78 offsets.buffer::<O>().into_arrow_offset_buffer(),
79 data.into_arrow_buffer(),
80 nulls,
81 )
82 }),
83 _ => unreachable!("expected utf8 or binary instead of {}", array.dtype()),
84 })
85}