vortex_array/arrow/compute/to_arrow/
varbin.rs1use std::sync::Arc;
5
6use arrow_array::{
7 ArrayRef as ArrowArrayRef, GenericBinaryArray, GenericStringArray, OffsetSizeTrait,
8};
9use arrow_schema::DataType;
10use vortex_dtype::{DType, NativePType, Nullability, PType};
11use vortex_error::{VortexResult, vortex_bail, vortex_panic};
12
13use crate::arrays::{VarBinArray, VarBinVTable};
14use crate::arrow::compute::{ToArrowKernel, ToArrowKernelAdapter};
15use crate::compute::cast;
16use crate::{Array, ToCanonical, register_kernel};
17
18impl ToArrowKernel for VarBinVTable {
19 fn to_arrow(
20 &self,
21 array: &VarBinArray,
22 arrow_type: Option<&DataType>,
23 ) -> VortexResult<Option<ArrowArrayRef>> {
24 let offsets_ptype = PType::try_from(array.offsets().dtype())?;
25
26 match arrow_type {
27 None => match array.dtype() {
29 DType::Binary(_) => match offsets_ptype {
30 PType::I64 | PType::U64 => to_arrow::<i64>(array),
31 PType::U8 | PType::U16 | PType::U32 | PType::I8 | PType::I16 | PType::I32 => {
32 to_arrow::<i32>(array)
33 }
34 PType::F16 | PType::F32 | PType::F64 => {
35 vortex_panic!("offsets array were somehow floating point")
36 }
37 },
38 DType::Utf8(_) => match offsets_ptype {
39 PType::I64 | PType::U64 => to_arrow::<i64>(array),
40 PType::U8 | PType::U16 | PType::U32 | PType::I8 | PType::I16 | PType::I32 => {
41 to_arrow::<i32>(array)
42 }
43 PType::F16 | PType::F32 | PType::F64 => {
44 vortex_panic!("offsets array were somehow floating point")
45 }
46 },
47 dtype => unreachable!("Unsupported DType {dtype}"),
48 },
49 Some(DataType::Binary) if array.dtype().is_binary() => to_arrow::<i32>(array),
51 Some(DataType::LargeBinary) if array.dtype().is_binary() => to_arrow::<i64>(array),
52 Some(DataType::Utf8) if array.dtype().is_utf8() => to_arrow::<i32>(array),
53 Some(DataType::LargeUtf8) if array.dtype().is_utf8() => to_arrow::<i64>(array),
54 Some(DataType::BinaryView) | Some(DataType::Utf8View) => {
56 return Ok(None);
57 }
58 Some(_) => {
60 vortex_bail!("Cannot convert VarBin to Arrow type {arrow_type:?}");
61 }
62 }
63 .map(Some)
64 }
65}
66
// Hands the `VarBinVTable` to-arrow kernel, wrapped in `ToArrowKernelAdapter` and lifted,
// to `register_kernel!`.
// NOTE(review): presumably this is what makes the kernel discoverable by the to-arrow
// dispatch at runtime — confirm against the macro's definition.
register_kernel!(ToArrowKernelAdapter(VarBinVTable).lift());
68
69fn to_arrow<O: NativePType + OffsetSizeTrait>(array: &VarBinArray) -> VortexResult<ArrowArrayRef> {
70 let offsets = cast(
71 array.offsets(),
72 &DType::Primitive(O::PTYPE, Nullability::NonNullable),
73 )?
74 .to_primitive()
75 .map_err(|err| err.with_context("Failed to canonicalize offsets"))?;
76
77 let nulls = array.validity_mask().to_null_buffer();
78 let data = array.bytes().clone();
79
80 Ok(match array.dtype() {
82 DType::Binary(_) => Arc::new(unsafe {
83 GenericBinaryArray::new_unchecked(
84 offsets.buffer::<O>().into_arrow_offset_buffer(),
85 data.into_arrow_buffer(),
86 nulls,
87 )
88 }),
89 DType::Utf8(_) => Arc::new(unsafe {
90 GenericStringArray::new_unchecked(
91 offsets.buffer::<O>().into_arrow_offset_buffer(),
92 data.into_arrow_buffer(),
93 nulls,
94 )
95 }),
96 dtype => {
97 unreachable!("expected utf8 or binary instead of {dtype}")
98 }
99 })
100}