vortex_array/arrow/compute/to_arrow/
varbin.rs1use std::sync::Arc;
5
6use arrow_array::ArrayRef as ArrowArrayRef;
7use arrow_array::GenericBinaryArray;
8use arrow_array::GenericStringArray;
9use arrow_array::OffsetSizeTrait;
10use arrow_schema::DataType;
11use vortex_dtype::DType;
12use vortex_dtype::IntegerPType;
13use vortex_dtype::Nullability;
14use vortex_dtype::PType;
15use vortex_error::VortexResult;
16use vortex_error::vortex_bail;
17use vortex_error::vortex_panic;
18
19use crate::Array;
20use crate::ToCanonical;
21use crate::arrays::VarBinArray;
22use crate::arrays::VarBinVTable;
23use crate::arrow::compute::ToArrowKernel;
24use crate::arrow::compute::ToArrowKernelAdapter;
25use crate::arrow::compute::to_arrow::null_buffer::to_null_buffer;
26use crate::compute::cast;
27use crate::register_kernel;
28
29impl ToArrowKernel for VarBinVTable {
30 fn to_arrow(
31 &self,
32 array: &VarBinArray,
33 arrow_type: Option<&DataType>,
34 ) -> VortexResult<Option<ArrowArrayRef>> {
35 let offsets_ptype = PType::try_from(array.offsets().dtype())?;
36
37 match arrow_type {
38 None => match array.dtype() {
40 DType::Binary(_) => match offsets_ptype {
41 PType::I64 | PType::U64 => to_arrow::<i64>(array),
42 PType::U8 | PType::U16 | PType::U32 | PType::I8 | PType::I16 | PType::I32 => {
43 to_arrow::<i32>(array)
44 }
45 PType::F16 | PType::F32 | PType::F64 => {
46 vortex_panic!("offsets array were somehow floating point")
47 }
48 },
49 DType::Utf8(_) => match offsets_ptype {
50 PType::I64 | PType::U64 => to_arrow::<i64>(array),
51 PType::U8 | PType::U16 | PType::U32 | PType::I8 | PType::I16 | PType::I32 => {
52 to_arrow::<i32>(array)
53 }
54 PType::F16 | PType::F32 | PType::F64 => {
55 vortex_panic!("offsets array were somehow floating point")
56 }
57 },
58 dtype => unreachable!("Unsupported DType {dtype}"),
59 },
60 Some(DataType::Binary) if array.dtype().is_binary() => to_arrow::<i32>(array),
62 Some(DataType::LargeBinary) if array.dtype().is_binary() => to_arrow::<i64>(array),
63 Some(DataType::Utf8) if array.dtype().is_utf8() => to_arrow::<i32>(array),
64 Some(DataType::LargeUtf8) if array.dtype().is_utf8() => to_arrow::<i64>(array),
65 Some(DataType::BinaryView) | Some(DataType::Utf8View) => {
67 return Ok(None);
68 }
69 Some(_) => {
71 vortex_bail!("Cannot convert VarBin to Arrow type {arrow_type:?}");
72 }
73 }
74 .map(Some)
75 }
76}
77
78register_kernel!(ToArrowKernelAdapter(VarBinVTable).lift());
79
80fn to_arrow<O: IntegerPType + OffsetSizeTrait>(array: &VarBinArray) -> VortexResult<ArrowArrayRef> {
81 let offsets = cast(
82 array.offsets(),
83 &DType::Primitive(O::PTYPE, Nullability::NonNullable),
84 )?
85 .to_primitive();
86
87 let nulls = to_null_buffer(array.validity_mask());
88 let data = array.bytes().clone();
89
90 Ok(match array.dtype() {
92 DType::Binary(_) => Arc::new(unsafe {
93 GenericBinaryArray::new_unchecked(
94 offsets.buffer::<O>().into_arrow_offset_buffer(),
95 data.into_arrow_buffer(),
96 nulls,
97 )
98 }),
99 DType::Utf8(_) => Arc::new(unsafe {
100 GenericStringArray::new_unchecked(
101 offsets.buffer::<O>().into_arrow_offset_buffer(),
102 data.into_arrow_buffer(),
103 nulls,
104 )
105 }),
106 dtype => {
107 unreachable!("expected utf8 or binary instead of {dtype}")
108 }
109 })
110}