vortex_array/arrow/compute/to_arrow/
varbin.rs1use std::sync::Arc;
5
6use arrow_array::ArrayRef as ArrowArrayRef;
7use arrow_array::GenericBinaryArray;
8use arrow_array::GenericStringArray;
9use arrow_array::OffsetSizeTrait;
10use arrow_schema::DataType;
11use vortex_dtype::DType;
12use vortex_dtype::IntegerPType;
13use vortex_dtype::Nullability;
14use vortex_dtype::PType;
15use vortex_dtype::PTypeDowncastExt;
16use vortex_error::VortexResult;
17use vortex_error::vortex_bail;
18use vortex_error::vortex_panic;
19
20use crate::Array;
21use crate::LEGACY_SESSION;
22use crate::VectorExecutor;
23use crate::arrays::VarBinArray;
24use crate::arrays::VarBinVTable;
25use crate::arrow::compute::ToArrowKernel;
26use crate::arrow::compute::ToArrowKernelAdapter;
27use crate::arrow::null_buffer::to_null_buffer;
28use crate::compute::cast;
29use crate::register_kernel;
30
31impl ToArrowKernel for VarBinVTable {
32 fn to_arrow(
33 &self,
34 array: &VarBinArray,
35 arrow_type: Option<&DataType>,
36 ) -> VortexResult<Option<ArrowArrayRef>> {
37 let offsets_ptype = PType::try_from(array.offsets().dtype())?;
38
39 match arrow_type {
40 None => match array.dtype() {
42 DType::Binary(_) => match offsets_ptype {
43 PType::I64 | PType::U64 => to_arrow::<i64>(array),
44 PType::U8 | PType::U16 | PType::U32 | PType::I8 | PType::I16 | PType::I32 => {
45 to_arrow::<i32>(array)
46 }
47 PType::F16 | PType::F32 | PType::F64 => {
48 vortex_panic!("offsets array were somehow floating point")
49 }
50 },
51 DType::Utf8(_) => match offsets_ptype {
52 PType::I64 | PType::U64 => to_arrow::<i64>(array),
53 PType::U8 | PType::U16 | PType::U32 | PType::I8 | PType::I16 | PType::I32 => {
54 to_arrow::<i32>(array)
55 }
56 PType::F16 | PType::F32 | PType::F64 => {
57 vortex_panic!("offsets array were somehow floating point")
58 }
59 },
60 dtype => unreachable!("Unsupported DType {dtype}"),
61 },
62 Some(DataType::Binary) if array.dtype().is_binary() => to_arrow::<i32>(array),
64 Some(DataType::LargeBinary) if array.dtype().is_binary() => to_arrow::<i64>(array),
65 Some(DataType::Utf8) if array.dtype().is_utf8() => to_arrow::<i32>(array),
66 Some(DataType::LargeUtf8) if array.dtype().is_utf8() => to_arrow::<i64>(array),
67 Some(DataType::BinaryView) | Some(DataType::Utf8View) => {
69 return Ok(None);
70 }
71 Some(_) => {
73 vortex_bail!("Cannot convert VarBin to Arrow type {arrow_type:?}");
74 }
75 }
76 .map(Some)
77 }
78}
79
80register_kernel!(ToArrowKernelAdapter(VarBinVTable).lift());
81
82fn to_arrow<O: IntegerPType + OffsetSizeTrait>(array: &VarBinArray) -> VortexResult<ArrowArrayRef> {
83 let offsets = cast(
84 array.offsets(),
85 &DType::Primitive(O::PTYPE, Nullability::NonNullable),
86 )?
87 .execute_vector(&LEGACY_SESSION)?
88 .into_primitive()
89 .downcast::<O>()
90 .into_nonnull_buffer();
91
92 let nulls = to_null_buffer(array.validity_mask());
93 let data = array.bytes().clone();
94
95 Ok(match array.dtype() {
97 DType::Binary(_) => Arc::new(unsafe {
98 GenericBinaryArray::new_unchecked(
99 offsets.into_arrow_offset_buffer(),
100 data.into_arrow_buffer(),
101 nulls,
102 )
103 }),
104 DType::Utf8(_) => Arc::new(unsafe {
105 GenericStringArray::new_unchecked(
106 offsets.into_arrow_offset_buffer(),
107 data.into_arrow_buffer(),
108 nulls,
109 )
110 }),
111 dtype => {
112 unreachable!("expected utf8 or binary instead of {dtype}")
113 }
114 })
115}