vortex_array/arrow/compute/to_arrow/
varbin.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use arrow_array::ArrayRef as ArrowArrayRef;
7use arrow_array::GenericBinaryArray;
8use arrow_array::GenericStringArray;
9use arrow_array::OffsetSizeTrait;
10use arrow_schema::DataType;
11use vortex_dtype::DType;
12use vortex_dtype::IntegerPType;
13use vortex_dtype::Nullability;
14use vortex_dtype::PType;
15use vortex_dtype::PTypeDowncastExt;
16use vortex_error::VortexResult;
17use vortex_error::vortex_bail;
18use vortex_error::vortex_panic;
19
20use crate::Array;
21use crate::LEGACY_SESSION;
22use crate::VectorExecutor;
23use crate::arrays::VarBinArray;
24use crate::arrays::VarBinVTable;
25use crate::arrow::compute::ToArrowKernel;
26use crate::arrow::compute::ToArrowKernelAdapter;
27use crate::arrow::null_buffer::to_null_buffer;
28use crate::compute::cast;
29use crate::register_kernel;
30
31impl ToArrowKernel for VarBinVTable {
32    fn to_arrow(
33        &self,
34        array: &VarBinArray,
35        arrow_type: Option<&DataType>,
36    ) -> VortexResult<Option<ArrowArrayRef>> {
37        let offsets_ptype = PType::try_from(array.offsets().dtype())?;
38
39        match arrow_type {
40            // Emit out preferred Arrow VarBin array.
41            None => match array.dtype() {
42                DType::Binary(_) => match offsets_ptype {
43                    PType::I64 | PType::U64 => to_arrow::<i64>(array),
44                    PType::U8 | PType::U16 | PType::U32 | PType::I8 | PType::I16 | PType::I32 => {
45                        to_arrow::<i32>(array)
46                    }
47                    PType::F16 | PType::F32 | PType::F64 => {
48                        vortex_panic!("offsets array were somehow floating point")
49                    }
50                },
51                DType::Utf8(_) => match offsets_ptype {
52                    PType::I64 | PType::U64 => to_arrow::<i64>(array),
53                    PType::U8 | PType::U16 | PType::U32 | PType::I8 | PType::I16 | PType::I32 => {
54                        to_arrow::<i32>(array)
55                    }
56                    PType::F16 | PType::F32 | PType::F64 => {
57                        vortex_panic!("offsets array were somehow floating point")
58                    }
59                },
60                dtype => unreachable!("Unsupported DType {dtype}"),
61            },
62            // Emit the requested Arrow array.
63            Some(DataType::Binary) if array.dtype().is_binary() => to_arrow::<i32>(array),
64            Some(DataType::LargeBinary) if array.dtype().is_binary() => to_arrow::<i64>(array),
65            Some(DataType::Utf8) if array.dtype().is_utf8() => to_arrow::<i32>(array),
66            Some(DataType::LargeUtf8) if array.dtype().is_utf8() => to_arrow::<i64>(array),
67            // Allow fallback to canonicalize to a VarBinView and try again.
68            Some(DataType::BinaryView) | Some(DataType::Utf8View) => {
69                return Ok(None);
70            }
71            // Any other type is not supported.
72            Some(_) => {
73                vortex_bail!("Cannot convert VarBin to Arrow type {arrow_type:?}");
74            }
75        }
76        .map(Some)
77    }
78}
79
// Registers the `VarBinVTable` to-Arrow kernel, wrapped in `ToArrowKernelAdapter` and lifted,
// via `register_kernel!`.
// NOTE(review): presumably this makes the kernel discoverable by the generic to-Arrow
// dispatch — confirm against the `register_kernel!` definition.
register_kernel!(ToArrowKernelAdapter(VarBinVTable).lift());
81
82fn to_arrow<O: IntegerPType + OffsetSizeTrait>(array: &VarBinArray) -> VortexResult<ArrowArrayRef> {
83    let offsets = cast(
84        array.offsets(),
85        &DType::Primitive(O::PTYPE, Nullability::NonNullable),
86    )?
87    .execute_vector(&LEGACY_SESSION)?
88    .into_primitive()
89    .downcast::<O>()
90    .into_nonnull_buffer();
91
92    let nulls = to_null_buffer(array.validity_mask());
93    let data = array.bytes().clone();
94
95    // Match on the `DType`.
96    Ok(match array.dtype() {
97        DType::Binary(_) => Arc::new(unsafe {
98            GenericBinaryArray::new_unchecked(
99                offsets.into_arrow_offset_buffer(),
100                data.into_arrow_buffer(),
101                nulls,
102            )
103        }),
104        DType::Utf8(_) => Arc::new(unsafe {
105            GenericStringArray::new_unchecked(
106                offsets.into_arrow_offset_buffer(),
107                data.into_arrow_buffer(),
108                nulls,
109            )
110        }),
111        dtype => {
112            unreachable!("expected utf8 or binary instead of {dtype}")
113        }
114    })
115}