vortex_array/arrow/compute/to_arrow/
varbin.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use arrow_array::{
7    ArrayRef as ArrowArrayRef, GenericBinaryArray, GenericStringArray, OffsetSizeTrait,
8};
9use arrow_schema::DataType;
10use vortex_dtype::{DType, NativePType, Nullability, PType};
11use vortex_error::{VortexResult, vortex_bail};
12
13use crate::arrays::{VarBinArray, VarBinVTable};
14use crate::arrow::compute::{ToArrowKernel, ToArrowKernelAdapter};
15use crate::compute::cast;
16use crate::{Array, ToCanonical, register_kernel};
17
18impl ToArrowKernel for VarBinVTable {
19    fn to_arrow(
20        &self,
21        array: &VarBinArray,
22        arrow_type: Option<&DataType>,
23    ) -> VortexResult<Option<ArrowArrayRef>> {
24        let offsets_ptype = PType::try_from(array.offsets().dtype())?;
25
26        match arrow_type {
27            // Emit out preferred Arrow VarBin array.
28            None => match array.dtype() {
29                DType::Binary(_) => match offsets_ptype {
30                    PType::I64 | PType::U64 => to_arrow::<i64>(array),
31                    _ => to_arrow::<i32>(array),
32                },
33                DType::Utf8(_) => match offsets_ptype {
34                    PType::I64 | PType::U64 => to_arrow::<i64>(array),
35                    _ => to_arrow::<i32>(array),
36                },
37                _ => unreachable!("Unsupported DType"),
38            },
39            // Emit the requested Arrow array.
40            Some(DataType::Binary) if array.dtype().is_binary() => to_arrow::<i32>(array),
41            Some(DataType::LargeBinary) if array.dtype().is_binary() => to_arrow::<i64>(array),
42            Some(DataType::Utf8) if array.dtype().is_utf8() => to_arrow::<i32>(array),
43            Some(DataType::LargeUtf8) if array.dtype().is_utf8() => to_arrow::<i64>(array),
44            // Allow fallback to canonicalize to a VarBinView and try again.
45            Some(DataType::BinaryView) | Some(DataType::Utf8View) => {
46                return Ok(None);
47            }
48            // Any other type is not supported.
49            Some(_) => {
50                vortex_bail!("Cannot convert VarBin to Arrow type {arrow_type:?}");
51            }
52        }
53        .map(Some)
54    }
55}
56
// Passes the `ToArrowKernel` impl for `VarBinVTable`, wrapped in `ToArrowKernelAdapter` and
// lifted, to the `register_kernel!` macro.
// NOTE(review): the registration mechanics are defined by `register_kernel!` outside this file;
// presumably this makes the kernel discoverable by the to-arrow compute dispatch — confirm there.
register_kernel!(ToArrowKernelAdapter(VarBinVTable).lift());
58
59fn to_arrow<O: NativePType + OffsetSizeTrait>(array: &VarBinArray) -> VortexResult<ArrowArrayRef> {
60    let offsets = cast(
61        array.offsets(),
62        &DType::Primitive(O::PTYPE, Nullability::NonNullable),
63    )?
64    .to_primitive()
65    .map_err(|err| err.with_context("Failed to canonicalize offsets"))?;
66
67    let nulls = array.validity_mask()?.to_null_buffer();
68    let data = array.bytes().clone();
69
70    // Switch on DType.
71    Ok(match array.dtype() {
72        DType::Binary(_) => Arc::new(unsafe {
73            GenericBinaryArray::new_unchecked(
74                offsets.buffer::<O>().into_arrow_offset_buffer(),
75                data.into_arrow_buffer(),
76                nulls,
77            )
78        }),
79        DType::Utf8(_) => Arc::new(unsafe {
80            GenericStringArray::new_unchecked(
81                offsets.buffer::<O>().into_arrow_offset_buffer(),
82                data.into_arrow_buffer(),
83                nulls,
84            )
85        }),
86        _ => unreachable!("expected utf8 or binary instead of {}", array.dtype()),
87    })
88}