vortex_array/arrow/compute/to_arrow/
varbin.rs

1use std::sync::Arc;
2
3use arrow_array::{
4    ArrayRef as ArrowArrayRef, GenericBinaryArray, GenericStringArray, OffsetSizeTrait,
5};
6use arrow_schema::DataType;
7use vortex_dtype::{DType, NativePType, Nullability, PType};
8use vortex_error::{VortexResult, vortex_bail};
9
10use crate::arrays::{VarBinArray, VarBinVTable};
11use crate::arrow::compute::{ToArrowKernel, ToArrowKernelAdapter};
12use crate::compute::cast;
13use crate::{Array, ToCanonical, register_kernel};
14
15impl ToArrowKernel for VarBinVTable {
16    fn to_arrow(
17        &self,
18        array: &VarBinArray,
19        arrow_type: Option<&DataType>,
20    ) -> VortexResult<Option<ArrowArrayRef>> {
21        let offsets_ptype = PType::try_from(array.offsets().dtype())?;
22
23        match arrow_type {
24            // Emit out preferred Arrow VarBin array.
25            None => match array.dtype() {
26                DType::Binary(_) => match offsets_ptype {
27                    PType::I64 | PType::U64 => to_arrow::<i64>(array),
28                    _ => to_arrow::<i32>(array),
29                },
30                DType::Utf8(_) => match offsets_ptype {
31                    PType::I64 | PType::U64 => to_arrow::<i64>(array),
32                    _ => to_arrow::<i32>(array),
33                },
34                _ => unreachable!("Unsupported DType"),
35            },
36            // Emit the requested Arrow array.
37            Some(DataType::Binary) if array.dtype().is_binary() => to_arrow::<i32>(array),
38            Some(DataType::LargeBinary) if array.dtype().is_binary() => to_arrow::<i64>(array),
39            Some(DataType::Utf8) if array.dtype().is_utf8() => to_arrow::<i32>(array),
40            Some(DataType::LargeUtf8) if array.dtype().is_utf8() => to_arrow::<i64>(array),
41            // Allow fallback to canonicalize to a VarBinView and try again.
42            Some(DataType::BinaryView) | Some(DataType::Utf8View) => {
43                return Ok(None);
44            }
45            // Any other type is not supported.
46            Some(_) => {
47                vortex_bail!("Cannot convert VarBin to Arrow type {arrow_type:?}");
48            }
49        }
50        .map(Some)
51    }
52}
53
// Hand the `VarBinVTable` to-Arrow kernel, wrapped in `ToArrowKernelAdapter` and
// lifted, to the `register_kernel!` macro.
// NOTE(review): what registration does (and when it runs) is defined by the
// macro, which is outside this file — confirm against its definition.
register_kernel!(ToArrowKernelAdapter(VarBinVTable).lift());
55
56fn to_arrow<O: NativePType + OffsetSizeTrait>(array: &VarBinArray) -> VortexResult<ArrowArrayRef> {
57    let offsets = cast(
58        array.offsets(),
59        &DType::Primitive(O::PTYPE, Nullability::NonNullable),
60    )?
61    .to_primitive()
62    .map_err(|err| err.with_context("Failed to canonicalize offsets"))?;
63
64    let nulls = array.validity_mask()?.to_null_buffer();
65    let data = array.bytes().clone();
66
67    // Switch on DType.
68    Ok(match array.dtype() {
69        DType::Binary(_) => Arc::new(unsafe {
70            GenericBinaryArray::new_unchecked(
71                offsets.buffer::<O>().into_arrow_offset_buffer(),
72                data.into_arrow_buffer(),
73                nulls,
74            )
75        }),
76        DType::Utf8(_) => Arc::new(unsafe {
77            GenericStringArray::new_unchecked(
78                offsets.buffer::<O>().into_arrow_offset_buffer(),
79                data.into_arrow_buffer(),
80                nulls,
81            )
82        }),
83        _ => unreachable!("expected utf8 or binary instead of {}", array.dtype()),
84    })
85}