Skip to main content

vortex_array/arrow/executor/
byte_view.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use arrow_array::ArrayRef as ArrowArrayRef;
7use arrow_array::GenericByteViewArray;
8use arrow_array::types::ByteViewType;
9use arrow_buffer::ScalarBuffer;
10use vortex_error::VortexResult;
11
12use crate::ArrayRef;
13use crate::ExecutionCtx;
14use crate::arrays::VarBinViewArray;
15use crate::arrow::executor::validity::to_arrow_null_buffer;
16use crate::arrow::null_buffer::to_null_buffer;
17use crate::builtins::ArrayBuiltins;
18use crate::dtype::DType;
19use crate::dtype::Nullability;
20use crate::dtype::arrow::FromArrowType;
21
22/// Convert a canonical VarBinViewArray directly to Arrow.
23pub fn canonical_varbinview_to_arrow<T: ByteViewType>(
24    array: &VarBinViewArray,
25) -> VortexResult<ArrowArrayRef> {
26    let views =
27        ScalarBuffer::<u128>::from(array.views_handle().as_host().clone().into_arrow_buffer());
28    let buffers: Vec<_> = array
29        .data_buffers()
30        .iter()
31        .map(|buffer| buffer.as_host().clone().into_arrow_buffer())
32        .collect();
33    let nulls = to_null_buffer(array.validity_mask()?);
34
35    // SAFETY: our own VarBinView array is considered safe.
36    Ok(Arc::new(unsafe {
37        GenericByteViewArray::<T>::new_unchecked(views, buffers, nulls)
38    }))
39}
40
41pub fn execute_varbinview_to_arrow<T: ByteViewType>(
42    array: &VarBinViewArray,
43    ctx: &mut ExecutionCtx,
44) -> VortexResult<ArrowArrayRef> {
45    let views =
46        ScalarBuffer::<u128>::from(array.views_handle().as_host().clone().into_arrow_buffer());
47    let buffers: Vec<_> = array
48        .data_buffers()
49        .iter()
50        .map(|buffer| buffer.as_host().clone().into_arrow_buffer())
51        .collect();
52    let nulls = to_arrow_null_buffer(array.validity()?, array.len(), ctx)?;
53
54    // SAFETY: our own VarBinView array is considered safe.
55    Ok(Arc::new(unsafe {
56        GenericByteViewArray::<T>::new_unchecked(views, buffers, nulls)
57    }))
58}
59
60pub(super) fn to_arrow_byte_view<T: ByteViewType>(
61    array: ArrayRef,
62    ctx: &mut ExecutionCtx,
63) -> VortexResult<ArrowArrayRef> {
64    // First we cast the array into the desired ByteView type.
65    // We do this in case the vortex array is Utf8, and we want Binary or vice versa. By casting
66    // first, we may push this down through the Vortex array tree. We choose nullable to be most
67    // flexible since there's no prescribed nullability in Arrow types.
68    let array = array.cast(DType::from_arrow((&T::DATA_TYPE, Nullability::Nullable)))?;
69
70    let varbinview = array.execute::<VarBinViewArray>(ctx)?;
71    canonical_varbinview_to_arrow::<T>(&varbinview)
72}