Skip to main content

vortex_array/arrow/executor/
byte_view.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use arrow_array::ArrayRef as ArrowArrayRef;
7use arrow_array::GenericByteViewArray;
8use arrow_array::types::ByteViewType;
9use arrow_buffer::ScalarBuffer;
10use vortex_error::VortexResult;
11
12use crate::ArrayRef;
13use crate::ExecutionCtx;
14use crate::arrays::VarBinViewArray;
15use crate::arrow::executor::validity::to_arrow_null_buffer;
16use crate::arrow::null_buffer::to_null_buffer;
17use crate::builtins::ArrayBuiltins;
18use crate::dtype::DType;
19use crate::dtype::Nullability;
20use crate::dtype::arrow::FromArrowType;
21
22/// Convert a canonical VarBinViewArray directly to Arrow.
23pub fn canonical_varbinview_to_arrow<T: ByteViewType>(
24    array: &VarBinViewArray,
25    ctx: &mut ExecutionCtx,
26) -> VortexResult<ArrowArrayRef> {
27    let views =
28        ScalarBuffer::<u128>::from(array.views_handle().as_host().clone().into_arrow_buffer());
29    let buffers: Vec<_> = array
30        .data_buffers()
31        .iter()
32        .map(|buffer| buffer.as_host().clone().into_arrow_buffer())
33        .collect();
34    let nulls = to_null_buffer(
35        array
36            .as_ref()
37            .validity()?
38            .to_mask(array.as_ref().len(), ctx)?,
39    );
40
41    // SAFETY: our own VarBinView array is considered safe.
42    Ok(Arc::new(unsafe {
43        GenericByteViewArray::<T>::new_unchecked(views, buffers, nulls)
44    }))
45}
46
47pub fn execute_varbinview_to_arrow<T: ByteViewType>(
48    array: &VarBinViewArray,
49    ctx: &mut ExecutionCtx,
50) -> VortexResult<ArrowArrayRef> {
51    let views =
52        ScalarBuffer::<u128>::from(array.views_handle().as_host().clone().into_arrow_buffer());
53    let buffers: Vec<_> = array
54        .data_buffers()
55        .iter()
56        .map(|buffer| buffer.as_host().clone().into_arrow_buffer())
57        .collect();
58    let nulls = to_arrow_null_buffer(array.validity()?, array.len(), ctx)?;
59
60    // SAFETY: our own VarBinView array is considered safe.
61    Ok(Arc::new(unsafe {
62        GenericByteViewArray::<T>::new_unchecked(views, buffers, nulls)
63    }))
64}
65
66pub(super) fn to_arrow_byte_view<T: ByteViewType>(
67    array: ArrayRef,
68    ctx: &mut ExecutionCtx,
69) -> VortexResult<ArrowArrayRef> {
70    // First we cast the array into the desired ByteView type.
71    // We do this in case the vortex array is Utf8, and we want Binary or vice versa. By casting
72    // first, we may push this down through the Vortex array tree. We choose nullable to be most
73    // flexible since there's no prescribed nullability in Arrow types.
74    let array = array.cast(DType::from_arrow((&T::DATA_TYPE, Nullability::Nullable)))?;
75
76    let varbinview = array.execute::<VarBinViewArray>(ctx)?;
77    canonical_varbinview_to_arrow::<T>(&varbinview, ctx)
78}