vortex_array/arrow/compute/to_arrow/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use arrow_array::{ArrayRef as ArrowArrayRef, GenericListArray, OffsetSizeTrait};
7use arrow_schema::{DataType, Field, FieldRef};
8use vortex_dtype::{DType, IntegerPType};
9use vortex_error::{VortexResult, vortex_bail};
10
11use crate::arrays::{ListArray, ListVTable, list_view_from_list};
12use crate::arrow::IntoArrowArray;
13use crate::arrow::compute::to_arrow::null_buffer::to_null_buffer;
14use crate::arrow::compute::{ToArrowKernel, ToArrowKernelAdapter};
15use crate::compute::cast;
16use crate::{IntoArray, ToCanonical, register_kernel};
17
18impl ToArrowKernel for ListVTable {
19    fn to_arrow(
20        &self,
21        array: &ListArray,
22        arrow_type: Option<&DataType>,
23    ) -> VortexResult<Option<ArrowArrayRef>> {
24        match arrow_type {
25            None => {
26                // Default to a `ListArray` with `i32` offsets (preferred) when no `arrow_type` is
27                // specified.
28                list_array_to_arrow_list::<i32>(array, None)
29            }
30            Some(DataType::List(field)) => list_array_to_arrow_list::<i32>(array, Some(field)),
31            Some(DataType::LargeList(field)) => list_array_to_arrow_list::<i64>(array, Some(field)),
32            Some(dt @ DataType::ListView(_)) | Some(dt @ DataType::LargeListView(_)) => {
33                // Convert `ListArray` to `ListViewArray`, then use the canonical conversion.
34                let list_view = list_view_from_list(array.clone());
35                Ok(list_view.into_array().into_arrow(dt)?)
36            }
37            _ => vortex_bail!(
38                "Cannot convert `ListArray` to non-list Arrow type: {:?}",
39                arrow_type
40            ),
41        }
42        .map(Some)
43    }
44}
45
// Registers the `ToArrowKernel` implementation for `ListVTable`, wrapped in a
// `ToArrowKernelAdapter` and lifted via `.lift()`.
// NOTE(review): presumably this is what makes the kernel discoverable by the to-arrow compute
// dispatch -- confirm against the definition of `register_kernel!`.
46register_kernel!(ToArrowKernelAdapter(ListVTable).lift());
47
48/// Converts a Vortex [`ListArray`] directly into an arrow [`GenericListArray`].
49fn list_array_to_arrow_list<O: IntegerPType + OffsetSizeTrait>(
50    array: &ListArray,
51    element: Option<&FieldRef>,
52) -> VortexResult<ArrowArrayRef> {
53    // First we cast the offsets and sizes into the specified width (determined by `O::PTYPE`).
54    let offsets_dtype = DType::Primitive(O::PTYPE, array.dtype().nullability());
55    let offsets = cast(array.offsets(), &offsets_dtype)
56        .map_err(|err| err.with_context(format!("Failed to cast offsets to {offsets_dtype}")))?
57        .to_primitive();
58
59    // Convert `offsets` and `validity` to Arrow buffers.
60    let arrow_offsets = offsets.buffer::<O>().into_arrow_offset_buffer();
61    let nulls = to_null_buffer(array.validity_mask());
62
63    // Convert the child `elements` array to Arrow.
64    let (elements, element_field) = {
65        if let Some(element) = element {
66            // Convert elements to the specific Arrow type the caller wants.
67            (
68                array.elements().clone().into_arrow(element.data_type())?,
69                element.clone(),
70            )
71        } else {
72            // Otherwise, convert into whatever Arrow prefers.
73            let elements = array.elements().clone().into_arrow_preferred()?;
74            let element_field = Arc::new(Field::new_list_field(
75                elements.data_type().clone(),
76                array.elements().dtype().is_nullable(),
77            ));
78            (elements, element_field)
79        }
80    };
81
82    Ok(Arc::new(GenericListArray::new(
83        element_field,
84        arrow_offsets,
85        elements,
86        nulls,
87    )))
88}