vortex_array/compute/
to_arrow.rs

1use arrow_array::{Array as ArrowArray, ArrayRef as ArrowArrayRef};
2use arrow_schema::DataType;
3use vortex_error::{VortexExpect, VortexResult, vortex_err};
4
5use crate::encoding::Encoding;
6use crate::{Array, IntoArray};
7
8/// Trait for Arrow conversion compute function.
9pub trait ToArrowFn<A> {
10    /// Return the preferred Arrow [`DataType`] of the encoding, or None of the canonical
11    /// [`DataType`] for the array's Vortex [`vortex_dtype::DType`] should be used.
12    fn preferred_arrow_data_type(&self, _array: A) -> VortexResult<Option<DataType>> {
13        Ok(None)
14    }
15
16    /// Convert the array to an Arrow array of the given type.
17    ///
18    /// Implementation can return None if the conversion cannot be specialized by this encoding.
19    /// In this case, the default conversion via `to_canonical` will be used.
20    fn to_arrow(&self, array: A, data_type: &DataType) -> VortexResult<Option<ArrowArrayRef>>;
21}
22
23impl<E: Encoding> ToArrowFn<&dyn Array> for E
24where
25    E: for<'a> ToArrowFn<&'a E::Array>,
26{
27    fn preferred_arrow_data_type(&self, array: &dyn Array) -> VortexResult<Option<DataType>> {
28        let array_ref = array
29            .as_any()
30            .downcast_ref::<E::Array>()
31            .vortex_expect("Failed to downcast array");
32        ToArrowFn::preferred_arrow_data_type(self, array_ref)
33    }
34
35    fn to_arrow(
36        &self,
37        array: &dyn Array,
38        data_type: &DataType,
39    ) -> VortexResult<Option<ArrowArrayRef>> {
40        let array_ref = array
41            .as_any()
42            .downcast_ref::<E::Array>()
43            .vortex_expect("Failed to downcast array");
44        ToArrowFn::to_arrow(self, array_ref, data_type)
45    }
46}
47
48/// Return the preferred Arrow [`DataType`] of the array.
49pub fn preferred_arrow_data_type(array: &dyn Array) -> VortexResult<DataType> {
50    if let Some(result) = array
51        .vtable()
52        .to_arrow_fn()
53        .and_then(|f| f.preferred_arrow_data_type(array).transpose())
54        .transpose()?
55    {
56        return Ok(result);
57    }
58
59    // Otherwise, we use the default.
60    array.dtype().to_arrow_dtype()
61}
62
63pub fn to_arrow_preferred(array: &dyn Array) -> VortexResult<ArrowArrayRef> {
64    let data_type = preferred_arrow_data_type(array)?;
65    to_arrow(array, &data_type)
66}
67
68/// Convert the array to an Arrow array of the given type.
69pub fn to_arrow(array: &dyn Array, data_type: &DataType) -> VortexResult<ArrowArrayRef> {
70    if let Some(result) = array
71        .vtable()
72        .to_arrow_fn()
73        .and_then(|f| f.to_arrow(array, data_type).transpose())
74        .transpose()?
75    {
76        assert_eq!(
77            result.data_type(),
78            data_type,
79            "ToArrowFn returned wrong data type"
80        );
81        return Ok(result);
82    }
83
84    // Fall back to canonicalizing and then converting.
85    let canonical_array = array.to_canonical()?.into_array();
86    let arrow_array = canonical_array
87        .vtable()
88        .to_arrow_fn()
89        .vortex_expect("Canonical encodings must implement ToArrowFn")
90        .to_arrow(&canonical_array, data_type)?
91        .ok_or_else(|| {
92            vortex_err!(
93                "Failed to convert array {} to Arrow {}",
94                canonical_array.encoding(),
95                data_type
96            )
97        })?;
98
99    assert_eq!(array.len(), arrow_array.len());
100
101    Ok(arrow_array)
102}
103
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_array::types::Int32Type;
    use arrow_array::{ArrayRef, PrimitiveArray, StringViewArray, StructArray};
    use arrow_buffer::NullBuffer;

    use crate::array::Array;
    use crate::arrays;
    use crate::compute::to_arrow;

    /// A Vortex struct with a nullable integer field and a string-view field converts
    /// to the equivalent Arrow `StructArray` when asked for its default Arrow type.
    #[test]
    fn test_to_arrow() {
        // Vortex input: field "a" has a null in the middle, field "b" has no nulls.
        let vortex_a =
            arrays::PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2)]).into_array();
        let vortex_b = arrays::VarBinViewArray::from_iter_str(vec!["a", "b", "c"]).into_array();
        let vortex_struct =
            arrays::StructArray::from_fields(vec![("a", vortex_a), ("b", vortex_b)].as_slice())
                .unwrap();

        // Expected Arrow output, built field by field.
        let arrow_a: ArrayRef = Arc::new(
            PrimitiveArray::<Int32Type>::from_iter_values_with_nulls(
                vec![1, 0, 2],
                Some(NullBuffer::from(vec![true, false, true])),
            ),
        );
        let arrow_b: ArrayRef = Arc::new(StringViewArray::from(vec![
            Some("a"),
            Some("b"),
            Some("c"),
        ]));
        let expected: ArrayRef =
            Arc::new(StructArray::try_from(vec![("a", arrow_a), ("b", arrow_b)]).unwrap());

        let arrow_dtype = vortex_struct.dtype().to_arrow_dtype().unwrap();
        let actual = to_arrow(&vortex_struct, &arrow_dtype).unwrap();

        assert_eq!(&actual, &expected);
    }
}
156}