vortex_array/compute/
to_arrow.rs

1use arrow_array::{Array as ArrowArray, ArrayRef as ArrowArrayRef};
2use arrow_schema::DataType;
3use vortex_error::{VortexExpect, VortexResult, vortex_err};
4
5use crate::Array;
6use crate::builders::builder_with_capacity;
7use crate::encoding::Encoding;
8
9/// Trait for Arrow conversion compute function.
10pub trait ToArrowFn<A> {
11    /// Return the preferred Arrow [`DataType`] of the encoding, or None of the canonical
12    /// [`DataType`] for the array's Vortex [`vortex_dtype::DType`] should be used.
13    fn preferred_arrow_data_type(&self, _array: A) -> VortexResult<Option<DataType>> {
14        Ok(None)
15    }
16
17    /// Convert the array to an Arrow array of the given type.
18    ///
19    /// Implementation can return None if the conversion cannot be specialized by this encoding.
20    /// In this case, the default conversion via `to_canonical` will be used.
21    fn to_arrow(&self, array: A, data_type: &DataType) -> VortexResult<Option<ArrowArrayRef>>;
22}
23
24impl<E: Encoding> ToArrowFn<&dyn Array> for E
25where
26    E: for<'a> ToArrowFn<&'a E::Array>,
27{
28    fn preferred_arrow_data_type(&self, array: &dyn Array) -> VortexResult<Option<DataType>> {
29        let array_ref = array
30            .as_any()
31            .downcast_ref::<E::Array>()
32            .vortex_expect("Failed to downcast array");
33        ToArrowFn::preferred_arrow_data_type(self, array_ref)
34    }
35
36    fn to_arrow(
37        &self,
38        array: &dyn Array,
39        data_type: &DataType,
40    ) -> VortexResult<Option<ArrowArrayRef>> {
41        let array_ref = array
42            .as_any()
43            .downcast_ref::<E::Array>()
44            .vortex_expect("Failed to downcast array");
45        ToArrowFn::to_arrow(self, array_ref, data_type)
46    }
47}
48
49/// Return the preferred Arrow [`DataType`] of the array.
50pub fn preferred_arrow_data_type(array: &dyn Array) -> VortexResult<DataType> {
51    if let Some(result) = array
52        .vtable()
53        .to_arrow_fn()
54        .and_then(|f| f.preferred_arrow_data_type(array).transpose())
55        .transpose()?
56    {
57        return Ok(result);
58    }
59
60    // Otherwise, we use the default.
61    array.dtype().to_arrow_dtype()
62}
63
64pub fn to_arrow_preferred(array: &dyn Array) -> VortexResult<ArrowArrayRef> {
65    let data_type = preferred_arrow_data_type(array)?;
66    to_arrow(array, &data_type)
67}
68
69/// Convert the array to an Arrow array of the given type.
70pub fn to_arrow(array: &dyn Array, data_type: &DataType) -> VortexResult<ArrowArrayRef> {
71    if let Some(result) = array
72        .vtable()
73        .to_arrow_fn()
74        .and_then(|f| f.to_arrow(array, data_type).transpose())
75        .transpose()?
76    {
77        assert_eq!(
78            result.data_type(),
79            data_type,
80            "ToArrowFn returned wrong data type"
81        );
82        return Ok(result);
83    }
84
85    // Fall back to canonicalizing and then converting.
86    let mut builder = builder_with_capacity(array.dtype(), array.len());
87    array.append_to_builder(builder.as_mut())?;
88    let canonical_array = builder.finish();
89    let arrow_array = canonical_array
90        .vtable()
91        .to_arrow_fn()
92        .vortex_expect("Canonical encodings must implement ToArrowFn")
93        .to_arrow(&canonical_array, data_type)?
94        .ok_or_else(|| {
95            vortex_err!(
96                "Failed to convert array {} to Arrow {}",
97                canonical_array.encoding(),
98                data_type
99            )
100        })?;
101
102    debug_assert_eq!(array.len(), arrow_array.len());
103
104    Ok(arrow_array)
105}
106
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_array::types::Int32Type;
    use arrow_array::{ArrayRef, PrimitiveArray, StringViewArray, StructArray};
    use arrow_buffer::NullBuffer;

    use crate::array::Array;
    use crate::arrays;
    use crate::compute::to_arrow;

    /// Converts a two-field Vortex struct (nullable ints plus strings) to Arrow and checks
    /// that it equals the same struct built directly with Arrow.
    #[test]
    fn test_to_arrow() {
        // Vortex side: { a: [1, null, 2], b: ["a", "b", "c"] }.
        let vortex_fields = vec![
            (
                "a",
                arrays::PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2)])
                    .into_array(),
            ),
            (
                "b",
                arrays::VarBinViewArray::from_iter_str(vec!["a", "b", "c"]).into_array(),
            ),
        ];
        let vortex_struct = arrays::StructArray::from_fields(vortex_fields.as_slice()).unwrap();

        // Arrow side: the same columns, with the null slot of `a` backed by a 0 value.
        let expected_a: ArrayRef = Arc::new(
            PrimitiveArray::<Int32Type>::from_iter_values_with_nulls(
                vec![1, 0, 2],
                Some(NullBuffer::from(vec![true, false, true])),
            ),
        );
        let expected_b: ArrayRef =
            Arc::new(StringViewArray::from(vec![Some("a"), Some("b"), Some("c")]));
        let expected: ArrayRef = Arc::new(
            StructArray::try_from(vec![("a", expected_a), ("b", expected_b)]).unwrap(),
        );

        let target_type = vortex_struct.dtype().to_arrow_dtype().unwrap();
        let actual = to_arrow(&vortex_struct, &target_type).unwrap();

        assert_eq!(&actual, &expected);
    }
}