vortex_array/compute/
to_arrow.rs

1use arrow_array::{Array as ArrowArray, ArrayRef as ArrowArrayRef};
2use arrow_schema::DataType;
3use vortex_error::{VortexExpect, VortexResult, vortex_err};
4
5use crate::Array;
6use crate::arrow::infer_data_type;
7use crate::builders::builder_with_capacity;
8use crate::encoding::Encoding;
9
10/// Trait for Arrow conversion compute function.
11pub trait ToArrowFn<A> {
12    /// Return the preferred Arrow [`DataType`] of the encoding, or None of the canonical
13    /// [`DataType`] for the array's Vortex [`vortex_dtype::DType`] should be used.
14    fn preferred_arrow_data_type(&self, _array: A) -> VortexResult<Option<DataType>> {
15        Ok(None)
16    }
17
18    /// Convert the array to an Arrow array of the given type.
19    ///
20    /// Implementation can return None if the conversion cannot be specialized by this encoding.
21    /// In this case, the default conversion via `to_canonical` will be used.
22    fn to_arrow(&self, array: A, data_type: &DataType) -> VortexResult<Option<ArrowArrayRef>>;
23}
24
25impl<E: Encoding> ToArrowFn<&dyn Array> for E
26where
27    E: for<'a> ToArrowFn<&'a E::Array>,
28{
29    fn preferred_arrow_data_type(&self, array: &dyn Array) -> VortexResult<Option<DataType>> {
30        let array_ref = array
31            .as_any()
32            .downcast_ref::<E::Array>()
33            .vortex_expect("Failed to downcast array");
34        ToArrowFn::preferred_arrow_data_type(self, array_ref)
35    }
36
37    fn to_arrow(
38        &self,
39        array: &dyn Array,
40        data_type: &DataType,
41    ) -> VortexResult<Option<ArrowArrayRef>> {
42        let array_ref = array
43            .as_any()
44            .downcast_ref::<E::Array>()
45            .vortex_expect("Failed to downcast array");
46        ToArrowFn::to_arrow(self, array_ref, data_type)
47    }
48}
49
50/// Return the preferred Arrow [`DataType`] of the array.
51pub fn preferred_arrow_data_type(array: &dyn Array) -> VortexResult<DataType> {
52    if let Some(result) = array
53        .vtable()
54        .to_arrow_fn()
55        .and_then(|f| f.preferred_arrow_data_type(array).transpose())
56        .transpose()?
57    {
58        return Ok(result);
59    }
60
61    // Otherwise, we use the default.
62    infer_data_type(array.dtype())
63}
64
65/// Convert the array to an Arrow array of the given type.
66pub fn to_arrow(array: &dyn Array, data_type: &DataType) -> VortexResult<ArrowArrayRef> {
67    if let Some(result) = array
68        .vtable()
69        .to_arrow_fn()
70        .and_then(|f| f.to_arrow(array, data_type).transpose())
71        .transpose()?
72    {
73        assert_eq!(
74            result.data_type(),
75            data_type,
76            "ToArrowFn returned wrong data type"
77        );
78        return Ok(result);
79    }
80
81    // Fall back to canonicalizing and then converting.
82    let mut builder = builder_with_capacity(array.dtype(), array.len());
83    array.append_to_builder(builder.as_mut())?;
84    let array = builder.finish();
85    array
86        .vtable()
87        .to_arrow_fn()
88        .vortex_expect("Canonical encodings must implement ToArrowFn")
89        .to_arrow(&array, data_type)?
90        .ok_or_else(|| {
91            vortex_err!(
92                "Failed to convert array {} to Arrow {}",
93                array.encoding(),
94                data_type
95            )
96        })
97}
98
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_array::types::Int32Type;
    use arrow_array::{ArrayRef, PrimitiveArray, StringViewArray, StructArray};
    use arrow_buffer::NullBuffer;

    use crate::array::Array;
    use crate::arrays;
    use crate::arrow::infer_data_type;
    use crate::compute::to_arrow;

    /// Converts a two-field Vortex struct (nullable ints and strings) to Arrow and compares it
    /// against an equivalent Arrow struct built by hand.
    #[test]
    fn test_to_arrow() {
        // Vortex input: field "a" has a null in the middle, field "b" is all-valid strings.
        let vortex_fields = vec![
            (
                "a",
                arrays::PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2)])
                    .into_array(),
            ),
            (
                "b",
                arrays::VarBinViewArray::from_iter_str(vec!["a", "b", "c"]).into_array(),
            ),
        ];
        let array = arrays::StructArray::from_fields(vortex_fields.as_slice()).unwrap();

        // Expected Arrow output, column by column.
        let expected_a: ArrayRef =
            Arc::new(PrimitiveArray::<Int32Type>::from_iter_values_with_nulls(
                vec![1, 0, 2],
                Some(NullBuffer::from(vec![true, false, true])),
            ));
        let expected_b: ArrayRef =
            Arc::new(StringViewArray::from(vec![Some("a"), Some("b"), Some("c")]));
        let expected: ArrayRef = Arc::new(
            StructArray::try_from(vec![("a", expected_a), ("b", expected_b)]).unwrap(),
        );

        let target_type = infer_data_type(array.dtype()).unwrap();
        let actual = to_arrow(&array, &target_type).unwrap();

        assert_eq!(&actual, &expected);
    }
}