vortex_array/arrow/compute/to_arrow/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod canonical;
5mod list;
6mod temporal;
7mod varbin;
8
9use std::any::Any;
10use std::sync::LazyLock;
11
12use arcref::ArcRef;
13use arrow_array::ArrayRef as ArrowArrayRef;
14use arrow_schema::DataType;
15use vortex_dtype::DType;
16use vortex_dtype::arrow::FromArrowType;
17use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
18
19use crate::Array;
20use crate::arrow::array::{ArrowArray, ArrowVTable};
21use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
22use crate::vtable::VTable;
23
24static TO_ARROW_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
25    let compute = ComputeFn::new("to_arrow".into(), ArcRef::new_ref(&ToArrow));
26
27    // Register the kernels we ship ourselves
28    compute.register_kernel(ArcRef::new_ref(&canonical::ToArrowCanonical));
29    compute.register_kernel(ArcRef::new_ref(&temporal::ToArrowTemporal));
30
31    for kernel in inventory::iter::<ToArrowKernelRef> {
32        compute.register_kernel(kernel.0.clone());
33    }
34    compute
35});
36
37pub(crate) fn warm_up_vtable() -> usize {
38    TO_ARROW_FN.kernels().len()
39}
40
41/// Convert a Vortex array to an Arrow array with the encoding's preferred `DataType`.
42///
43/// For example, a `VarBinArray` will be converted to an Arrow `VarBin` array, instead of the
44/// canonical `VarBinViewArray`.
45///
46/// Warning: do not use this to convert a Vortex [`crate::stream::ArrayStream`] since each array
47/// may have a different preferred Arrow type. Use [`to_arrow`] instead.
48pub fn to_arrow_preferred(array: &dyn Array) -> VortexResult<ArrowArrayRef> {
49    to_arrow_opts(array, &ToArrowOptions { arrow_type: None })
50}
51
52/// Convert a Vortex array to an Arrow array of the given type.
53pub fn to_arrow(array: &dyn Array, arrow_type: &DataType) -> VortexResult<ArrowArrayRef> {
54    to_arrow_opts(
55        array,
56        &ToArrowOptions {
57            arrow_type: Some(arrow_type.clone()),
58        },
59    )
60}
61
62pub fn to_arrow_opts(array: &dyn Array, options: &ToArrowOptions) -> VortexResult<ArrowArrayRef> {
63    let arrow = TO_ARROW_FN
64        .invoke(&InvocationArgs {
65            inputs: &[array.into()],
66            options,
67        })?
68        .unwrap_array()?
69        .as_opt::<ArrowVTable>()
70        .ok_or_else(|| vortex_err!("ToArrow compute kernels must return a Vortex ArrowArray"))?
71        .inner()
72        .clone();
73
74    if let Some(arrow_type) = &options.arrow_type
75        && arrow.data_type() != arrow_type
76    {
77        vortex_bail!(
78            "Arrow array type mismatch: expected {:?}, got {:?}",
79            &options.arrow_type,
80            arrow.data_type()
81        );
82    }
83
84    Ok(arrow)
85}
86
87pub struct ToArrowOptions {
88    /// The Arrow data type to convert to, if specified.
89    pub arrow_type: Option<DataType>,
90}
91
92impl Options for ToArrowOptions {
93    fn as_any(&self) -> &dyn Any {
94        self
95    }
96}
97
98struct ToArrow;
99
100impl ComputeFnVTable for ToArrow {
101    fn invoke(
102        &self,
103        args: &InvocationArgs,
104        kernels: &[ArcRef<dyn Kernel>],
105    ) -> VortexResult<Output> {
106        let ToArrowArgs { array, arrow_type } = ToArrowArgs::try_from(args)?;
107
108        for kernel in kernels {
109            if let Some(output) = kernel.invoke(args)? {
110                return Ok(output);
111            }
112        }
113        if let Some(output) = array.invoke(&TO_ARROW_FN, args)? {
114            return Ok(output);
115        }
116
117        // Fall back to canonicalizing and then converting.
118        if !array.is_canonical() {
119            let canonical_array = array.to_canonical();
120            let arrow_array = to_arrow_opts(
121                canonical_array.as_ref(),
122                &ToArrowOptions {
123                    arrow_type: arrow_type.cloned(),
124                },
125            )?;
126            return Ok(ArrowArray::new(arrow_array, array.dtype().nullability())
127                .to_array()
128                .into());
129        }
130
131        vortex_bail!(
132            "Failed to convert array {} to Arrow {:?}",
133            array.encoding_id(),
134            arrow_type
135        );
136    }
137
138    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
139        let ToArrowArgs { array, arrow_type } = ToArrowArgs::try_from(args)?;
140        Ok(arrow_type
141            .map(|arrow_type| DType::from_arrow((arrow_type, array.dtype().nullability())))
142            .unwrap_or_else(|| array.dtype().clone()))
143    }
144
145    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
146        let ToArrowArgs { array, .. } = ToArrowArgs::try_from(args)?;
147        Ok(array.len())
148    }
149
150    fn is_elementwise(&self) -> bool {
151        false
152    }
153}
154
155pub struct ToArrowArgs<'a> {
156    array: &'a dyn Array,
157    arrow_type: Option<&'a DataType>,
158}
159
160impl<'a> TryFrom<&InvocationArgs<'a>> for ToArrowArgs<'a> {
161    type Error = VortexError;
162
163    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
164        if value.inputs.len() != 1 {
165            vortex_bail!("Expected 1 input, found {}", value.inputs.len());
166        }
167        let array = value.inputs[0]
168            .array()
169            .ok_or_else(|| vortex_err!("Expected input 0 to be an array"))?;
170        let options = value
171            .options
172            .as_any()
173            .downcast_ref::<ToArrowOptions>()
174            .vortex_expect("Expected options to be ToArrowOptions");
175
176        Ok(ToArrowArgs {
177            array,
178            arrow_type: options.arrow_type.as_ref(),
179        })
180    }
181}
182
183pub struct ToArrowKernelRef(pub ArcRef<dyn Kernel>);
184inventory::collect!(ToArrowKernelRef);
185
186pub trait ToArrowKernel: VTable {
187    fn to_arrow(
188        &self,
189        arr: &Self::Array,
190        arrow_type: Option<&DataType>,
191    ) -> VortexResult<Option<ArrowArrayRef>>;
192}
193
194#[derive(Debug)]
195pub struct ToArrowKernelAdapter<V: VTable>(pub V);
196
197impl<V: VTable + ToArrowKernel> ToArrowKernelAdapter<V> {
198    pub const fn lift(&'static self) -> ToArrowKernelRef {
199        ToArrowKernelRef(ArcRef::new_ref(self))
200    }
201}
202
203impl<V: VTable + ToArrowKernel> Kernel for ToArrowKernelAdapter<V> {
204    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
205        let inputs = ToArrowArgs::try_from(args)?;
206        let Some(array) = inputs.array.as_opt::<V>() else {
207            return Ok(None);
208        };
209
210        let Some(arrow_array) = V::to_arrow(&self.0, array, inputs.arrow_type)? else {
211            return Ok(None);
212        };
213
214        Ok(Some(
215            ArrowArray::new(arrow_array, array.dtype().nullability())
216                .to_array()
217                .into(),
218        ))
219    }
220}
221
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_array::types::Int32Type;
    use arrow_array::{ArrayRef, PrimitiveArray, StringViewArray, StructArray};
    use arrow_buffer::NullBuffer;

    use super::to_arrow;
    use crate::{IntoArray, arrays};

    /// Converts a two-field Vortex struct array (nullable ints, string views) to Arrow and
    /// compares it against a hand-built Arrow `StructArray`.
    #[test]
    fn test_to_arrow() {
        // Vortex input.
        let vortex_ints =
            arrays::PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2)]).into_array();
        let vortex_strs = arrays::VarBinViewArray::from_iter_str(vec!["a", "b", "c"]).into_array();
        let array =
            arrays::StructArray::from_fields(&[("a", vortex_ints), ("b", vortex_strs)]).unwrap();

        // Expected Arrow output; the null slot in "a" carries a placeholder value of 0.
        let expected_ints: ArrayRef = Arc::new(
            PrimitiveArray::<Int32Type>::from_iter_values_with_nulls(
                vec![1, 0, 2],
                Some(NullBuffer::from(vec![true, false, true])),
            ),
        );
        let expected_strs: ArrayRef = Arc::new(StringViewArray::from(vec![
            Some("a"),
            Some("b"),
            Some("c"),
        ]));
        let arrow_array: ArrayRef = Arc::new(
            StructArray::try_from(vec![("a", expected_ints), ("b", expected_strs)]).unwrap(),
        );

        let target_type = array.dtype().to_arrow_dtype().unwrap();
        let converted = to_arrow(array.as_ref(), &target_type).unwrap();

        assert_eq!(&converted, &arrow_array);
    }
}