// vortex_array/arrow/compute/to_arrow/mod.rs
mod canonical;
5mod list;
6mod temporal;
7mod varbin;
8
9use std::any::Any;
10use std::sync::LazyLock;
11
12use arcref::ArcRef;
13use arrow_array::ArrayRef as ArrowArrayRef;
14use arrow_schema::DataType;
15use vortex_dtype::DType;
16use vortex_dtype::arrow::FromArrowType;
17use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
18
19use crate::Array;
20use crate::arrow::array::{ArrowArray, ArrowVTable};
21use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
22use crate::vtable::VTable;
23
24static TO_ARROW_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
25 let compute = ComputeFn::new("to_arrow".into(), ArcRef::new_ref(&ToArrow));
26
27 compute.register_kernel(ArcRef::new_ref(&canonical::ToArrowCanonical));
29 compute.register_kernel(ArcRef::new_ref(&temporal::ToArrowTemporal));
30
31 for kernel in inventory::iter::<ToArrowKernelRef> {
32 compute.register_kernel(kernel.0.clone());
33 }
34 compute
35});
36
37pub(crate) fn warm_up_vtable() -> usize {
38 TO_ARROW_FN.kernels().len()
39}
40
41pub fn to_arrow_preferred(array: &dyn Array) -> VortexResult<ArrowArrayRef> {
49 to_arrow_opts(array, &ToArrowOptions { arrow_type: None })
50}
51
52pub fn to_arrow(array: &dyn Array, arrow_type: &DataType) -> VortexResult<ArrowArrayRef> {
54 to_arrow_opts(
55 array,
56 &ToArrowOptions {
57 arrow_type: Some(arrow_type.clone()),
58 },
59 )
60}
61
62pub fn to_arrow_opts(array: &dyn Array, options: &ToArrowOptions) -> VortexResult<ArrowArrayRef> {
63 let arrow = TO_ARROW_FN
64 .invoke(&InvocationArgs {
65 inputs: &[array.into()],
66 options,
67 })?
68 .unwrap_array()?
69 .as_opt::<ArrowVTable>()
70 .ok_or_else(|| vortex_err!("ToArrow compute kernels must return a Vortex ArrowArray"))?
71 .inner()
72 .clone();
73
74 if let Some(arrow_type) = &options.arrow_type
75 && arrow.data_type() != arrow_type
76 {
77 vortex_bail!(
78 "Arrow array type mismatch: expected {:?}, got {:?}",
79 &options.arrow_type,
80 arrow.data_type()
81 );
82 }
83
84 Ok(arrow)
85}
86
87pub struct ToArrowOptions {
88 pub arrow_type: Option<DataType>,
90}
91
92impl Options for ToArrowOptions {
93 fn as_any(&self) -> &dyn Any {
94 self
95 }
96}
97
98struct ToArrow;
99
100impl ComputeFnVTable for ToArrow {
101 fn invoke(
102 &self,
103 args: &InvocationArgs,
104 kernels: &[ArcRef<dyn Kernel>],
105 ) -> VortexResult<Output> {
106 let ToArrowArgs { array, arrow_type } = ToArrowArgs::try_from(args)?;
107
108 for kernel in kernels {
109 if let Some(output) = kernel.invoke(args)? {
110 return Ok(output);
111 }
112 }
113 if let Some(output) = array.invoke(&TO_ARROW_FN, args)? {
114 return Ok(output);
115 }
116
117 if !array.is_canonical() {
119 let canonical_array = array.to_canonical();
120 let arrow_array = to_arrow_opts(
121 canonical_array.as_ref(),
122 &ToArrowOptions {
123 arrow_type: arrow_type.cloned(),
124 },
125 )?;
126 return Ok(ArrowArray::new(arrow_array, array.dtype().nullability())
127 .to_array()
128 .into());
129 }
130
131 vortex_bail!(
132 "Failed to convert array {} to Arrow {:?}",
133 array.encoding_id(),
134 arrow_type
135 );
136 }
137
138 fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
139 let ToArrowArgs { array, arrow_type } = ToArrowArgs::try_from(args)?;
140 Ok(arrow_type
141 .map(|arrow_type| DType::from_arrow((arrow_type, array.dtype().nullability())))
142 .unwrap_or_else(|| array.dtype().clone()))
143 }
144
145 fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
146 let ToArrowArgs { array, .. } = ToArrowArgs::try_from(args)?;
147 Ok(array.len())
148 }
149
150 fn is_elementwise(&self) -> bool {
151 false
152 }
153}
154
155pub struct ToArrowArgs<'a> {
156 array: &'a dyn Array,
157 arrow_type: Option<&'a DataType>,
158}
159
160impl<'a> TryFrom<&InvocationArgs<'a>> for ToArrowArgs<'a> {
161 type Error = VortexError;
162
163 fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
164 if value.inputs.len() != 1 {
165 vortex_bail!("Expected 1 input, found {}", value.inputs.len());
166 }
167 let array = value.inputs[0]
168 .array()
169 .ok_or_else(|| vortex_err!("Expected input 0 to be an array"))?;
170 let options = value
171 .options
172 .as_any()
173 .downcast_ref::<ToArrowOptions>()
174 .vortex_expect("Expected options to be ToArrowOptions");
175
176 Ok(ToArrowArgs {
177 array,
178 arrow_type: options.arrow_type.as_ref(),
179 })
180 }
181}
182
183pub struct ToArrowKernelRef(pub ArcRef<dyn Kernel>);
184inventory::collect!(ToArrowKernelRef);
185
186pub trait ToArrowKernel: VTable {
187 fn to_arrow(
188 &self,
189 arr: &Self::Array,
190 arrow_type: Option<&DataType>,
191 ) -> VortexResult<Option<ArrowArrayRef>>;
192}
193
194#[derive(Debug)]
195pub struct ToArrowKernelAdapter<V: VTable>(pub V);
196
197impl<V: VTable + ToArrowKernel> ToArrowKernelAdapter<V> {
198 pub const fn lift(&'static self) -> ToArrowKernelRef {
199 ToArrowKernelRef(ArcRef::new_ref(self))
200 }
201}
202
203impl<V: VTable + ToArrowKernel> Kernel for ToArrowKernelAdapter<V> {
204 fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
205 let inputs = ToArrowArgs::try_from(args)?;
206 let Some(array) = inputs.array.as_opt::<V>() else {
207 return Ok(None);
208 };
209
210 let Some(arrow_array) = V::to_arrow(&self.0, array, inputs.arrow_type)? else {
211 return Ok(None);
212 };
213
214 Ok(Some(
215 ArrowArray::new(arrow_array, array.dtype().nullability())
216 .to_array()
217 .into(),
218 ))
219 }
220}
221
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_array::types::Int32Type;
    use arrow_array::{ArrayRef, PrimitiveArray, StringViewArray, StructArray};
    use arrow_buffer::NullBuffer;

    use super::to_arrow;
    use crate::{IntoArray, arrays};

    /// A struct array with a nullable integer field and a string-view field
    /// converts to the equivalent Arrow `StructArray`.
    #[test]
    fn test_to_arrow() {
        // Vortex input: field "a" has a null in the middle, field "b" has none.
        let vortex_a =
            arrays::PrimitiveArray::from_option_iter(vec![Some(1), None, Some(2)]).into_array();
        let vortex_b = arrays::VarBinViewArray::from_iter_str(vec!["a", "b", "c"]).into_array();
        let array =
            arrays::StructArray::from_fields(vec![("a", vortex_a), ("b", vortex_b)].as_slice())
                .unwrap();

        // Expected Arrow output: the null slot of "a" holds a placeholder 0.
        let arrow_a: ArrayRef = Arc::new(PrimitiveArray::<Int32Type>::from_iter_values_with_nulls(
            vec![1, 0, 2],
            Some(NullBuffer::from(vec![true, false, true])),
        ));
        let arrow_b: ArrayRef =
            Arc::new(StringViewArray::from(vec![Some("a"), Some("b"), Some("c")]));
        let arrow_array: ArrayRef =
            Arc::new(StructArray::try_from(vec![("a", arrow_a), ("b", arrow_b)]).unwrap());

        let target_type = array.dtype().to_arrow_dtype().unwrap();
        let converted = to_arrow(array.as_ref(), &target_type).unwrap();

        assert_eq!(&converted, &arrow_array);
    }
}