arrow_convert/deserialize/
mod.rs1mod iterable;
3pub use iterable::*;
4
5use arrow_array::{types, ArrowPrimitiveType, *};
6use arrow_buffer::{ArrowNativeType, Buffer, ScalarBuffer};
7use chrono::{DateTime, NaiveDate, NaiveDateTime, TimeZone, Utc};
8
9use crate::field::*;
10
11pub trait ArrowDeserialize: ArrowField + Sized
13where
14 Self::ArrayType: ArrowArray,
15{
16 type ArrayType;
18
19 fn arrow_deserialize(v: <Self::ArrayType as ArrowArrayIterable>::Item<'_>) -> Option<<Self as ArrowField>::Type>;
21
22 #[inline]
23 #[doc(hidden)]
24 fn arrow_deserialize_internal(v: <Self::ArrayType as ArrowArrayIterable>::Item<'_>) -> <Self as ArrowField>::Type {
31 Self::arrow_deserialize(v).unwrap()
32 }
33}
34
35#[doc(hidden)]
41pub trait ArrowArray
42where
43 Self: ArrowArrayIterable,
44{
45 type BaseArrayType: Array;
46
47 fn iter_from_array_ref(b: &dyn Array) -> <Self as ArrowArrayIterable>::Iter<'_>;
49}
50
/// Implements [`ArrowDeserialize`] for `$physical_type`, reading from a
/// `PrimitiveArray<$primitive_type>`, and implements [`ArrowArray`] for that
/// array type through `impl_arrow_array!`.
///
/// The array iterator already yields `Option<Native>`, so the conversion is
/// the identity.
macro_rules! impl_arrow_deserialize_primitive {
    ($physical_type:ty, $primitive_type:ty) => {
        impl ArrowDeserialize for $physical_type {
            type ArrayType = PrimitiveArray<$primitive_type>;

            // No lifetime parameter: the item type is an owned native value.
            #[inline]
            fn arrow_deserialize(
                v: Option<<$primitive_type as ArrowPrimitiveType>::Native>,
            ) -> Option<Self> {
                v
            }
        }

        impl_arrow_array!(PrimitiveArray<$primitive_type>);
    };
}
66
/// Implements [`ArrowArray`] for an arrow array type that is its own
/// `BaseArrayType`.
///
/// The generated `iter_from_array_ref` panics if the supplied array is not of
/// type `$array`.
macro_rules! impl_arrow_array {
    ($array:ty) => {
        impl ArrowArray for $array {
            type BaseArrayType = Self;

            #[inline]
            fn iter_from_array_ref(b: &dyn Array) -> <Self as ArrowArrayIterable>::Iter<'_> {
                let typed = b.as_any().downcast_ref::<Self::BaseArrayType>();
                <Self as ArrowArrayIterable>::iter(typed.unwrap())
            }
        }
    };
}
80
81impl<T> ArrowDeserialize for Option<T>
83where
84 T: ArrowDeserialize,
85 T::ArrayType: 'static + ArrowArray,
86 T::ArrayType: ArrowArrayIterable,
87{
88 type ArrayType = <T as ArrowDeserialize>::ArrayType;
89
90 #[inline]
91 fn arrow_deserialize(v: <Self::ArrayType as ArrowArrayIterable>::Item<'_>) -> Option<<Self as ArrowField>::Type> {
92 Self::arrow_deserialize_internal(v).map(Some)
93 }
94
95 #[inline]
96 fn arrow_deserialize_internal(v: <Self::ArrayType as ArrowArrayIterable>::Item<'_>) -> <Self as ArrowField>::Type {
97 <T as ArrowDeserialize>::arrow_deserialize(v)
98 }
99}
100
101impl_arrow_deserialize_primitive!(u8, types::UInt8Type);
102impl_arrow_deserialize_primitive!(u16, types::UInt16Type);
103impl_arrow_deserialize_primitive!(u32, types::UInt32Type);
104impl_arrow_deserialize_primitive!(u64, types::UInt64Type);
105impl_arrow_deserialize_primitive!(i8, types::Int8Type);
106impl_arrow_deserialize_primitive!(i16, types::Int16Type);
107impl_arrow_deserialize_primitive!(i32, types::Int32Type);
108impl_arrow_deserialize_primitive!(i64, types::Int64Type);
109impl_arrow_deserialize_primitive!(half::f16, types::Float16Type);
110impl_arrow_deserialize_primitive!(f32, types::Float32Type);
111impl_arrow_deserialize_primitive!(f64, types::Float64Type);
112
113impl<const PRECISION: u8, const SCALE: i8> ArrowDeserialize for I128<PRECISION, SCALE> {
114 type ArrayType = PrimitiveArray<types::Decimal128Type>;
115
116 #[inline]
117 fn arrow_deserialize<'a>(v: Option<i128>) -> Option<i128> {
118 v
119 }
120}
121
122impl_arrow_array!(PrimitiveArray<types::Decimal128Type>);
123
124impl ArrowDeserialize for String {
125 type ArrayType = StringArray;
126
127 #[inline]
128 fn arrow_deserialize(v: Option<&str>) -> Option<Self> {
129 v.map(|t| t.to_string())
130 }
131}
132
133impl ArrowDeserialize for LargeString {
134 type ArrayType = LargeStringArray;
135
136 #[inline]
137 fn arrow_deserialize(v: Option<&str>) -> Option<String> {
138 v.map(|t| t.to_string())
139 }
140}
141
142impl ArrowDeserialize for bool {
143 type ArrayType = BooleanArray;
144
145 #[inline]
146 fn arrow_deserialize(v: Option<bool>) -> Option<Self> {
147 v
148 }
149}
150
151impl ArrowDeserialize for NaiveDateTime {
152 type ArrayType = TimestampNanosecondArray;
153
154 #[inline]
155 fn arrow_deserialize(v: Option<i64>) -> Option<Self> {
156 v.and_then(arrow_array::temporal_conversions::timestamp_ns_to_datetime)
157 }
158}
159
160impl ArrowDeserialize for DateTime<Utc> {
161 type ArrayType = TimestampNanosecondArray;
162
163 #[inline]
164 fn arrow_deserialize(v: Option<i64>) -> Option<Self> {
165 v.map(|ns| Utc.timestamp_nanos(ns))
166 }
167}
168
169impl ArrowDeserialize for NaiveDate {
170 type ArrayType = Date32Array;
171
172 #[inline]
173 fn arrow_deserialize(v: Option<i32>) -> Option<Self> {
174 v.and_then(|t| arrow_array::temporal_conversions::as_date::<types::Date32Type>(t as i64))
175 }
176}
177
178pub struct BufferBinaryArrayIter<'a> {
180 index: usize,
181 array: &'a BinaryArray,
182}
183
184impl<'a> Iterator for BufferBinaryArrayIter<'a> {
185 type Item = Option<&'a [u8]>;
186
187 fn next(&mut self) -> Option<Self::Item> {
188 if self.index >= self.array.len() {
189 None
190 } else if self.array.is_valid(self.index) {
191 let value = self.array.value(self.index);
193 self.index += 1;
194 Some(Some(value))
195 } else {
196 self.index += 1;
197 Some(None)
198 }
199 }
200}
201
202pub struct BufferBinaryArray;
204
205impl ArrowArray for BufferBinaryArray {
206 type BaseArrayType = BinaryArray;
207 #[inline]
208 fn iter_from_array_ref(a: &dyn Array) -> <Self as ArrowArrayIterable>::Iter<'_> {
209 let b = a.as_any().downcast_ref::<Self::BaseArrayType>().unwrap();
210
211 BufferBinaryArrayIter { index: 0, array: b }
212 }
213}
214
215impl ArrowDeserialize for Buffer {
217 type ArrayType = BufferBinaryArray;
218
219 #[inline]
220 fn arrow_deserialize(v: Option<&[u8]>) -> Option<Self> {
221 v.map(|t| t.into())
222 }
223}
224impl ArrowDeserialize for ScalarBuffer<u8> {
225 type ArrayType = BufferBinaryArray;
226
227 #[inline]
228 fn arrow_deserialize(v: Option<&[u8]>) -> Option<Self> {
229 v.map(|t| ScalarBuffer::from(t.to_vec()))
230 }
231}
232
233impl ArrowDeserialize for Vec<u8> {
234 type ArrayType = BinaryArray;
235
236 #[inline]
237 fn arrow_deserialize(v: Option<&[u8]>) -> Option<Self> {
238 v.map(|t| t.to_vec())
239 }
240}
241
242impl ArrowDeserialize for LargeBinary {
243 type ArrayType = LargeBinaryArray;
244
245 #[inline]
246 fn arrow_deserialize(v: Option<&[u8]>) -> Option<Vec<u8>> {
247 v.map(|t| t.to_vec())
248 }
249}
250
251impl<const SIZE: i32> ArrowDeserialize for FixedSizeBinary<SIZE> {
252 type ArrayType = FixedSizeBinaryArray;
253
254 #[inline]
255 fn arrow_deserialize(v: Option<&[u8]>) -> Option<Vec<u8>> {
256 v.map(|t| t.to_vec())
257 }
258}
259
260impl<const SIZE: usize> ArrowDeserialize for [u8; SIZE] {
261 type ArrayType = FixedSizeBinaryArray;
262
263 #[inline]
264 fn arrow_deserialize(v: Option<&[u8]>) -> Option<[u8; SIZE]> {
265 v.map(|t| t.to_vec().try_into().unwrap())
266 }
267}
268
269pub(crate) fn arrow_deserialize_vec_helper<T>(v: Option<ArrayRef>) -> Option<<Vec<T> as ArrowField>::Type>
270where
271 T: ArrowDeserialize + ArrowEnableVecForType + 'static,
272 T::ArrayType: ArrowArrayIterable,
273{
274 use std::ops::Deref;
275 v.map(|t| {
276 arrow_array_deserialize_iterator_internal::<<T as ArrowField>::Type, T>(t.deref())
277 .collect::<Vec<<T as ArrowField>::Type>>()
278 })
279}
280
281impl<T, K> ArrowDeserialize for ScalarBuffer<T>
283where
284 K: ArrowPrimitiveType<Native = T>,
285 T: ArrowDeserialize<ArrayType = PrimitiveArray<K>> + ArrowNativeType + ArrowEnableVecForType,
286 <T as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
287{
288 type ArrayType = ListArray;
289
290 #[inline]
291 fn arrow_deserialize(v: <Self::ArrayType as ArrowArrayIterable>::Item<'_>) -> Option<<Self as ArrowField>::Type> {
292 let t = v?;
293 let array = t.as_any().downcast_ref::<PrimitiveArray<K>>().unwrap().values().clone();
294 Some(array)
295 }
296}
297
298impl<T> ArrowDeserialize for Vec<T>
300where
301 T: ArrowDeserialize + ArrowEnableVecForType + 'static,
302 <T as ArrowDeserialize>::ArrayType: 'static,
303 <T as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
304{
305 type ArrayType = ListArray;
306
307 fn arrow_deserialize(v: Option<ArrayRef>) -> Option<<Self as ArrowField>::Type> {
308 arrow_deserialize_vec_helper::<T>(v)
309 }
310}
311
312impl<T> ArrowDeserialize for LargeVec<T>
313where
314 T: ArrowDeserialize + ArrowEnableVecForType + 'static,
315 <T as ArrowDeserialize>::ArrayType: 'static,
316 <T as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
317{
318 type ArrayType = LargeListArray;
319
320 fn arrow_deserialize(v: Option<ArrayRef>) -> Option<<Self as ArrowField>::Type> {
321 arrow_deserialize_vec_helper::<T>(v)
322 }
323}
324
325impl<T, const SIZE: i32> ArrowDeserialize for FixedSizeVec<T, SIZE>
326where
327 T: ArrowDeserialize + ArrowEnableVecForType + 'static,
328 <T as ArrowDeserialize>::ArrayType: 'static,
329 <T as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
330{
331 type ArrayType = FixedSizeListArray;
332
333 fn arrow_deserialize(v: Option<ArrayRef>) -> Option<<Self as ArrowField>::Type> {
334 arrow_deserialize_vec_helper::<T>(v)
335 }
336}
337impl<T, const SIZE: usize> ArrowDeserialize for [T; SIZE]
338where
339 T: ArrowDeserialize + ArrowEnableVecForType + 'static,
340 <T as ArrowDeserialize>::ArrayType: 'static,
341 <T as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
342{
343 type ArrayType = FixedSizeListArray;
344
345 fn arrow_deserialize(v: Option<ArrayRef>) -> Option<<Self as ArrowField>::Type> {
346 let result = arrow_deserialize_vec_helper::<T>(v)?;
347 let length = result.len();
348
349 match <[<T as ArrowField>::Type; SIZE]>::try_from(result).ok() {
350 None => panic!(
351 "Expected size of {} deserializing array of type `{}`, got {}",
352 std::any::type_name::<T>(),
353 SIZE,
354 length
355 ),
356 array => array,
357 }
358 }
359}
360
361impl_arrow_array!(BooleanArray);
362impl_arrow_array!(StringArray);
363impl_arrow_array!(LargeStringArray);
364impl_arrow_array!(BinaryArray);
365impl_arrow_array!(LargeBinaryArray);
366impl_arrow_array!(FixedSizeBinaryArray);
367impl_arrow_array!(ListArray);
368impl_arrow_array!(LargeListArray);
369impl_arrow_array!(FixedSizeListArray);
370impl_arrow_array!(Date32Array);
371impl_arrow_array!(Date64Array);
372impl_arrow_array!(TimestampSecondArray);
373impl_arrow_array!(TimestampMillisecondArray);
374impl_arrow_array!(TimestampMicrosecondArray);
375impl_arrow_array!(TimestampNanosecondArray);
376
377pub trait TryIntoCollection<Collection, Element>
379where
380 Collection: FromIterator<Element>,
381{
382 fn try_into_collection(self) -> Result<Collection, arrow_schema::ArrowError>
384 where
385 Element: ArrowDeserialize + ArrowField<Type = Element> + 'static;
386
387 fn try_into_collection_as_type<ArrowType>(self) -> Result<Collection, arrow_schema::ArrowError>
390 where
391 ArrowType: ArrowDeserialize + ArrowField<Type = Element> + 'static;
392 }
394
395fn arrow_array_deserialize_iterator_internal<Element, Field>(b: &dyn Array) -> impl Iterator<Item = Element> + '_
397where
398 Field: ArrowDeserialize + ArrowField<Type = Element> + 'static,
399 <Field as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
400{
401 <<Field as ArrowDeserialize>::ArrayType as ArrowArray>::iter_from_array_ref(b)
402 .map(<Field as ArrowDeserialize>::arrow_deserialize_internal)
403}
404
405pub fn arrow_array_deserialize_iterator_as_type<Element, ArrowType>(
407 arr: &dyn Array,
408) -> Result<impl Iterator<Item = Element> + '_, arrow_schema::ArrowError>
409where
410 Element: 'static,
411 ArrowType: ArrowDeserialize + ArrowField<Type = Element> + 'static,
412 <ArrowType as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
413{
414 if &<ArrowType as ArrowField>::data_type() != arr.data_type() {
415 Err(arrow_schema::ArrowError::InvalidArgumentError(format!(
416 "Data type mismatch. Expected type={:#?} is_nullable={}, but was type={:#?} is_nullable={}",
417 &<ArrowType as ArrowField>::data_type(),
418 &<ArrowType as ArrowField>::is_nullable(),
419 arr.data_type(),
420 arr.is_nullable()
421 )))
422 } else {
423 Ok(arrow_array_deserialize_iterator_internal::<Element, ArrowType>(
424 arr,
425 ))
426 }
427}
428
429pub fn arrow_array_deserialize_iterator<T>(
431 arr: &dyn Array,
432) -> Result<impl Iterator<Item = T> + '_, arrow_schema::ArrowError>
433where
434 T: ArrowDeserialize + ArrowField<Type = T> + 'static,
435 <T as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
436{
437 arrow_array_deserialize_iterator_as_type::<T, T>(arr)
438}
439
440impl<Collection, Element, ArrowArray> TryIntoCollection<Collection, Element> for ArrowArray
441where
442 Element: 'static,
443 ArrowArray: std::borrow::Borrow<dyn Array>,
444 Collection: FromIterator<Element>,
445{
446 fn try_into_collection(self) -> Result<Collection, arrow_schema::ArrowError>
447 where
448 Element: ArrowDeserialize + ArrowField<Type = Element> + 'static,
449 <Element as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
450 {
451 Ok(arrow_array_deserialize_iterator::<Element>(self.borrow())?.collect())
452 }
453
454 fn try_into_collection_as_type<ArrowType>(self) -> Result<Collection, arrow_schema::ArrowError>
455 where
456 ArrowType: ArrowDeserialize + ArrowField<Type = Element> + 'static,
457 <ArrowType as ArrowDeserialize>::ArrayType: ArrowArrayIterable,
458 {
459 Ok(arrow_array_deserialize_iterator_as_type::<Element, ArrowType>(self.borrow())?.collect())
460 }
461}