Skip to main content

serde_arrow/
arrow_impl.rs

//! Support for the `arrow` crate (*requires one of the `arrow-*` features*)
//!
//! Functions to convert Rust objects into `arrow` arrays and to deserialize
//! `arrow` arrays back into Rust objects.
//!
6#![deny(missing_docs)]
7use std::sync::Arc;
8
9use marrow::{datatypes::Field, error::MarrowError, view::View};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13    _impl::arrow::{
14        array::{Array, ArrayRef, RecordBatch},
15        datatypes::{Field as ArrowField, FieldRef, Schema},
16    },
17    internal::{
18        array_builder::ArrayBuilder,
19        deserializer::Deserializer,
20        error::{fail, Error, ErrorKind, Result},
21        schema::extensions::{Bool8Field, FixedShapeTensorField, VariableShapeTensorField},
22        schema::{SchemaLike, Sealed, SerdeArrowSchema, TracingOptions},
23        serializer::Serializer,
24    },
25};
26
27/// Build arrow arrays from the given items  (*requires one of the `arrow-*`
28/// features*)
29///
30/// `items` should be given in the form a list of records (e.g., a vector of
31/// structs). To serialize items encoding single values consider the
32/// [`Items`][crate::utils::Items] wrapper.
33///
34/// To build arrays record by record use [`ArrayBuilder`]. To construct a record
35/// batch, consider using [`to_record_batch`].
36///
37/// Example:
38///
39/// ```rust
40/// # fn main() -> serde_arrow::Result<()> {
41/// # use serde_arrow::_impl::arrow;
42/// use arrow::datatypes::FieldRef;
43/// use serde::{Serialize, Deserialize};
44/// use serde_arrow::schema::{SchemaLike, TracingOptions};
45///
46/// ##[derive(Serialize, Deserialize)]
47/// struct Record {
48///     a: Option<f32>,
49///     b: u64,
50/// }
51///
52/// let items = vec![
53///     Record { a: Some(1.0), b: 2},
54///     // ...
55/// ];
56///
57/// let fields = Vec::<FieldRef>::from_type::<Record>(TracingOptions::default())?;
58/// let arrays = serde_arrow::to_arrow(&fields, &items)?;
59/// #
60/// # assert_eq!(arrays.len(), 2);
61/// # Ok(())
62/// # }
63/// ```
64///
65pub fn to_arrow<T: Serialize>(fields: &[FieldRef], items: T) -> Result<Vec<ArrayRef>> {
66    let builder = ArrayBuilder::from_arrow(fields)?;
67    items
68        .serialize(Serializer::new(builder))?
69        .into_inner()
70        .into_arrow()
71}
72
73/// Deserialize items from arrow arrays (*requires one of the `arrow-*`
74/// features*)
75///
76/// The type should be a list of records (e.g., a vector of structs). To
77/// deserialize items encoding single values consider the
78/// [`Items`][crate::utils::Items] wrapper.
79///
80/// ```rust
81/// # fn main() -> serde_arrow::Result<()> {
82/// # use serde_arrow::_impl::arrow;
83/// use arrow::datatypes::FieldRef;
84/// use serde::{Deserialize, Serialize};
85/// use serde_arrow::schema::{SchemaLike, TracingOptions};
86///
87/// # let (_, arrays) = serde_arrow::_impl::docs::defs::example_arrow_arrays();
88/// #
89/// ##[derive(Deserialize, Serialize)]
90/// struct Record {
91///     a: Option<f32>,
92///     b: u64,
93/// }
94///
95/// let fields = Vec::<FieldRef>::from_type::<Record>(TracingOptions::default())?;
96/// let items: Vec<Record> = serde_arrow::from_arrow(&fields, &arrays)?;
97/// # Ok(())
98/// # }
99/// ```
100///
101pub fn from_arrow<'de, T, A>(fields: &[FieldRef], arrays: &'de [A]) -> Result<T>
102where
103    T: Deserialize<'de>,
104    A: AsRef<dyn Array>,
105{
106    T::deserialize(Deserializer::from_arrow(fields, arrays)?)
107}
108
109/// Build a record batch from the given items  (*requires one of the `arrow-*`
110/// features*)
111///
112/// `items` should be given in the form a list of records (e.g., a vector of
113/// structs). To serialize items encoding single values consider the
114/// [`Items`][crate::utils::Items] wrapper.
115///
116/// To build arrays record by record use [`ArrayBuilder`].
117///
118/// Example:
119///
120/// ```rust
121/// # fn main() -> serde_arrow::Result<()> {
122/// # use serde_arrow::_impl::arrow;
123/// use arrow::datatypes::FieldRef;
124/// use serde::{Serialize, Deserialize};
125/// use serde_arrow::schema::{SchemaLike, TracingOptions};
126///
127/// ##[derive(Serialize, Deserialize)]
128/// struct Record {
129///     a: Option<f32>,
130///     b: u64,
131/// }
132///
133/// let items = vec![
134///     Record { a: Some(1.0), b: 2},
135///     // ...
136/// ];
137///
138/// let fields = Vec::<FieldRef>::from_type::<Record>(TracingOptions::default())?;
139/// let record_batch = serde_arrow::to_record_batch(&fields, &items)?;
140///
141/// assert_eq!(record_batch.num_columns(), 2);
142/// assert_eq!(record_batch.num_rows(), 1);
143/// # Ok(())
144/// # }
145/// ```
146pub fn to_record_batch<T: Serialize>(fields: &[FieldRef], items: &T) -> Result<RecordBatch> {
147    let builder = ArrayBuilder::from_arrow(fields)?;
148    items
149        .serialize(Serializer::new(builder))?
150        .into_inner()
151        .into_record_batch()
152}
153
154/// Deserialize items from a record batch (*requires one of the `arrow-*`
155/// features*)
156///
157/// The type should be a list of records (e.g., a vector of structs). To
158/// deserialize items encoding single values consider the
159/// [`Items`][crate::utils::Items] wrapper.
160///
161/// ```rust
162/// # fn main() -> serde_arrow::Result<()> {
163/// # let record_batch = serde_arrow::_impl::docs::defs::example_record_batch();
164/// #
165/// use serde::Deserialize;
166///
167/// ##[derive(Deserialize)]
168/// struct Record {
169///     a: Option<f32>,
170///     b: u64,
171/// }
172///
173/// let items: Vec<Record> = serde_arrow::from_record_batch(&record_batch)?;
174/// # Ok(())
175/// # }
176/// ```
177///
178pub fn from_record_batch<'de, T: Deserialize<'de>>(record_batch: &'de RecordBatch) -> Result<T> {
179    T::deserialize(Deserializer::from_record_batch(record_batch)?)
180}
181
182/// Support `arrow` (*requires one of the `arrow-*` features*)
183impl crate::internal::array_builder::ArrayBuilder {
184    /// Build an ArrayBuilder from `arrow` fields (*requires one of the
185    /// `arrow-*` features*)
186    pub fn from_arrow(fields: &[FieldRef]) -> Result<Self> {
187        let fields = fields_from_field_refs(fields)?;
188        Self::new(SerdeArrowSchema { fields })
189    }
190
191    /// Construct `arrow` arrays and reset the builder (*requires one of the
192    /// `arrow-*` features*)
193    pub fn to_arrow(&mut self) -> Result<Vec<ArrayRef>> {
194        self.take().into_arrow()
195    }
196
197    /// Consume the builder and construct the `arrow` arrays (*requires one of
198    /// the `arrow-*` features*)
199    pub fn into_arrow(self) -> Result<Vec<ArrayRef>> {
200        let (arrays, _) = self.into_arrays_and_field_metas()?;
201        Ok(arrays
202            .into_iter()
203            .map(ArrayRef::try_from)
204            .collect::<Result<_, MarrowError>>()?)
205    }
206
207    /// Construct a [`RecordBatch`] and reset the builder (*requires one of the
208    /// `arrow-*` features*)
209    pub fn to_record_batch(&mut self) -> Result<RecordBatch> {
210        self.take().into_record_batch()
211    }
212
213    /// Construct a [`RecordBatch`] and consume the builder (*requires one of the
214    /// `arrow-*` features*)
215    pub fn into_record_batch(self) -> Result<RecordBatch> {
216        let (arrays, metas) = self.into_arrays_and_field_metas()?;
217
218        let arrays = arrays
219            .into_iter()
220            .map(ArrayRef::try_from)
221            .collect::<Result<Vec<ArrayRef>, MarrowError>>()?;
222
223        let mut fields = Vec::with_capacity(arrays.len());
224        for (array, meta) in std::iter::zip(&arrays, metas) {
225            fields.push(FieldRef::new(
226                ArrowField::new(meta.name, array.data_type().clone(), meta.nullable)
227                    .with_metadata(meta.metadata),
228            ));
229        }
230
231        let schema = Schema::new(fields);
232        RecordBatch::try_new(Arc::new(schema), arrays)
233            .map_err(|err| Error::new_from(ErrorKind::Custom, err.to_string(), err))
234    }
235}
236
237impl<'de> Deserializer<'de> {
238    /// Construct a new deserializer from `arrow` arrays (*requires one of the
239    /// `arrow-*` features*)
240    ///
241    /// Usage
242    /// ```rust
243    /// # fn main() -> serde_arrow::Result<()> {
244    /// # let (_, arrays) = serde_arrow::_impl::docs::defs::example_arrow_arrays();
245    /// # use serde_arrow::_impl::arrow;
246    /// use arrow::datatypes::FieldRef;
247    /// use serde::{Deserialize, Serialize};
248    /// use serde_arrow::{Deserializer, schema::{SchemaLike, TracingOptions}};
249    ///
250    /// ##[derive(Deserialize, Serialize)]
251    /// struct Record {
252    ///     a: Option<f32>,
253    ///     b: u64,
254    /// }
255    ///
256    /// let fields = Vec::<FieldRef>::from_type::<Record>(TracingOptions::default())?;
257    ///
258    /// let deserializer = Deserializer::from_arrow(&fields, &arrays)?;
259    /// let items = Vec::<Record>::deserialize(deserializer)?;
260    /// # Ok(())
261    /// # }
262    /// ```
263    pub fn from_arrow<A>(fields: &[FieldRef], arrays: &'de [A]) -> Result<Self>
264    where
265        A: AsRef<dyn Array>,
266    {
267        if fields.len() != arrays.len() {
268            fail!(
269                "different number of fields ({}) and arrays ({})",
270                fields.len(),
271                arrays.len()
272            );
273        }
274
275        let fields = fields_from_field_refs(fields)?;
276
277        let mut views = Vec::new();
278        for array in arrays {
279            views.push(View::try_from(array.as_ref())?);
280        }
281
282        Deserializer::new(&fields, views)
283    }
284
285    /// Construct a new deserializer from a record batch (*requires one of the
286    /// `arrow-*` features*)
287    ///
288    /// Usage:
289    ///
290    /// ```rust
291    /// # fn main() -> serde_arrow::Result<()> {
292    /// # let record_batch = serde_arrow::_impl::docs::defs::example_record_batch();
293    /// #
294    /// use serde::Deserialize;
295    /// use serde_arrow::Deserializer;
296    ///
297    /// ##[derive(Deserialize)]
298    /// struct Record {
299    ///     a: Option<f32>,
300    ///     b: u64,
301    /// }
302    ///
303    /// let deserializer = Deserializer::from_record_batch(&record_batch)?;
304    /// let items = Vec::<Record>::deserialize(deserializer)?;
305    /// # Ok(())
306    /// # }
307    /// ```
308    ///
309    pub fn from_record_batch(record_batch: &'de RecordBatch) -> Result<Self> {
310        let schema = record_batch.schema();
311        Deserializer::from_arrow(schema.fields(), record_batch.columns())
312    }
313}
314
315fn fields_from_field_refs(fields: &[FieldRef]) -> Result<Vec<Field>> {
316    Ok(fields
317        .iter()
318        .map(|field| Field::try_from(field.as_ref()))
319        .collect::<Result<_, MarrowError>>()?)
320}
321
322impl TryFrom<SerdeArrowSchema> for Vec<ArrowField> {
323    type Error = Error;
324
325    fn try_from(value: SerdeArrowSchema) -> Result<Self> {
326        (&value).try_into()
327    }
328}
329
330impl<'a> TryFrom<&'a SerdeArrowSchema> for Vec<ArrowField> {
331    type Error = Error;
332
333    fn try_from(value: &'a SerdeArrowSchema) -> Result<Self> {
334        Ok(value
335            .fields
336            .iter()
337            .map(ArrowField::try_from)
338            .collect::<Result<_, MarrowError>>()?)
339    }
340}
341
342impl TryFrom<SerdeArrowSchema> for Vec<FieldRef> {
343    type Error = Error;
344
345    fn try_from(value: SerdeArrowSchema) -> Result<Self> {
346        (&value).try_into()
347    }
348}
349
350impl<'a> TryFrom<&'a SerdeArrowSchema> for Vec<FieldRef> {
351    type Error = Error;
352
353    fn try_from(value: &'a SerdeArrowSchema) -> Result<Self> {
354        Ok(value
355            .fields
356            .iter()
357            .map(|f| Ok(Arc::new(ArrowField::try_from(f)?)))
358            .collect::<Result<_, MarrowError>>()?)
359    }
360}
361
362impl<'a> TryFrom<&'a [ArrowField]> for SerdeArrowSchema {
363    type Error = Error;
364
365    fn try_from(fields: &'a [ArrowField]) -> Result<Self> {
366        Ok(Self {
367            fields: fields
368                .iter()
369                .map(Field::try_from)
370                .collect::<Result<_, MarrowError>>()?,
371        })
372    }
373}
374
375impl<'a> TryFrom<&'a [FieldRef]> for SerdeArrowSchema {
376    type Error = Error;
377
378    fn try_from(fields: &'a [FieldRef]) -> Result<Self, Self::Error> {
379        Ok(Self {
380            fields: fields
381                .iter()
382                .map(|f| Field::try_from(f.as_ref()))
383                .collect::<Result<_, MarrowError>>()?,
384        })
385    }
386}
387
388impl Sealed for Vec<ArrowField> {}
389
390/// Schema support for `Vec<arrow::datatype::Field>` (*requires one of the
391/// `arrow-*` features*)
392impl SchemaLike for Vec<ArrowField> {
393    fn from_value<T: Serialize>(value: T) -> Result<Self> {
394        SerdeArrowSchema::from_value(value)?.try_into()
395    }
396
397    fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result<Self> {
398        SerdeArrowSchema::from_type::<T>(options)?.try_into()
399    }
400
401    fn from_samples<T: Serialize>(samples: T, options: TracingOptions) -> Result<Self> {
402        SerdeArrowSchema::from_samples(samples, options)?.try_into()
403    }
404}
405
406impl Sealed for Vec<FieldRef> {}
407
408/// Schema support for `Vec<arrow::datatype::FieldRef>` (*requires one of the
409/// `arrow-*` features*)
410impl SchemaLike for Vec<FieldRef> {
411    fn from_value<T: Serialize>(value: T) -> Result<Self> {
412        SerdeArrowSchema::from_value(value)?.try_into()
413    }
414
415    fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result<Self> {
416        SerdeArrowSchema::from_type::<T>(options)?.try_into()
417    }
418
419    fn from_samples<T: Serialize>(samples: T, options: TracingOptions) -> Result<Self> {
420        SerdeArrowSchema::from_samples(samples, options)?.try_into()
421    }
422}
423
424macro_rules! impl_try_from_ext_type {
425    ($ty:ty) => {
426        impl TryFrom<&$ty> for ArrowField {
427            type Error = Error;
428
429            fn try_from(value: &$ty) -> Result<Self, Self::Error> {
430                Ok(Self::try_from(&Field::try_from(value)?)?)
431            }
432        }
433
434        impl TryFrom<$ty> for ArrowField {
435            type Error = Error;
436
437            fn try_from(value: $ty) -> Result<Self, Self::Error> {
438                Self::try_from(&value)
439            }
440        }
441    };
442}
443
444impl_try_from_ext_type!(Bool8Field);
445impl_try_from_ext_type!(FixedShapeTensorField);
446impl_try_from_ext_type!(VariableShapeTensorField);