serde_arrow/arrow_impl.rs
//! Support for the `arrow` crate (*requires one of the `arrow-*` features*)
2//!
//! Functions to convert Rust objects into arrow Arrays and to deserialize Rust
//! objects from `arrow` arrays.
5//!
6#![deny(missing_docs)]
7use std::sync::Arc;
8
9use marrow::{datatypes::Field, error::MarrowError, view::View};
10use serde::{Deserialize, Serialize};
11
12use crate::{
13 _impl::arrow::{
14 array::{Array, ArrayRef, RecordBatch},
15 datatypes::{Field as ArrowField, FieldRef, Schema},
16 },
17 internal::{
18 array_builder::ArrayBuilder,
19 deserializer::Deserializer,
20 error::{fail, Error, ErrorKind, Result},
21 schema::extensions::{Bool8Field, FixedShapeTensorField, VariableShapeTensorField},
22 schema::{SchemaLike, Sealed, SerdeArrowSchema, TracingOptions},
23 serializer::Serializer,
24 },
25};
26
27/// Build arrow arrays from the given items (*requires one of the `arrow-*`
28/// features*)
29///
30/// `items` should be given in the form a list of records (e.g., a vector of
31/// structs). To serialize items encoding single values consider the
32/// [`Items`][crate::utils::Items] wrapper.
33///
34/// To build arrays record by record use [`ArrayBuilder`]. To construct a record
35/// batch, consider using [`to_record_batch`].
36///
37/// Example:
38///
39/// ```rust
40/// # fn main() -> serde_arrow::Result<()> {
41/// # use serde_arrow::_impl::arrow;
42/// use arrow::datatypes::FieldRef;
43/// use serde::{Serialize, Deserialize};
44/// use serde_arrow::schema::{SchemaLike, TracingOptions};
45///
46/// ##[derive(Serialize, Deserialize)]
47/// struct Record {
48/// a: Option<f32>,
49/// b: u64,
50/// }
51///
52/// let items = vec![
53/// Record { a: Some(1.0), b: 2},
54/// // ...
55/// ];
56///
57/// let fields = Vec::<FieldRef>::from_type::<Record>(TracingOptions::default())?;
58/// let arrays = serde_arrow::to_arrow(&fields, &items)?;
59/// #
60/// # assert_eq!(arrays.len(), 2);
61/// # Ok(())
62/// # }
63/// ```
64///
65pub fn to_arrow<T: Serialize>(fields: &[FieldRef], items: T) -> Result<Vec<ArrayRef>> {
66 let builder = ArrayBuilder::from_arrow(fields)?;
67 items
68 .serialize(Serializer::new(builder))?
69 .into_inner()
70 .into_arrow()
71}
72
73/// Deserialize items from arrow arrays (*requires one of the `arrow-*`
74/// features*)
75///
76/// The type should be a list of records (e.g., a vector of structs). To
77/// deserialize items encoding single values consider the
78/// [`Items`][crate::utils::Items] wrapper.
79///
80/// ```rust
81/// # fn main() -> serde_arrow::Result<()> {
82/// # use serde_arrow::_impl::arrow;
83/// use arrow::datatypes::FieldRef;
84/// use serde::{Deserialize, Serialize};
85/// use serde_arrow::schema::{SchemaLike, TracingOptions};
86///
87/// # let (_, arrays) = serde_arrow::_impl::docs::defs::example_arrow_arrays();
88/// #
89/// ##[derive(Deserialize, Serialize)]
90/// struct Record {
91/// a: Option<f32>,
92/// b: u64,
93/// }
94///
95/// let fields = Vec::<FieldRef>::from_type::<Record>(TracingOptions::default())?;
96/// let items: Vec<Record> = serde_arrow::from_arrow(&fields, &arrays)?;
97/// # Ok(())
98/// # }
99/// ```
100///
101pub fn from_arrow<'de, T, A>(fields: &[FieldRef], arrays: &'de [A]) -> Result<T>
102where
103 T: Deserialize<'de>,
104 A: AsRef<dyn Array>,
105{
106 T::deserialize(Deserializer::from_arrow(fields, arrays)?)
107}
108
109/// Build a record batch from the given items (*requires one of the `arrow-*`
110/// features*)
111///
112/// `items` should be given in the form a list of records (e.g., a vector of
113/// structs). To serialize items encoding single values consider the
114/// [`Items`][crate::utils::Items] wrapper.
115///
116/// To build arrays record by record use [`ArrayBuilder`].
117///
118/// Example:
119///
120/// ```rust
121/// # fn main() -> serde_arrow::Result<()> {
122/// # use serde_arrow::_impl::arrow;
123/// use arrow::datatypes::FieldRef;
124/// use serde::{Serialize, Deserialize};
125/// use serde_arrow::schema::{SchemaLike, TracingOptions};
126///
127/// ##[derive(Serialize, Deserialize)]
128/// struct Record {
129/// a: Option<f32>,
130/// b: u64,
131/// }
132///
133/// let items = vec![
134/// Record { a: Some(1.0), b: 2},
135/// // ...
136/// ];
137///
138/// let fields = Vec::<FieldRef>::from_type::<Record>(TracingOptions::default())?;
139/// let record_batch = serde_arrow::to_record_batch(&fields, &items)?;
140///
141/// assert_eq!(record_batch.num_columns(), 2);
142/// assert_eq!(record_batch.num_rows(), 1);
143/// # Ok(())
144/// # }
145/// ```
146pub fn to_record_batch<T: Serialize>(fields: &[FieldRef], items: &T) -> Result<RecordBatch> {
147 let builder = ArrayBuilder::from_arrow(fields)?;
148 items
149 .serialize(Serializer::new(builder))?
150 .into_inner()
151 .into_record_batch()
152}
153
154/// Deserialize items from a record batch (*requires one of the `arrow-*`
155/// features*)
156///
157/// The type should be a list of records (e.g., a vector of structs). To
158/// deserialize items encoding single values consider the
159/// [`Items`][crate::utils::Items] wrapper.
160///
161/// ```rust
162/// # fn main() -> serde_arrow::Result<()> {
163/// # let record_batch = serde_arrow::_impl::docs::defs::example_record_batch();
164/// #
165/// use serde::Deserialize;
166///
167/// ##[derive(Deserialize)]
168/// struct Record {
169/// a: Option<f32>,
170/// b: u64,
171/// }
172///
173/// let items: Vec<Record> = serde_arrow::from_record_batch(&record_batch)?;
174/// # Ok(())
175/// # }
176/// ```
177///
178pub fn from_record_batch<'de, T: Deserialize<'de>>(record_batch: &'de RecordBatch) -> Result<T> {
179 T::deserialize(Deserializer::from_record_batch(record_batch)?)
180}
181
182/// Support `arrow` (*requires one of the `arrow-*` features*)
183impl crate::internal::array_builder::ArrayBuilder {
184 /// Build an ArrayBuilder from `arrow` fields (*requires one of the
185 /// `arrow-*` features*)
186 pub fn from_arrow(fields: &[FieldRef]) -> Result<Self> {
187 let fields = fields_from_field_refs(fields)?;
188 Self::new(SerdeArrowSchema { fields })
189 }
190
191 /// Construct `arrow` arrays and reset the builder (*requires one of the
192 /// `arrow-*` features*)
193 pub fn to_arrow(&mut self) -> Result<Vec<ArrayRef>> {
194 self.take().into_arrow()
195 }
196
197 /// Consume the builder and construct the `arrow` arrays (*requires one of
198 /// the `arrow-*` features*)
199 pub fn into_arrow(self) -> Result<Vec<ArrayRef>> {
200 let (arrays, _) = self.into_arrays_and_field_metas()?;
201 Ok(arrays
202 .into_iter()
203 .map(ArrayRef::try_from)
204 .collect::<Result<_, MarrowError>>()?)
205 }
206
207 /// Construct a [`RecordBatch`] and reset the builder (*requires one of the
208 /// `arrow-*` features*)
209 pub fn to_record_batch(&mut self) -> Result<RecordBatch> {
210 self.take().into_record_batch()
211 }
212
213 /// Construct a [`RecordBatch`] and consume the builder (*requires one of the
214 /// `arrow-*` features*)
215 pub fn into_record_batch(self) -> Result<RecordBatch> {
216 let (arrays, metas) = self.into_arrays_and_field_metas()?;
217
218 let arrays = arrays
219 .into_iter()
220 .map(ArrayRef::try_from)
221 .collect::<Result<Vec<ArrayRef>, MarrowError>>()?;
222
223 let mut fields = Vec::with_capacity(arrays.len());
224 for (array, meta) in std::iter::zip(&arrays, metas) {
225 fields.push(FieldRef::new(
226 ArrowField::new(meta.name, array.data_type().clone(), meta.nullable)
227 .with_metadata(meta.metadata),
228 ));
229 }
230
231 let schema = Schema::new(fields);
232 RecordBatch::try_new(Arc::new(schema), arrays)
233 .map_err(|err| Error::new_from(ErrorKind::Custom, err.to_string(), err))
234 }
235}
236
237impl<'de> Deserializer<'de> {
238 /// Construct a new deserializer from `arrow` arrays (*requires one of the
239 /// `arrow-*` features*)
240 ///
241 /// Usage
242 /// ```rust
243 /// # fn main() -> serde_arrow::Result<()> {
244 /// # let (_, arrays) = serde_arrow::_impl::docs::defs::example_arrow_arrays();
245 /// # use serde_arrow::_impl::arrow;
246 /// use arrow::datatypes::FieldRef;
247 /// use serde::{Deserialize, Serialize};
248 /// use serde_arrow::{Deserializer, schema::{SchemaLike, TracingOptions}};
249 ///
250 /// ##[derive(Deserialize, Serialize)]
251 /// struct Record {
252 /// a: Option<f32>,
253 /// b: u64,
254 /// }
255 ///
256 /// let fields = Vec::<FieldRef>::from_type::<Record>(TracingOptions::default())?;
257 ///
258 /// let deserializer = Deserializer::from_arrow(&fields, &arrays)?;
259 /// let items = Vec::<Record>::deserialize(deserializer)?;
260 /// # Ok(())
261 /// # }
262 /// ```
263 pub fn from_arrow<A>(fields: &[FieldRef], arrays: &'de [A]) -> Result<Self>
264 where
265 A: AsRef<dyn Array>,
266 {
267 if fields.len() != arrays.len() {
268 fail!(
269 "different number of fields ({}) and arrays ({})",
270 fields.len(),
271 arrays.len()
272 );
273 }
274
275 let fields = fields_from_field_refs(fields)?;
276
277 let mut views = Vec::new();
278 for array in arrays {
279 views.push(View::try_from(array.as_ref())?);
280 }
281
282 Deserializer::new(&fields, views)
283 }
284
285 /// Construct a new deserializer from a record batch (*requires one of the
286 /// `arrow-*` features*)
287 ///
288 /// Usage:
289 ///
290 /// ```rust
291 /// # fn main() -> serde_arrow::Result<()> {
292 /// # let record_batch = serde_arrow::_impl::docs::defs::example_record_batch();
293 /// #
294 /// use serde::Deserialize;
295 /// use serde_arrow::Deserializer;
296 ///
297 /// ##[derive(Deserialize)]
298 /// struct Record {
299 /// a: Option<f32>,
300 /// b: u64,
301 /// }
302 ///
303 /// let deserializer = Deserializer::from_record_batch(&record_batch)?;
304 /// let items = Vec::<Record>::deserialize(deserializer)?;
305 /// # Ok(())
306 /// # }
307 /// ```
308 ///
309 pub fn from_record_batch(record_batch: &'de RecordBatch) -> Result<Self> {
310 let schema = record_batch.schema();
311 Deserializer::from_arrow(schema.fields(), record_batch.columns())
312 }
313}
314
315fn fields_from_field_refs(fields: &[FieldRef]) -> Result<Vec<Field>> {
316 Ok(fields
317 .iter()
318 .map(|field| Field::try_from(field.as_ref()))
319 .collect::<Result<_, MarrowError>>()?)
320}
321
322impl TryFrom<SerdeArrowSchema> for Vec<ArrowField> {
323 type Error = Error;
324
325 fn try_from(value: SerdeArrowSchema) -> Result<Self> {
326 (&value).try_into()
327 }
328}
329
330impl<'a> TryFrom<&'a SerdeArrowSchema> for Vec<ArrowField> {
331 type Error = Error;
332
333 fn try_from(value: &'a SerdeArrowSchema) -> Result<Self> {
334 Ok(value
335 .fields
336 .iter()
337 .map(ArrowField::try_from)
338 .collect::<Result<_, MarrowError>>()?)
339 }
340}
341
342impl TryFrom<SerdeArrowSchema> for Vec<FieldRef> {
343 type Error = Error;
344
345 fn try_from(value: SerdeArrowSchema) -> Result<Self> {
346 (&value).try_into()
347 }
348}
349
350impl<'a> TryFrom<&'a SerdeArrowSchema> for Vec<FieldRef> {
351 type Error = Error;
352
353 fn try_from(value: &'a SerdeArrowSchema) -> Result<Self> {
354 Ok(value
355 .fields
356 .iter()
357 .map(|f| Ok(Arc::new(ArrowField::try_from(f)?)))
358 .collect::<Result<_, MarrowError>>()?)
359 }
360}
361
362impl<'a> TryFrom<&'a [ArrowField]> for SerdeArrowSchema {
363 type Error = Error;
364
365 fn try_from(fields: &'a [ArrowField]) -> Result<Self> {
366 Ok(Self {
367 fields: fields
368 .iter()
369 .map(Field::try_from)
370 .collect::<Result<_, MarrowError>>()?,
371 })
372 }
373}
374
375impl<'a> TryFrom<&'a [FieldRef]> for SerdeArrowSchema {
376 type Error = Error;
377
378 fn try_from(fields: &'a [FieldRef]) -> Result<Self, Self::Error> {
379 Ok(Self {
380 fields: fields
381 .iter()
382 .map(|f| Field::try_from(f.as_ref()))
383 .collect::<Result<_, MarrowError>>()?,
384 })
385 }
386}
387
388impl Sealed for Vec<ArrowField> {}
389
390/// Schema support for `Vec<arrow::datatype::Field>` (*requires one of the
391/// `arrow-*` features*)
392impl SchemaLike for Vec<ArrowField> {
393 fn from_value<T: Serialize>(value: T) -> Result<Self> {
394 SerdeArrowSchema::from_value(value)?.try_into()
395 }
396
397 fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result<Self> {
398 SerdeArrowSchema::from_type::<T>(options)?.try_into()
399 }
400
401 fn from_samples<T: Serialize>(samples: T, options: TracingOptions) -> Result<Self> {
402 SerdeArrowSchema::from_samples(samples, options)?.try_into()
403 }
404}
405
406impl Sealed for Vec<FieldRef> {}
407
408/// Schema support for `Vec<arrow::datatype::FieldRef>` (*requires one of the
409/// `arrow-*` features*)
410impl SchemaLike for Vec<FieldRef> {
411 fn from_value<T: Serialize>(value: T) -> Result<Self> {
412 SerdeArrowSchema::from_value(value)?.try_into()
413 }
414
415 fn from_type<'de, T: Deserialize<'de>>(options: TracingOptions) -> Result<Self> {
416 SerdeArrowSchema::from_type::<T>(options)?.try_into()
417 }
418
419 fn from_samples<T: Serialize>(samples: T, options: TracingOptions) -> Result<Self> {
420 SerdeArrowSchema::from_samples(samples, options)?.try_into()
421 }
422}
423
424macro_rules! impl_try_from_ext_type {
425 ($ty:ty) => {
426 impl TryFrom<&$ty> for ArrowField {
427 type Error = Error;
428
429 fn try_from(value: &$ty) -> Result<Self, Self::Error> {
430 Ok(Self::try_from(&Field::try_from(value)?)?)
431 }
432 }
433
434 impl TryFrom<$ty> for ArrowField {
435 type Error = Error;
436
437 fn try_from(value: $ty) -> Result<Self, Self::Error> {
438 Self::try_from(&value)
439 }
440 }
441 };
442}
443
444impl_try_from_ext_type!(Bool8Field);
445impl_try_from_ext_type!(FixedShapeTensorField);
446impl_try_from_ext_type!(VariableShapeTensorField);