dora_arrow_convert/
lib.rs

1//! Provides functions for converting between Apache Arrow arrays and Rust data types.
2
3#![warn(missing_docs)]
4
5use arrow::array::{
6    Array, Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, UInt8Array,
7    UInt16Array, UInt32Array,
8};
9use arrow::datatypes::DataType;
10use eyre::{ContextCompat, Result, eyre};
11use num::NumCast;
12use std::ops::{Deref, DerefMut};
13
14mod from_impls;
15mod into_impls;
16
17/// Data that can be converted to an Arrow array.
18pub trait IntoArrow {
19    /// The Array type that the data can be converted to.
20    type A: Array;
21
22    /// Convert the data into an Arrow array.
23    fn into_arrow(self) -> Self::A;
24}
25
26/// Wrapper type for an Arrow [`ArrayRef`](arrow::array::ArrayRef).
27#[derive(Debug)]
28pub struct ArrowData(pub arrow::array::ArrayRef);
29
30impl Deref for ArrowData {
31    type Target = arrow::array::ArrayRef;
32
33    fn deref(&self) -> &Self::Target {
34        &self.0
35    }
36}
37
38impl DerefMut for ArrowData {
39    fn deref_mut(&mut self) -> &mut Self::Target {
40        &mut self.0
41    }
42}
43
// Generates `into_vec` from a list of `(DataType variant, Arrow array type, type name)` entries.
// Each entry becomes one `match` arm; the type name is only used in error messages.
macro_rules! register_array_handlers {
    ($(($variant:path, $array_type:ty, $type_name:expr)),* $(,)?) => {
        /// Tries to convert the given Arrow array into a `Vec` of integers or floats.
        ///
        /// The array type is selected from the data type reported by `data`, and every
        /// value is cast to `T` with [`NumCast::from`].
        ///
        /// No null check is performed: the values are read directly through `values()`,
        /// so null slots (if any) are not reported as missing.
        ///
        /// # Errors
        ///
        /// Returns an error if
        /// - the data type of `data` is not one of the registered types,
        /// - the array cannot be downcast to the array type registered for its data type, or
        /// - a value cannot be represented in `T`.
        pub fn into_vec<T>(data: &ArrowData) -> Result<Vec<T>>
        where
            T: Copy + NumCast + 'static,
        {
            match data.data_type() {
                $(
                    $variant => {
                        let buffer: &$array_type = data
                            .as_any()
                            .downcast_ref()
                            .context(concat!("series is not ", $type_name))?;

                        let mut result = Vec::with_capacity(buffer.len());
                        for &v in buffer.values() {
                            // Build the message lazily so that successful casts do not pay
                            // for a `String` allocation on every element.
                            let converted = NumCast::from(v).with_context(|| {
                                format!("Failed to cast value from {} to target type", $type_name)
                            })?;
                            result.push(converted);
                        }
                        Ok(result)
                    }
                ),*
                // Error handling for unsupported types
                unsupported_type => Err(eyre!("Unsupported data type for conversion: {:?}", unsupported_type))
            }
        }
    };
}
73
74// Register all supported array types in one place
75register_array_handlers! {
76    (DataType::Float32, Float32Array, "float32"),
77    (DataType::Float64, Float64Array, "float64"),
78    (DataType::Int8, Int8Array, "int8"),
79    (DataType::Int16, Int16Array, "int16"),
80    (DataType::Int32, Int32Array, "int32"),
81    (DataType::Int64, Int64Array, "int64"),
82    (DataType::UInt8, UInt8Array, "uint8"),
83    (DataType::UInt16, UInt16Array, "uint16"),
84    (DataType::UInt32, UInt32Array, "uint32"),
85}