dora_arrow_convert/
from_impls.rs

1use arrow::{
2    array::{Array, AsArray, PrimitiveArray, StringArray},
3    datatypes::{ArrowPrimitiveType, ArrowTemporalType},
4};
5use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
6use eyre::ContextCompat;
7use half::f16;
8
9use crate::ArrowData;
10
11impl From<ArrowData> for arrow::array::ArrayRef {
12    fn from(value: ArrowData) -> Self {
13        value.0
14    }
15}
16
17impl From<arrow::array::ArrayRef> for ArrowData {
18    fn from(value: arrow::array::ArrayRef) -> Self {
19        Self(value)
20    }
21}
22
23impl TryFrom<&ArrowData> for bool {
24    type Error = eyre::Report;
25    fn try_from(value: &ArrowData) -> Result<Self, Self::Error> {
26        let bool_array = value.as_boolean_opt().context("not a bool array")?;
27        if bool_array.is_empty() {
28            eyre::bail!("empty array");
29        }
30        if bool_array.len() != 1 {
31            eyre::bail!("expected length 1");
32        }
33        if bool_array.null_count() != 0 {
34            eyre::bail!("bool array has nulls");
35        }
36        Ok(bool_array.value(0))
37    }
38}
39
/// Generates the `TryFrom<&ArrowData>` conversions for primitive element types.
///
/// Every `$t => $arrow_type` pair expands to three impls:
///
/// - `$t`: the single value of the array, via `extract_single_primitive`,
/// - `&[$t]`: a borrowed view of the array's values (rejected if any null is present),
/// - `Vec<$t>`: an owned copy of that slice.
///
/// All three fail when the data is not a primitive array of `$arrow_type`.
macro_rules! impl_try_from_arrow_data {
    ($($t:ty => $arrow_type:ident),*) => {
        $(
            impl TryFrom<&ArrowData> for $t {
                type Error = eyre::Report;

                fn try_from(value: &ArrowData) -> Result<Self, Self::Error> {
                    let maybe_array = value.as_primitive_opt::<arrow::datatypes::$arrow_type>();
                    let array = maybe_array
                        .context(concat!("not a primitive ", stringify!($arrow_type), " array"))?;
                    extract_single_primitive(array)
                }
            }

            impl<'a> TryFrom<&'a ArrowData> for &'a [$t] {
                type Error = eyre::Report;

                fn try_from(value: &'a ArrowData) -> Result<Self, Self::Error> {
                    let maybe_array: Option<&PrimitiveArray<arrow::datatypes::$arrow_type>> =
                        value.as_primitive_opt();
                    let array = maybe_array
                        .wrap_err(concat!("not a primitive ", stringify!($arrow_type), " array"))?;
                    if array.null_count() > 0 {
                        eyre::bail!("array has nulls");
                    }
                    Ok(array.values())
                }
            }

            impl<'a> TryFrom<&'a ArrowData> for Vec<$t> {
                type Error = eyre::Report;

                fn try_from(value: &'a ArrowData) -> Result<Self, Self::Error> {
                    // Reuse the slice conversion above, then copy into an owned vector.
                    let slice: &'a [$t] = value.try_into()?;
                    Ok(slice.to_vec())
                }
            }
        )*
    };
}
84
85impl_try_from_arrow_data!(
86    u8 => UInt8Type,
87    u16 => UInt16Type,
88    u32 => UInt32Type,
89    u64 => UInt64Type,
90    i8 => Int8Type,
91    i16 => Int16Type,
92    i32 => Int32Type,
93    i64 => Int64Type,
94    f16 => Float16Type,
95    f32 => Float32Type,
96    f64 => Float64Type
97);
98
99impl<'a> TryFrom<&'a ArrowData> for &'a str {
100    type Error = eyre::Report;
101    fn try_from(value: &'a ArrowData) -> Result<Self, Self::Error> {
102        let array: &StringArray = value.as_string_opt().wrap_err("not a string array")?;
103        if array.is_empty() {
104            eyre::bail!("empty array");
105        }
106        if array.len() != 1 {
107            eyre::bail!("expected length 1");
108        }
109        if array.null_count() != 0 {
110            eyre::bail!("array has nulls");
111        }
112        Ok(array.value(0))
113    }
114}
115
116impl<'a> TryFrom<&'a ArrowData> for String {
117    type Error = eyre::Report;
118    fn try_from(value: &'a ArrowData) -> Result<Self, Self::Error> {
119        let array: &StringArray = value.as_string_opt().wrap_err("not a string array")?;
120        if array.is_empty() {
121            eyre::bail!("empty array");
122        }
123        if array.len() != 1 {
124            eyre::bail!("expected length 1");
125        }
126        if array.null_count() != 0 {
127            eyre::bail!("array has nulls");
128        }
129        Ok(array.value(0).to_string())
130    }
131}
132
133impl TryFrom<&ArrowData> for NaiveDate {
134    type Error = eyre::Report;
135    fn try_from(value: &ArrowData) -> Result<Self, Self::Error> {
136        if let Some(array) = value.as_any().downcast_ref::<arrow::array::Date32Array>() {
137            if check_single_datetime(array) {
138                eyre::bail!("Not a valid array");
139            }
140            return array
141                .value_as_date(0)
142                .context("data type cannot be converted to NaiveDate");
143        }
144        let array = value
145            .as_any()
146            .downcast_ref::<arrow::array::Date64Array>()
147            .context("Reference is neither to a Date32Array nor a Date64Array")?;
148        if check_single_datetime(array) {
149            eyre::bail!("Not a valid array");
150        }
151        array
152            .value_as_date(0)
153            .context("data type cannot be converted to NaiveDate")
154    }
155}
156
157impl TryFrom<&ArrowData> for NaiveTime {
158    type Error = eyre::Report;
159    fn try_from(value: &ArrowData) -> Result<Self, Self::Error> {
160        if let Some(array) = value
161            .as_any()
162            .downcast_ref::<arrow::array::Time32SecondArray>()
163        {
164            if check_single_datetime(array) {
165                eyre::bail!("Not a valid array");
166            }
167            return array
168                .value_as_time(0)
169                .context("data type cannot be converted to NaiveTime");
170        }
171        if let Some(array) = value
172            .as_any()
173            .downcast_ref::<arrow::array::Time32MillisecondArray>()
174        {
175            if check_single_datetime(array) {
176                eyre::bail!("Not a valid array");
177            }
178            return array
179                .value_as_time(0)
180                .context("data type cannot be converted to NaiveTime");
181        }
182        if let Some(array) = value
183            .as_any()
184            .downcast_ref::<arrow::array::Time64MicrosecondArray>()
185        {
186            if check_single_datetime(array) {
187                eyre::bail!("Not a valid array");
188            }
189            return array
190                .value_as_time(0)
191                .context("data type cannot be converted to NaiveTime");
192        }
193        let array = value
194            .as_primitive_opt::<arrow::datatypes::Time64NanosecondType>()
195            .context("not any of the primitive Time arrays")?;
196        if check_single_datetime(array) {
197            eyre::bail!("Not a valid array");
198        }
199        array
200            .value_as_time(0)
201            .context("data type cannot be converted to NaiveTime")
202    }
203}
204
205impl TryFrom<&ArrowData> for NaiveDateTime {
206    type Error = eyre::Report;
207    fn try_from(value: &ArrowData) -> Result<Self, Self::Error> {
208        if let Some(array) = value
209            .as_any()
210            .downcast_ref::<arrow::array::TimestampSecondArray>()
211        {
212            if check_single_datetime(array) {
213                eyre::bail!("Not a valid array");
214            }
215            return array
216                .value_as_datetime(0)
217                .context("data type cannot be converted to NaiveDateTime");
218        }
219        if let Some(array) = value
220            .as_any()
221            .downcast_ref::<arrow::array::TimestampMillisecondArray>()
222        {
223            if check_single_datetime(array) {
224                eyre::bail!("Not a valid array");
225            }
226            return array
227                .value_as_datetime(0)
228                .context("data type cannot be converted to NaiveDateTime");
229        }
230        if let Some(array) = value
231            .as_any()
232            .downcast_ref::<arrow::array::TimestampMicrosecondArray>()
233        {
234            if check_single_datetime(array) {
235                eyre::bail!("Not a valid array");
236            }
237            return array
238                .value_as_datetime(0)
239                .context("data type cannot be converted to NaiveDateTime");
240        }
241        let array = value
242            .as_primitive_opt::<arrow::datatypes::TimestampNanosecondType>()
243            .context("not any of the primitive Time arrays")?;
244        if check_single_datetime(array) {
245            eyre::bail!("Not a valid array");
246        }
247        array
248            .value_as_datetime(0)
249            .context("data type cannot be converted to NaiveDateTime")
250    }
251}
252
253fn check_single_datetime<T>(array: &PrimitiveArray<T>) -> bool
254where
255    T: ArrowTemporalType,
256{
257    array.is_empty() || array.len() != 1 || array.null_count() != 0
258}
259fn extract_single_primitive<T>(array: &PrimitiveArray<T>) -> Result<T::Native, eyre::Error>
260where
261    T: ArrowPrimitiveType,
262{
263    if array.is_empty() {
264        eyre::bail!("empty array");
265    }
266    if array.len() != 1 {
267        eyre::bail!("expected length 1");
268    }
269    if array.null_count() != 0 {
270        eyre::bail!("array has nulls");
271    }
272    Ok(array.value(0))
273}
274
#[cfg(test)]
mod tests {
    use arrow::array::{PrimitiveArray, make_array};

    use crate::ArrowData;

    /// A one-element `UInt8` array converts to the `u8` it holds.
    #[test]
    fn test_u8() {
        let primitive = PrimitiveArray::<arrow::datatypes::UInt8Type>::from(vec![42]);
        let data = ArrowData::from(make_array(primitive.into()));
        let value = u8::try_from(&data).unwrap();
        assert_eq!(value, 42);
    }
}