Skip to main content

gmt_dos_clients_arrow/arrow/
iter.rs

1use std::collections::VecDeque;
2
3use apache_arrow::{
4    array::{ListArray, PrimitiveArray},
5    datatypes::ArrowPrimitiveType,
6};
7
8use crate::{Arrow, ArrowError, BufferDataType, DropOption, Result};
9
/// Owning iterator over the rows of one field, each row being a `Vec<T>`.
///
/// The rows are held in a [`VecDeque`] so they can be popped cheaply from
/// either end.
///
/// No trait bounds are declared on the type itself: the bounds needed to
/// build or iterate it are stated on the `impl` blocks and functions that
/// actually rely on them, which keeps the type usable in signatures without
/// repeating them.
pub struct ArrowIter<T>(VecDeque<Vec<T>>);
15
16impl<T> Iterator for ArrowIter<T>
17where
18    T: BufferDataType,
19    <T as BufferDataType>::ArrayType: ArrowPrimitiveType,
20    Vec<T>: FromIterator<<<T as BufferDataType>::ArrayType as ArrowPrimitiveType>::Native>,
21{
22    type Item = Vec<T>;
23
24    fn next(&mut self) -> Option<Self::Item> {
25        self.0.pop_front()
26    }
27}
28impl<T> DoubleEndedIterator for ArrowIter<T>
29where
30    T: BufferDataType,
31    <T as BufferDataType>::ArrayType: ArrowPrimitiveType,
32    Vec<T>: FromIterator<<<T as BufferDataType>::ArrayType as ArrowPrimitiveType>::Native>,
33{
34    fn next_back(&mut self) -> Option<Self::Item> {
35        self.0.pop_back()
36    }
37}
38impl<T> ExactSizeIterator for ArrowIter<T>
39where
40    T: BufferDataType,
41    <T as BufferDataType>::ArrayType: ArrowPrimitiveType,
42    Vec<T>: FromIterator<<<T as BufferDataType>::ArrayType as ArrowPrimitiveType>::Native>,
43{
44    fn len(&self) -> usize {
45        self.0.len()
46    }
47}
48
49impl Arrow {
50    /// Returns an iterator over the data in the specified field
51    pub fn iter<S, T>(&mut self, field_name: S) -> Result<ArrowIter<T>>
52    where
53        S: AsRef<str>,
54        String: From<S>,
55        T: BufferDataType,
56        <T as BufferDataType>::ArrayType: ArrowPrimitiveType,
57        Vec<T>: FromIterator<<<T as BufferDataType>::ArrayType as ArrowPrimitiveType>::Native>,
58    {
59        match self.record() {
60            Ok(record) => match record.schema().column_with_name(field_name.as_ref()) {
61                Some((idx, _)) => record
62                    .column(idx)
63                    .as_any()
64                    .downcast_ref::<ListArray>()
65                    .map(|data| {
66                        data.iter()
67                            .map(|data| {
68                                data.map(|data| {
69                                    data.as_any()
70                                        .downcast_ref::<PrimitiveArray<<T as BufferDataType>::ArrayType>>()
71                                        .and_then(|data| data.iter().collect::<Option<Vec<T>>>())
72                                })
73                                .flatten()
74                            })
75                            .collect::<Option<VecDeque<Vec<T>>>>()
76                    })
77                    .flatten()
78                    .ok_or_else(|| ArrowError::ParseField(field_name.into())),
79                None => Err(ArrowError::FieldNotFound(field_name.into())),
80            },
81            Err(e) => Err(e),
82        }.map(|data| ArrowIter(data))
83    }
84    /// Returns an iterator over the data in the specified field consuming [Self]
85    ///
86    /// [Self] won't be written to a file regarless of the original settings
87    pub fn into_iter<S, T>(mut self, field_name: S) -> Result<ArrowIter<T>>
88    where
89        S: AsRef<str>,
90        String: From<S>,
91        T: BufferDataType,
92        <T as BufferDataType>::ArrayType: ArrowPrimitiveType,
93        Vec<T>: FromIterator<<<T as BufferDataType>::ArrayType as ArrowPrimitiveType>::Native>,
94    {
95        self.drop_option = DropOption::NoSave;
96        self.iter(field_name)
97    }
98}