arrow_rayon/parallel_array/
parallel_binary_array.rs

1use arrow_array::types::GenericBinaryType;
2use arrow_array::{BinaryArray, LargeBinaryArray};
3use rayon::iter::ParallelIterator;
4
5use crate::parallel_byte_array::{ParallelGenericByteArray, ParallelGenericByteArrayRef};
6
7pub type ParallelGenericBinaryArray<OffsetSize> =
8    ParallelGenericByteArray<GenericBinaryType<OffsetSize>>;
9pub type ParallelGenericBinaryArrayRef<'data, OffsetSize> =
10    ParallelGenericByteArrayRef<'data, GenericBinaryType<OffsetSize>>;
11
12pub type ParallelBinaryArray = ParallelGenericBinaryArray<i32>;
13pub type ParallelBinaryArrayRef<'data> = ParallelGenericBinaryArrayRef<'data, i32>;
14pub type ParallelLargeBinaryArray = ParallelGenericBinaryArray<i64>;
15pub type ParallelLargeBinaryArrayRef<'data> = ParallelGenericBinaryArrayRef<'data, i64>;
16
17pub trait BinaryArrayRefParallelIterator<'data> {
18    type Iter: ParallelIterator<Item = Option<&'data [u8]>>;
19
20    fn par_iter(&'data self) -> Self::Iter;
21}
22
23impl<'data> BinaryArrayRefParallelIterator<'data> for BinaryArray {
24    type Iter = ParallelBinaryArrayRef<'data>;
25
26    fn par_iter(&'data self) -> Self::Iter {
27        ParallelBinaryArrayRef::new(self)
28    }
29}
30
31pub trait LargeBinaryArrayRefParallelIterator<'data> {
32    type Iter: ParallelIterator<Item = Option<&'data [u8]>>;
33
34    fn par_iter(&'data self) -> Self::Iter;
35}
36
37impl<'data> LargeBinaryArrayRefParallelIterator<'data> for LargeBinaryArray {
38    type Iter = ParallelLargeBinaryArrayRef<'data>;
39
40    fn par_iter(&'data self) -> Self::Iter {
41        ParallelLargeBinaryArrayRef::new(self)
42    }
43}
44
#[cfg(test)]
mod tests {
    use arrow_array::Array;

    use super::*;

    /// Fixture shared by both tests: three byte strings with a `None` entry at index 1.
    fn sample_array() -> BinaryArray {
        BinaryArray::from_opt_vec(vec![Some(b"one"), None, Some(b"two"), Some(b"three")])
    }

    /// `par_iter` yields every slot in order; `None` slots are rendered as an empty string.
    #[test]
    fn test_par_iter() {
        let array = sample_array();
        let rendered: Vec<String> = array
            .par_iter()
            .map(|slot| match slot {
                Some(bytes) => String::from_utf8_lossy(bytes).into_owned(),
                None => String::new(),
            })
            .collect();
        assert_eq!(rendered, ["one", "", "two", "three"]);
    }

    /// Collecting mapped items into a `ParallelBinaryArray` keeps order and the null slot.
    #[test]
    fn test_collect_array() {
        let array = sample_array();
        let uppercased: ParallelBinaryArray = array
            .par_iter()
            .map(|slot| slot.map(|bytes| bytes.to_ascii_uppercase()))
            .collect();
        let result = uppercased.into_inner();
        assert_eq!(result.value(0), b"ONE");
        assert!(result.is_null(1));
        assert_eq!(result.value(2), b"TWO");
        assert_eq!(result.value(3), b"THREE");
    }
}