arrow2/compute/
arity.rs

//! Defines kernels suitable for performing operations on primitive arrays.
2
3use super::utils::{check_same_len, combine_validities};
4use crate::{
5    array::PrimitiveArray,
6    bitmap::{Bitmap, MutableBitmap},
7    datatypes::DataType,
8    error::Result,
9    types::NativeType,
10};
11
12/// Applies an unary and infallible function to a [`PrimitiveArray`]. This is the
13/// fastest way to perform an operation on a [`PrimitiveArray`] when the benefits
14/// of a vectorized operation outweighs the cost of branching nulls and
15/// non-nulls.
16///
17/// # Implementation
18/// This will apply the function for all values, including those on null slots.
19/// This implies that the operation must be infallible for any value of the
20/// corresponding type or this function may panic.
21#[inline]
22pub fn unary<I, F, O>(array: &PrimitiveArray<I>, op: F, data_type: DataType) -> PrimitiveArray<O>
23where
24    I: NativeType,
25    O: NativeType,
26    F: Fn(I) -> O,
27{
28    let values = array.values().iter().map(|v| op(*v)).collect::<Vec<_>>();
29
30    PrimitiveArray::<O>::new(data_type, values.into(), array.validity().cloned())
31}
32
33/// Version of unary that checks for errors in the closure used to create the
34/// buffer
35pub fn try_unary<I, F, O>(
36    array: &PrimitiveArray<I>,
37    op: F,
38    data_type: DataType,
39) -> Result<PrimitiveArray<O>>
40where
41    I: NativeType,
42    O: NativeType,
43    F: Fn(I) -> Result<O>,
44{
45    let values = array
46        .values()
47        .iter()
48        .map(|v| op(*v))
49        .collect::<Result<Vec<_>>>()?
50        .into();
51
52    Ok(PrimitiveArray::<O>::new(
53        data_type,
54        values,
55        array.validity().cloned(),
56    ))
57}
58
59/// Version of unary that returns an array and bitmap. Used when working with
60/// overflowing operations
61pub fn unary_with_bitmap<I, F, O>(
62    array: &PrimitiveArray<I>,
63    op: F,
64    data_type: DataType,
65) -> (PrimitiveArray<O>, Bitmap)
66where
67    I: NativeType,
68    O: NativeType,
69    F: Fn(I) -> (O, bool),
70{
71    let mut mut_bitmap = MutableBitmap::with_capacity(array.len());
72
73    let values = array
74        .values()
75        .iter()
76        .map(|v| {
77            let (res, over) = op(*v);
78            mut_bitmap.push(over);
79            res
80        })
81        .collect::<Vec<_>>()
82        .into();
83
84    (
85        PrimitiveArray::<O>::new(data_type, values, array.validity().cloned()),
86        mut_bitmap.into(),
87    )
88}
89
90/// Version of unary that creates a mutable bitmap that is used to keep track
91/// of checked operations. The resulting bitmap is compared with the array
92/// bitmap to create the final validity array.
93pub fn unary_checked<I, F, O>(
94    array: &PrimitiveArray<I>,
95    op: F,
96    data_type: DataType,
97) -> PrimitiveArray<O>
98where
99    I: NativeType,
100    O: NativeType,
101    F: Fn(I) -> Option<O>,
102{
103    let mut mut_bitmap = MutableBitmap::with_capacity(array.len());
104
105    let values = array
106        .values()
107        .iter()
108        .map(|v| match op(*v) {
109            Some(val) => {
110                mut_bitmap.push(true);
111                val
112            }
113            None => {
114                mut_bitmap.push(false);
115                O::default()
116            }
117        })
118        .collect::<Vec<_>>()
119        .into();
120
121    // The validity has to be checked against the bitmap created during the
122    // creation of the values with the iterator. If an error was found during
123    // the iteration, then the validity is changed to None to mark the value
124    // as Null
125    let bitmap: Bitmap = mut_bitmap.into();
126    let validity = combine_validities(array.validity(), Some(&bitmap));
127
128    PrimitiveArray::<O>::new(data_type, values, validity)
129}
130
131/// Applies a binary operations to two primitive arrays. This is the fastest
132/// way to perform an operation on two primitive array when the benefits of a
133/// vectorized operation outweighs the cost of branching nulls and non-nulls.
134/// # Errors
135/// This function errors iff the arrays have a different length.
136/// # Implementation
137/// This will apply the function for all values, including those on null slots.
138/// This implies that the operation must be infallible for any value of the
139/// corresponding type.
140/// The types of the arrays are not checked with this operation. The closure
141/// "op" needs to handle the different types in the arrays. The datatype for the
142/// resulting array has to be selected by the implementer of the function as
143/// an argument for the function.
144#[inline]
145pub fn binary<T, D, F>(
146    lhs: &PrimitiveArray<T>,
147    rhs: &PrimitiveArray<D>,
148    data_type: DataType,
149    op: F,
150) -> PrimitiveArray<T>
151where
152    T: NativeType,
153    D: NativeType,
154    F: Fn(T, D) -> T,
155{
156    check_same_len(lhs, rhs).unwrap();
157
158    let validity = combine_validities(lhs.validity(), rhs.validity());
159
160    let values = lhs
161        .values()
162        .iter()
163        .zip(rhs.values().iter())
164        .map(|(l, r)| op(*l, *r))
165        .collect::<Vec<_>>()
166        .into();
167
168    PrimitiveArray::<T>::new(data_type, values, validity)
169}
170
171/// Version of binary that checks for errors in the closure used to create the
172/// buffer
173pub fn try_binary<T, D, F>(
174    lhs: &PrimitiveArray<T>,
175    rhs: &PrimitiveArray<D>,
176    data_type: DataType,
177    op: F,
178) -> Result<PrimitiveArray<T>>
179where
180    T: NativeType,
181    D: NativeType,
182    F: Fn(T, D) -> Result<T>,
183{
184    check_same_len(lhs, rhs)?;
185
186    let validity = combine_validities(lhs.validity(), rhs.validity());
187
188    let values = lhs
189        .values()
190        .iter()
191        .zip(rhs.values().iter())
192        .map(|(l, r)| op(*l, *r))
193        .collect::<Result<Vec<_>>>()?
194        .into();
195
196    Ok(PrimitiveArray::<T>::new(data_type, values, validity))
197}
198
199/// Version of binary that returns an array and bitmap. Used when working with
200/// overflowing operations
201pub fn binary_with_bitmap<T, D, F>(
202    lhs: &PrimitiveArray<T>,
203    rhs: &PrimitiveArray<D>,
204    data_type: DataType,
205    op: F,
206) -> (PrimitiveArray<T>, Bitmap)
207where
208    T: NativeType,
209    D: NativeType,
210    F: Fn(T, D) -> (T, bool),
211{
212    check_same_len(lhs, rhs).unwrap();
213
214    let validity = combine_validities(lhs.validity(), rhs.validity());
215
216    let mut mut_bitmap = MutableBitmap::with_capacity(lhs.len());
217
218    let values = lhs
219        .values()
220        .iter()
221        .zip(rhs.values().iter())
222        .map(|(l, r)| {
223            let (res, over) = op(*l, *r);
224            mut_bitmap.push(over);
225            res
226        })
227        .collect::<Vec<_>>()
228        .into();
229
230    (
231        PrimitiveArray::<T>::new(data_type, values, validity),
232        mut_bitmap.into(),
233    )
234}
235
236/// Version of binary that creates a mutable bitmap that is used to keep track
237/// of checked operations. The resulting bitmap is compared with the array
238/// bitmap to create the final validity array.
239pub fn binary_checked<T, D, F>(
240    lhs: &PrimitiveArray<T>,
241    rhs: &PrimitiveArray<D>,
242    data_type: DataType,
243    op: F,
244) -> PrimitiveArray<T>
245where
246    T: NativeType,
247    D: NativeType,
248    F: Fn(T, D) -> Option<T>,
249{
250    check_same_len(lhs, rhs).unwrap();
251
252    let mut mut_bitmap = MutableBitmap::with_capacity(lhs.len());
253
254    let values = lhs
255        .values()
256        .iter()
257        .zip(rhs.values().iter())
258        .map(|(l, r)| match op(*l, *r) {
259            Some(val) => {
260                mut_bitmap.push(true);
261                val
262            }
263            None => {
264                mut_bitmap.push(false);
265                T::default()
266            }
267        })
268        .collect::<Vec<_>>()
269        .into();
270
271    let bitmap: Bitmap = mut_bitmap.into();
272    let validity = combine_validities(lhs.validity(), rhs.validity());
273
274    // The validity has to be checked against the bitmap created during the
275    // creation of the values with the iterator. If an error was found during
276    // the iteration, then the validity is changed to None to mark the value
277    // as Null
278    let validity = combine_validities(validity.as_ref(), Some(&bitmap));
279
280    PrimitiveArray::<T>::new(data_type, values, validity)
281}