polars_arrow/legacy/kernels/
mod.rs

1use std::iter::Enumerate;
2
3use crate::array::BooleanArray;
4use crate::bitmap::utils::BitChunks;
5pub mod ewm;
6pub mod set;
7pub mod sort_partition;
8#[cfg(feature = "performant")]
9pub mod sorted_join;
10#[cfg(feature = "strings")]
11pub mod string;
12pub mod take_agg;
13mod time;
14
15pub use time::{Ambiguous, NonExistent};
16#[cfg(feature = "timezones")]
17pub use time::{convert_to_naive_local, convert_to_naive_local_opt};
18
19/// Internal state of [SlicesIterator]
20#[derive(Debug, PartialEq)]
21enum State {
22    // it is iterating over bits of a mask (`u64`, steps of size of 1 slot)
23    Bits(u64),
24    // it is iterating over chunks (steps of size of 64 slots)
25    Chunks,
26    // it is iterating over the remaining bits (steps of size of 1 slot)
27    Remainder,
28    // nothing more to iterate.
29    Finish,
30}
31
32/// Forked and modified from arrow crate.
33///
34/// An iterator of `(usize, usize)` each representing an interval `[start,end[` whose
35/// slots of a [BooleanArray] are true. Each interval corresponds to a contiguous region of memory to be
36/// "taken" from an array to be filtered.
37#[derive(Debug)]
38struct MaskedSlicesIterator<'a> {
39    iter: Enumerate<BitChunks<'a, u64>>,
40    state: State,
41    remainder_mask: u64,
42    remainder_len: usize,
43    chunk_len: usize,
44    len: usize,
45    start: usize,
46    on_region: bool,
47    current_chunk: usize,
48    current_bit: usize,
49    total_len: usize,
50}
51
52impl<'a> MaskedSlicesIterator<'a> {
53    pub(crate) fn new(mask: &'a BooleanArray) -> Self {
54        let chunks = mask.values().chunks::<u64>();
55
56        let chunk_len = mask.len() / 64;
57        let remainder_len = chunks.remainder_len();
58        let remainder_mask = chunks.remainder();
59
60        Self {
61            iter: chunks.enumerate(),
62            state: State::Chunks,
63            remainder_len,
64            chunk_len,
65            remainder_mask,
66            len: 0,
67            start: 0,
68            on_region: false,
69            current_chunk: 0,
70            current_bit: 0,
71            total_len: mask.len(),
72        }
73    }
74
75    #[inline]
76    fn current_start(&self) -> usize {
77        self.current_chunk * 64 + self.current_bit
78    }
79
80    #[inline]
81    fn iterate_bits(&mut self, mask: u64, max: usize) -> Option<(usize, usize)> {
82        while self.current_bit < max {
83            if (mask & (1 << self.current_bit)) != 0 {
84                if !self.on_region {
85                    self.start = self.current_start();
86                    self.on_region = true;
87                }
88                self.len += 1;
89            } else if self.on_region {
90                let result = (self.start, self.start + self.len);
91                self.len = 0;
92                self.on_region = false;
93                self.current_bit += 1;
94                return Some(result);
95            }
96            self.current_bit += 1;
97        }
98        self.current_bit = 0;
99        None
100    }
101
102    /// iterates over chunks.
103    #[inline]
104    fn iterate_chunks(&mut self) -> Option<(usize, usize)> {
105        while let Some((i, mask)) = self.iter.next() {
106            self.current_chunk = i;
107            if mask == 0 {
108                if self.on_region {
109                    let result = (self.start, self.start + self.len);
110                    self.len = 0;
111                    self.on_region = false;
112                    return Some(result);
113                }
114            } else if mask == u64::MAX {
115                // = !0u64
116                if !self.on_region {
117                    self.start = self.current_start();
118                    self.on_region = true;
119                }
120                self.len += 64;
121            } else {
122                // there is a chunk that has a non-trivial mask => iterate over bits.
123                self.state = State::Bits(mask);
124                return None;
125            }
126        }
127        // no more chunks => start iterating over the remainder
128        self.current_chunk = self.chunk_len;
129        self.state = State::Remainder;
130        None
131    }
132}
133
134impl Iterator for MaskedSlicesIterator<'_> {
135    type Item = (usize, usize);
136
137    fn next(&mut self) -> Option<Self::Item> {
138        match self.state {
139            State::Chunks => {
140                match self.iterate_chunks() {
141                    None => {
142                        // iterating over chunks does not yield any new slice => continue to the next
143                        self.current_bit = 0;
144                        self.next()
145                    },
146                    other => other,
147                }
148            },
149            State::Bits(mask) => {
150                match self.iterate_bits(mask, 64) {
151                    None => {
152                        // iterating over bits does not yield any new slice => change back
153                        // to chunks and continue to the next
154                        self.state = State::Chunks;
155                        self.next()
156                    },
157                    other => other,
158                }
159            },
160            State::Remainder => match self.iterate_bits(self.remainder_mask, self.remainder_len) {
161                None => {
162                    self.state = State::Finish;
163                    if self.on_region {
164                        Some((self.start, self.start + self.len))
165                    } else {
166                        None
167                    }
168                },
169                other => other,
170            },
171            State::Finish => None,
172        }
173    }
174}
175
176#[derive(Eq, PartialEq, Debug)]
177enum BinaryMaskedState {
178    Start,
179    // Last masks were false values
180    LastFalse,
181    // Last masks were true values
182    LastTrue,
183    Finish,
184}
185
186pub(crate) struct BinaryMaskedSliceIterator<'a> {
187    slice_iter: MaskedSlicesIterator<'a>,
188    filled: usize,
189    low: usize,
190    high: usize,
191    state: BinaryMaskedState,
192}
193
194impl<'a> BinaryMaskedSliceIterator<'a> {
195    pub(crate) fn new(mask: &'a BooleanArray) -> Self {
196        Self {
197            slice_iter: MaskedSlicesIterator::new(mask),
198            filled: 0,
199            low: 0,
200            high: 0,
201            state: BinaryMaskedState::Start,
202        }
203    }
204}
205
206impl Iterator for BinaryMaskedSliceIterator<'_> {
207    type Item = (usize, usize, bool);
208
209    fn next(&mut self) -> Option<Self::Item> {
210        use BinaryMaskedState::*;
211
212        match self.state {
213            Start => {
214                // first iteration
215                if self.low == 0 && self.high == 0 {
216                    match self.slice_iter.next() {
217                        Some((low, high)) => {
218                            self.low = low;
219                            self.high = high;
220
221                            if low > 0 {
222                                // do another start iteration.
223                                Some((0, low, false))
224                            } else {
225                                self.state = LastTrue;
226                                self.filled = high;
227                                Some((low, high, true))
228                            }
229                        },
230                        None => {
231                            self.state = Finish;
232                            Some((self.filled, self.slice_iter.total_len, false))
233                        },
234                    }
235                } else {
236                    self.filled = self.high;
237                    self.state = LastTrue;
238                    Some((self.low, self.high, true))
239                }
240            },
241            LastFalse => {
242                self.state = LastTrue;
243                self.filled = self.high;
244                Some((self.low, self.high, true))
245            },
246            LastTrue => match self.slice_iter.next() {
247                Some((low, high)) => {
248                    self.low = low;
249                    self.high = high;
250                    self.state = LastFalse;
251                    let last_filled = self.filled;
252                    self.filled = low;
253                    Some((last_filled, low, false))
254                },
255                None => {
256                    self.state = Finish;
257                    if self.filled != self.slice_iter.total_len {
258                        Some((self.filled, self.slice_iter.total_len, false))
259                    } else {
260                        None
261                    }
262                },
263            },
264            Finish => None,
265        }
266    }
267}
268
269#[cfg(test)]
270mod test {
271    use super::*;
272
273    #[test]
274    fn test_binary_masked_slice_iter() {
275        let mask = BooleanArray::from_slice([false, false, true, true, true, false, false]);
276
277        let out = BinaryMaskedSliceIterator::new(&mask)
278            .map(|(_, _, b)| b)
279            .collect::<Vec<_>>();
280        assert_eq!(out, &[false, true, false]);
281        let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();
282        assert_eq!(out, &[(0, 2, false), (2, 5, true), (5, 7, false)]);
283        let mask = BooleanArray::from_slice([true, true, false, true]);
284        let out = BinaryMaskedSliceIterator::new(&mask)
285            .map(|(_, _, b)| b)
286            .collect::<Vec<_>>();
287        assert_eq!(out, &[true, false, true]);
288        let mask = BooleanArray::from_slice([true, true, false, true, true]);
289        let out = BinaryMaskedSliceIterator::new(&mask)
290            .map(|(_, _, b)| b)
291            .collect::<Vec<_>>();
292        assert_eq!(out, &[true, false, true]);
293    }
294
295    #[test]
296    fn test_binary_slice_mask_iter_with_false() {
297        let mask = BooleanArray::from_slice([false, false]);
298        let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();
299        assert_eq!(out, &[(0, 2, false)]);
300    }
301}