polars_arrow/legacy/kernels/
mod.rs1use std::iter::Enumerate;
2
3use crate::array::BooleanArray;
4use crate::bitmap::utils::BitChunks;
5pub mod ewm;
6pub mod set;
7pub mod sort_partition;
8#[cfg(feature = "performant")]
9pub mod sorted_join;
10#[cfg(feature = "strings")]
11pub mod string;
12pub mod take_agg;
13mod time;
14
15pub use time::{Ambiguous, NonExistent};
16#[cfg(feature = "timezones")]
17pub use time::{convert_to_naive_local, convert_to_naive_local_opt};
18
19#[derive(Debug, PartialEq)]
21enum State {
22 Bits(u64),
24 Chunks,
26 Remainder,
28 Finish,
30}
31
32#[derive(Debug)]
38struct MaskedSlicesIterator<'a> {
39 iter: Enumerate<BitChunks<'a, u64>>,
40 state: State,
41 remainder_mask: u64,
42 remainder_len: usize,
43 chunk_len: usize,
44 len: usize,
45 start: usize,
46 on_region: bool,
47 current_chunk: usize,
48 current_bit: usize,
49 total_len: usize,
50}
51
52impl<'a> MaskedSlicesIterator<'a> {
53 pub(crate) fn new(mask: &'a BooleanArray) -> Self {
54 let chunks = mask.values().chunks::<u64>();
55
56 let chunk_len = mask.len() / 64;
57 let remainder_len = chunks.remainder_len();
58 let remainder_mask = chunks.remainder();
59
60 Self {
61 iter: chunks.enumerate(),
62 state: State::Chunks,
63 remainder_len,
64 chunk_len,
65 remainder_mask,
66 len: 0,
67 start: 0,
68 on_region: false,
69 current_chunk: 0,
70 current_bit: 0,
71 total_len: mask.len(),
72 }
73 }
74
75 #[inline]
76 fn current_start(&self) -> usize {
77 self.current_chunk * 64 + self.current_bit
78 }
79
80 #[inline]
81 fn iterate_bits(&mut self, mask: u64, max: usize) -> Option<(usize, usize)> {
82 while self.current_bit < max {
83 if (mask & (1 << self.current_bit)) != 0 {
84 if !self.on_region {
85 self.start = self.current_start();
86 self.on_region = true;
87 }
88 self.len += 1;
89 } else if self.on_region {
90 let result = (self.start, self.start + self.len);
91 self.len = 0;
92 self.on_region = false;
93 self.current_bit += 1;
94 return Some(result);
95 }
96 self.current_bit += 1;
97 }
98 self.current_bit = 0;
99 None
100 }
101
102 #[inline]
104 fn iterate_chunks(&mut self) -> Option<(usize, usize)> {
105 while let Some((i, mask)) = self.iter.next() {
106 self.current_chunk = i;
107 if mask == 0 {
108 if self.on_region {
109 let result = (self.start, self.start + self.len);
110 self.len = 0;
111 self.on_region = false;
112 return Some(result);
113 }
114 } else if mask == u64::MAX {
115 if !self.on_region {
117 self.start = self.current_start();
118 self.on_region = true;
119 }
120 self.len += 64;
121 } else {
122 self.state = State::Bits(mask);
124 return None;
125 }
126 }
127 self.current_chunk = self.chunk_len;
129 self.state = State::Remainder;
130 None
131 }
132}
133
134impl Iterator for MaskedSlicesIterator<'_> {
135 type Item = (usize, usize);
136
137 fn next(&mut self) -> Option<Self::Item> {
138 match self.state {
139 State::Chunks => {
140 match self.iterate_chunks() {
141 None => {
142 self.current_bit = 0;
144 self.next()
145 },
146 other => other,
147 }
148 },
149 State::Bits(mask) => {
150 match self.iterate_bits(mask, 64) {
151 None => {
152 self.state = State::Chunks;
155 self.next()
156 },
157 other => other,
158 }
159 },
160 State::Remainder => match self.iterate_bits(self.remainder_mask, self.remainder_len) {
161 None => {
162 self.state = State::Finish;
163 if self.on_region {
164 Some((self.start, self.start + self.len))
165 } else {
166 None
167 }
168 },
169 other => other,
170 },
171 State::Finish => None,
172 }
173 }
174}
175
176#[derive(Eq, PartialEq, Debug)]
177enum BinaryMaskedState {
178 Start,
179 LastFalse,
181 LastTrue,
183 Finish,
184}
185
186pub(crate) struct BinaryMaskedSliceIterator<'a> {
187 slice_iter: MaskedSlicesIterator<'a>,
188 filled: usize,
189 low: usize,
190 high: usize,
191 state: BinaryMaskedState,
192}
193
194impl<'a> BinaryMaskedSliceIterator<'a> {
195 pub(crate) fn new(mask: &'a BooleanArray) -> Self {
196 Self {
197 slice_iter: MaskedSlicesIterator::new(mask),
198 filled: 0,
199 low: 0,
200 high: 0,
201 state: BinaryMaskedState::Start,
202 }
203 }
204}
205
206impl Iterator for BinaryMaskedSliceIterator<'_> {
207 type Item = (usize, usize, bool);
208
209 fn next(&mut self) -> Option<Self::Item> {
210 use BinaryMaskedState::*;
211
212 match self.state {
213 Start => {
214 if self.low == 0 && self.high == 0 {
216 match self.slice_iter.next() {
217 Some((low, high)) => {
218 self.low = low;
219 self.high = high;
220
221 if low > 0 {
222 Some((0, low, false))
224 } else {
225 self.state = LastTrue;
226 self.filled = high;
227 Some((low, high, true))
228 }
229 },
230 None => {
231 self.state = Finish;
232 Some((self.filled, self.slice_iter.total_len, false))
233 },
234 }
235 } else {
236 self.filled = self.high;
237 self.state = LastTrue;
238 Some((self.low, self.high, true))
239 }
240 },
241 LastFalse => {
242 self.state = LastTrue;
243 self.filled = self.high;
244 Some((self.low, self.high, true))
245 },
246 LastTrue => match self.slice_iter.next() {
247 Some((low, high)) => {
248 self.low = low;
249 self.high = high;
250 self.state = LastFalse;
251 let last_filled = self.filled;
252 self.filled = low;
253 Some((last_filled, low, false))
254 },
255 None => {
256 self.state = Finish;
257 if self.filled != self.slice_iter.total_len {
258 Some((self.filled, self.slice_iter.total_len, false))
259 } else {
260 None
261 }
262 },
263 },
264 Finish => None,
265 }
266 }
267}
268
269#[cfg(test)]
270mod test {
271 use super::*;
272
273 #[test]
274 fn test_binary_masked_slice_iter() {
275 let mask = BooleanArray::from_slice([false, false, true, true, true, false, false]);
276
277 let out = BinaryMaskedSliceIterator::new(&mask)
278 .map(|(_, _, b)| b)
279 .collect::<Vec<_>>();
280 assert_eq!(out, &[false, true, false]);
281 let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();
282 assert_eq!(out, &[(0, 2, false), (2, 5, true), (5, 7, false)]);
283 let mask = BooleanArray::from_slice([true, true, false, true]);
284 let out = BinaryMaskedSliceIterator::new(&mask)
285 .map(|(_, _, b)| b)
286 .collect::<Vec<_>>();
287 assert_eq!(out, &[true, false, true]);
288 let mask = BooleanArray::from_slice([true, true, false, true, true]);
289 let out = BinaryMaskedSliceIterator::new(&mask)
290 .map(|(_, _, b)| b)
291 .collect::<Vec<_>>();
292 assert_eq!(out, &[true, false, true]);
293 }
294
295 #[test]
296 fn test_binary_slice_mask_iter_with_false() {
297 let mask = BooleanArray::from_slice([false, false]);
298 let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();
299 assert_eq!(out, &[(0, 2, false)]);
300 }
301}