polars_arrow/bitmap/
bitmap_ops.rs

1use std::ops::{BitAnd, BitOr, BitXor, Not};
2
3use super::Bitmap;
4use super::bitmask::BitMask;
5use super::utils::{BitChunk, BitChunkIterExact, BitChunksExact};
6use crate::bitmap::MutableBitmap;
7use crate::trusted_len::TrustedLen;
8
9#[inline(always)]
10pub(crate) fn push_bitchunk<T: BitChunk>(buffer: &mut Vec<u8>, value: T) {
11    buffer.extend(value.to_ne_bytes())
12}
13
14/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].
15pub fn chunk_iter_to_vec<T: BitChunk, I: TrustedLen<Item = T>>(iter: I) -> Vec<u8> {
16    let cap = iter.size_hint().0 * size_of::<T>();
17    let mut buffer = Vec::with_capacity(cap);
18    for v in iter {
19        push_bitchunk(&mut buffer, v)
20    }
21    buffer
22}
23
24fn chunk_iter_to_vec_and_remainder<T: BitChunk, I: TrustedLen<Item = T>>(
25    iter: I,
26    remainder: T,
27) -> Vec<u8> {
28    let cap = (iter.size_hint().0 + 1) * size_of::<T>();
29    let mut buffer = Vec::with_capacity(cap);
30    for v in iter {
31        push_bitchunk(&mut buffer, v)
32    }
33    push_bitchunk(&mut buffer, remainder);
34    debug_assert_eq!(buffer.len(), cap);
35    buffer
36}
37
38/// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`].
39pub fn quaternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, a4: &Bitmap, op: F) -> Bitmap
40where
41    F: Fn(u64, u64, u64, u64) -> u64,
42{
43    assert_eq!(a1.len(), a2.len());
44    assert_eq!(a1.len(), a3.len());
45    assert_eq!(a1.len(), a4.len());
46    let a1_chunks = a1.chunks();
47    let a2_chunks = a2.chunks();
48    let a3_chunks = a3.chunks();
49    let a4_chunks = a4.chunks();
50
51    let rem_a1 = a1_chunks.remainder();
52    let rem_a2 = a2_chunks.remainder();
53    let rem_a3 = a3_chunks.remainder();
54    let rem_a4 = a4_chunks.remainder();
55
56    let chunks = a1_chunks
57        .zip(a2_chunks)
58        .zip(a3_chunks)
59        .zip(a4_chunks)
60        .map(|(((a1, a2), a3), a4)| op(a1, a2, a3, a4));
61
62    let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3, rem_a4));
63    let length = a1.len();
64
65    Bitmap::from_u8_vec(buffer, length)
66}
67
68/// Apply a bitwise operation `op` to three inputs and return the result as a [`Bitmap`].
69pub fn ternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, op: F) -> Bitmap
70where
71    F: Fn(u64, u64, u64) -> u64,
72{
73    assert_eq!(a1.len(), a2.len());
74    assert_eq!(a1.len(), a3.len());
75    let a1_chunks = a1.chunks();
76    let a2_chunks = a2.chunks();
77    let a3_chunks = a3.chunks();
78
79    let rem_a1 = a1_chunks.remainder();
80    let rem_a2 = a2_chunks.remainder();
81    let rem_a3 = a3_chunks.remainder();
82
83    let chunks = a1_chunks
84        .zip(a2_chunks)
85        .zip(a3_chunks)
86        .map(|((a1, a2), a3)| op(a1, a2, a3));
87
88    let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3));
89    let length = a1.len();
90
91    Bitmap::from_u8_vec(buffer, length)
92}
93
94/// Apply a bitwise operation `op` to two inputs and return the result as a [`Bitmap`].
95pub fn binary<F>(lhs: &Bitmap, rhs: &Bitmap, op: F) -> Bitmap
96where
97    F: Fn(u64, u64) -> u64,
98{
99    assert_eq!(lhs.len(), rhs.len());
100    let lhs_chunks = lhs.chunks();
101    let rhs_chunks = rhs.chunks();
102    let rem_lhs = lhs_chunks.remainder();
103    let rem_rhs = rhs_chunks.remainder();
104
105    let chunks = lhs_chunks
106        .zip(rhs_chunks)
107        .map(|(left, right)| op(left, right));
108
109    let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_lhs, rem_rhs));
110    let length = lhs.len();
111
112    Bitmap::from_u8_vec(buffer, length)
113}
114
115/// Apply a bitwise operation `op` to two inputs and fold the result.
116pub fn binary_fold<B, F, R>(lhs: &Bitmap, rhs: &Bitmap, op: F, init: B, fold: R) -> B
117where
118    F: Fn(u64, u64) -> B,
119    R: Fn(B, B) -> B,
120{
121    assert_eq!(lhs.len(), rhs.len());
122    let lhs_chunks = lhs.chunks();
123    let rhs_chunks = rhs.chunks();
124    let rem_lhs = lhs_chunks.remainder();
125    let rem_rhs = rhs_chunks.remainder();
126
127    let result = lhs_chunks
128        .zip(rhs_chunks)
129        .fold(init, |prev, (left, right)| fold(prev, op(left, right)));
130
131    fold(result, op(rem_lhs, rem_rhs))
132}
133
134/// Apply a bitwise operation `op` to two inputs and fold the result.
135pub fn binary_mask_fold<B, F, R>(lhs: BitMask<'_>, rhs: BitMask<'_>, op: F, init: B, fold: R) -> B
136where
137    F: Fn(u64, u64) -> B,
138    R: Fn(B, B) -> B,
139{
140    assert_eq!(lhs.len(), rhs.len());
141    let lhs_chunks = lhs.chunks();
142    let rhs_chunks = rhs.chunks();
143    let rem_lhs = lhs_chunks.remainder();
144    let rem_rhs = rhs_chunks.remainder();
145
146    let result = lhs_chunks
147        .zip(rhs_chunks)
148        .fold(init, |prev, (left, right)| fold(prev, op(left, right)));
149
150    fold(result, op(rem_lhs, rem_rhs))
151}
152
153/// Apply a bitwise operation `op` to two inputs and fold the result.
154pub fn binary_fold_mut<B, F, R>(
155    lhs: &MutableBitmap,
156    rhs: &MutableBitmap,
157    op: F,
158    init: B,
159    fold: R,
160) -> B
161where
162    F: Fn(u64, u64) -> B,
163    R: Fn(B, B) -> B,
164{
165    assert_eq!(lhs.len(), rhs.len());
166    let lhs_chunks = lhs.chunks();
167    let rhs_chunks = rhs.chunks();
168    let rem_lhs = lhs_chunks.remainder();
169    let rem_rhs = rhs_chunks.remainder();
170
171    let result = lhs_chunks
172        .zip(rhs_chunks)
173        .fold(init, |prev, (left, right)| fold(prev, op(left, right)));
174
175    fold(result, op(rem_lhs, rem_rhs))
176}
177
178fn unary_impl<F, I>(iter: I, op: F, length: usize) -> Bitmap
179where
180    I: BitChunkIterExact<u64>,
181    F: Fn(u64) -> u64,
182{
183    let rem = op(iter.remainder());
184    let buffer = chunk_iter_to_vec_and_remainder(iter.map(op), rem);
185
186    Bitmap::from_u8_vec(buffer, length)
187}
188
189/// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`].
190pub fn unary<F>(lhs: &Bitmap, op: F) -> Bitmap
191where
192    F: Fn(u64) -> u64,
193{
194    let (slice, offset, length) = lhs.as_slice();
195    if offset == 0 {
196        let iter = BitChunksExact::<u64>::new(slice, length);
197        unary_impl(iter, op, lhs.len())
198    } else {
199        let iter = lhs.chunks::<u64>();
200        unary_impl(iter, op, lhs.len())
201    }
202}
203
204// create a new [`Bitmap`] semantically equal to ``bitmap`` but with an offset equal to ``offset``
205pub(crate) fn align(bitmap: &Bitmap, new_offset: usize) -> Bitmap {
206    let length = bitmap.len();
207
208    let bitmap: Bitmap = std::iter::repeat_n(false, new_offset)
209        .chain(bitmap.iter())
210        .collect();
211
212    bitmap.sliced(new_offset, length)
213}
214
215/// Compute bitwise A AND B operation.
216pub fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
217    if lhs.unset_bits() == lhs.len() || rhs.unset_bits() == rhs.len() {
218        assert_eq!(lhs.len(), rhs.len());
219        Bitmap::new_zeroed(lhs.len())
220    } else {
221        binary(lhs, rhs, |x, y| x & y)
222    }
223}
224
225/// Compute bitwise A AND NOT B operation.
226pub fn and_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
227    binary(lhs, rhs, |x, y| x & !y)
228}
229
230/// Compute bitwise A OR B operation.
231pub fn or(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
232    if lhs.unset_bits() == 0 || rhs.unset_bits() == 0 {
233        assert_eq!(lhs.len(), rhs.len());
234        let mut mutable = MutableBitmap::with_capacity(lhs.len());
235        mutable.extend_constant(lhs.len(), true);
236        mutable.into()
237    } else {
238        binary(lhs, rhs, |x, y| x | y)
239    }
240}
241
242/// Compute bitwise A OR NOT B operation.
243pub fn or_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
244    binary(lhs, rhs, |x, y| x | !y)
245}
246
247/// Compute bitwise XOR operation.
248pub fn xor(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
249    let lhs_nulls = lhs.unset_bits();
250    let rhs_nulls = rhs.unset_bits();
251
252    // all false or all true
253    if lhs_nulls == rhs_nulls && rhs_nulls == rhs.len() || lhs_nulls == 0 && rhs_nulls == 0 {
254        assert_eq!(lhs.len(), rhs.len());
255        Bitmap::new_zeroed(rhs.len())
256    }
257    // all false and all true or vice versa
258    else if (lhs_nulls == 0 && rhs_nulls == rhs.len())
259        || (lhs_nulls == lhs.len() && rhs_nulls == 0)
260    {
261        assert_eq!(lhs.len(), rhs.len());
262        let mut mutable = MutableBitmap::with_capacity(lhs.len());
263        mutable.extend_constant(lhs.len(), true);
264        mutable.into()
265    } else {
266        binary(lhs, rhs, |x, y| x ^ y)
267    }
268}
269
270/// Compute bitwise equality (not XOR) operation.
271fn eq(lhs: &Bitmap, rhs: &Bitmap) -> bool {
272    if lhs.len() != rhs.len() {
273        return false;
274    }
275
276    let mut lhs_chunks = lhs.chunks::<u64>();
277    let mut rhs_chunks = rhs.chunks::<u64>();
278
279    let equal_chunks = lhs_chunks
280        .by_ref()
281        .zip(rhs_chunks.by_ref())
282        .all(|(left, right)| left == right);
283
284    if !equal_chunks {
285        return false;
286    }
287    let lhs_remainder = lhs_chunks.remainder_iter();
288    let rhs_remainder = rhs_chunks.remainder_iter();
289    lhs_remainder.zip(rhs_remainder).all(|(x, y)| x == y)
290}
291
292pub fn num_intersections_with(lhs: BitMask<'_>, rhs: BitMask<'_>) -> usize {
293    binary_mask_fold(
294        lhs,
295        rhs,
296        |lhs, rhs| (lhs & rhs).count_ones() as usize,
297        0,
298        |lhs, rhs| lhs + rhs,
299    )
300}
301
302pub fn intersects_with(lhs: &Bitmap, rhs: &Bitmap) -> bool {
303    binary_fold(
304        lhs,
305        rhs,
306        |lhs, rhs| lhs & rhs != 0,
307        false,
308        |lhs, rhs| lhs || rhs,
309    )
310}
311
312pub fn intersects_with_mut(lhs: &MutableBitmap, rhs: &MutableBitmap) -> bool {
313    binary_fold_mut(
314        lhs,
315        rhs,
316        |lhs, rhs| lhs & rhs != 0,
317        false,
318        |lhs, rhs| lhs || rhs,
319    )
320}
321
322pub fn num_edges(lhs: &Bitmap) -> usize {
323    if lhs.is_empty() {
324        return 0;
325    }
326
327    // @TODO: If is probably quite inefficient to do it like this because now either one is not
328    // aligned. Maybe, we can implement a smarter way to do this.
329    binary_fold(
330        &unsafe { lhs.clone().sliced_unchecked(0, lhs.len() - 1) },
331        &unsafe { lhs.clone().sliced_unchecked(1, lhs.len() - 1) },
332        |l, r| (l ^ r).count_ones() as usize,
333        0,
334        |acc, v| acc + v,
335    )
336}
337
338/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy }`.
339pub fn select_constant(selector: &Bitmap, truthy: &Bitmap, falsy: bool) -> Bitmap {
340    let falsy_mask: u64 = if falsy {
341        0xFFFF_FFFF_FFFF_FFFF
342    } else {
343        0x0000_0000_0000_0000
344    };
345
346    binary(selector, truthy, |s, t| (s & t) | (!s & falsy_mask))
347}
348
349/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy[i] }`.
350pub fn select(selector: &Bitmap, truthy: &Bitmap, falsy: &Bitmap) -> Bitmap {
351    ternary(selector, truthy, falsy, |s, t, f| (s & t) | (!s & f))
352}
353
354impl PartialEq for Bitmap {
355    fn eq(&self, other: &Self) -> bool {
356        eq(self, other)
357    }
358}
359
360impl<'b> BitOr<&'b Bitmap> for &Bitmap {
361    type Output = Bitmap;
362
363    fn bitor(self, rhs: &'b Bitmap) -> Bitmap {
364        or(self, rhs)
365    }
366}
367
368impl<'b> BitAnd<&'b Bitmap> for &Bitmap {
369    type Output = Bitmap;
370
371    fn bitand(self, rhs: &'b Bitmap) -> Bitmap {
372        and(self, rhs)
373    }
374}
375
376impl<'b> BitXor<&'b Bitmap> for &Bitmap {
377    type Output = Bitmap;
378
379    fn bitxor(self, rhs: &'b Bitmap) -> Bitmap {
380        xor(self, rhs)
381    }
382}
383
384impl Not for &Bitmap {
385    type Output = Bitmap;
386
387    fn not(self) -> Bitmap {
388        unary(self, |a| !a)
389    }
390}
391
392#[cfg(test)]
393mod tests {
394    use proptest::prelude::*;
395
396    use super::*;
397    use crate::bitmap::proptest::bitmap;
398
399    fn two_equal_length_bitmaps() -> impl Strategy<Value = (Bitmap, Bitmap)> {
400        (1..=250usize).prop_flat_map(|length| {
401            (
402                bitmap(length..300),
403                bitmap(length..300),
404                0..length,
405                0..length,
406            )
407                .prop_flat_map(move |(lhs, rhs, lhs_offset, rhs_offset)| {
408                    (0..usize::min(length - lhs_offset, length - rhs_offset)).prop_map(
409                        move |slice_length| {
410                            (
411                                lhs.clone().sliced(lhs_offset, slice_length),
412                                rhs.clone().sliced(rhs_offset, slice_length),
413                            )
414                        },
415                    )
416                })
417        })
418    }
419
420    proptest! {
421        #[test]
422        fn test_num_intersections_with(
423            (lhs, rhs) in two_equal_length_bitmaps()
424        ) {
425            let kernel_out = num_intersections_with(BitMask::from_bitmap(&lhs), BitMask::from_bitmap(&rhs));
426            let mut reference_out = 0;
427            for (l, r) in lhs.iter().zip(rhs.iter()) {
428                reference_out += usize::from(l & r);
429            }
430
431            prop_assert_eq!(kernel_out, reference_out);
432        }
433    }
434}