polars_arrow/bitmap/
bitmap_ops.rs

1use std::ops::{BitAnd, BitOr, BitXor, Not};
2
3use super::Bitmap;
4use super::utils::{BitChunk, BitChunkIterExact, BitChunksExact};
5use crate::bitmap::MutableBitmap;
6use crate::trusted_len::TrustedLen;
7
8#[inline(always)]
9pub(crate) fn push_bitchunk<T: BitChunk>(buffer: &mut Vec<u8>, value: T) {
10    buffer.extend(value.to_ne_bytes())
11}
12
13/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].
14pub fn chunk_iter_to_vec<T: BitChunk, I: TrustedLen<Item = T>>(iter: I) -> Vec<u8> {
15    let cap = iter.size_hint().0 * size_of::<T>();
16    let mut buffer = Vec::with_capacity(cap);
17    for v in iter {
18        push_bitchunk(&mut buffer, v)
19    }
20    buffer
21}
22
23fn chunk_iter_to_vec_and_remainder<T: BitChunk, I: TrustedLen<Item = T>>(
24    iter: I,
25    remainder: T,
26) -> Vec<u8> {
27    let cap = (iter.size_hint().0 + 1) * size_of::<T>();
28    let mut buffer = Vec::with_capacity(cap);
29    for v in iter {
30        push_bitchunk(&mut buffer, v)
31    }
32    push_bitchunk(&mut buffer, remainder);
33    debug_assert_eq!(buffer.len(), cap);
34    buffer
35}
36
37/// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`].
38pub fn quaternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, a4: &Bitmap, op: F) -> Bitmap
39where
40    F: Fn(u64, u64, u64, u64) -> u64,
41{
42    assert_eq!(a1.len(), a2.len());
43    assert_eq!(a1.len(), a3.len());
44    assert_eq!(a1.len(), a4.len());
45    let a1_chunks = a1.chunks();
46    let a2_chunks = a2.chunks();
47    let a3_chunks = a3.chunks();
48    let a4_chunks = a4.chunks();
49
50    let rem_a1 = a1_chunks.remainder();
51    let rem_a2 = a2_chunks.remainder();
52    let rem_a3 = a3_chunks.remainder();
53    let rem_a4 = a4_chunks.remainder();
54
55    let chunks = a1_chunks
56        .zip(a2_chunks)
57        .zip(a3_chunks)
58        .zip(a4_chunks)
59        .map(|(((a1, a2), a3), a4)| op(a1, a2, a3, a4));
60
61    let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3, rem_a4));
62    let length = a1.len();
63
64    Bitmap::from_u8_vec(buffer, length)
65}
66
67/// Apply a bitwise operation `op` to three inputs and return the result as a [`Bitmap`].
68pub fn ternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, op: F) -> Bitmap
69where
70    F: Fn(u64, u64, u64) -> u64,
71{
72    assert_eq!(a1.len(), a2.len());
73    assert_eq!(a1.len(), a3.len());
74    let a1_chunks = a1.chunks();
75    let a2_chunks = a2.chunks();
76    let a3_chunks = a3.chunks();
77
78    let rem_a1 = a1_chunks.remainder();
79    let rem_a2 = a2_chunks.remainder();
80    let rem_a3 = a3_chunks.remainder();
81
82    let chunks = a1_chunks
83        .zip(a2_chunks)
84        .zip(a3_chunks)
85        .map(|((a1, a2), a3)| op(a1, a2, a3));
86
87    let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3));
88    let length = a1.len();
89
90    Bitmap::from_u8_vec(buffer, length)
91}
92
93/// Apply a bitwise operation `op` to two inputs and return the result as a [`Bitmap`].
94pub fn binary<F>(lhs: &Bitmap, rhs: &Bitmap, op: F) -> Bitmap
95where
96    F: Fn(u64, u64) -> u64,
97{
98    assert_eq!(lhs.len(), rhs.len());
99    let lhs_chunks = lhs.chunks();
100    let rhs_chunks = rhs.chunks();
101    let rem_lhs = lhs_chunks.remainder();
102    let rem_rhs = rhs_chunks.remainder();
103
104    let chunks = lhs_chunks
105        .zip(rhs_chunks)
106        .map(|(left, right)| op(left, right));
107
108    let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_lhs, rem_rhs));
109    let length = lhs.len();
110
111    Bitmap::from_u8_vec(buffer, length)
112}
113
114/// Apply a bitwise operation `op` to two inputs and fold the result.
115pub fn binary_fold<B, F, R>(lhs: &Bitmap, rhs: &Bitmap, op: F, init: B, fold: R) -> B
116where
117    F: Fn(u64, u64) -> B,
118    R: Fn(B, B) -> B,
119{
120    assert_eq!(lhs.len(), rhs.len());
121    let lhs_chunks = lhs.chunks();
122    let rhs_chunks = rhs.chunks();
123    let rem_lhs = lhs_chunks.remainder();
124    let rem_rhs = rhs_chunks.remainder();
125
126    let result = lhs_chunks
127        .zip(rhs_chunks)
128        .fold(init, |prev, (left, right)| fold(prev, op(left, right)));
129
130    fold(result, op(rem_lhs, rem_rhs))
131}
132
133/// Apply a bitwise operation `op` to two inputs and fold the result.
134pub fn binary_fold_mut<B, F, R>(
135    lhs: &MutableBitmap,
136    rhs: &MutableBitmap,
137    op: F,
138    init: B,
139    fold: R,
140) -> B
141where
142    F: Fn(u64, u64) -> B,
143    R: Fn(B, B) -> B,
144{
145    assert_eq!(lhs.len(), rhs.len());
146    let lhs_chunks = lhs.chunks();
147    let rhs_chunks = rhs.chunks();
148    let rem_lhs = lhs_chunks.remainder();
149    let rem_rhs = rhs_chunks.remainder();
150
151    let result = lhs_chunks
152        .zip(rhs_chunks)
153        .fold(init, |prev, (left, right)| fold(prev, op(left, right)));
154
155    fold(result, op(rem_lhs, rem_rhs))
156}
157
158fn unary_impl<F, I>(iter: I, op: F, length: usize) -> Bitmap
159where
160    I: BitChunkIterExact<u64>,
161    F: Fn(u64) -> u64,
162{
163    let rem = op(iter.remainder());
164    let buffer = chunk_iter_to_vec_and_remainder(iter.map(op), rem);
165
166    Bitmap::from_u8_vec(buffer, length)
167}
168
169/// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`].
170pub fn unary<F>(lhs: &Bitmap, op: F) -> Bitmap
171where
172    F: Fn(u64) -> u64,
173{
174    let (slice, offset, length) = lhs.as_slice();
175    if offset == 0 {
176        let iter = BitChunksExact::<u64>::new(slice, length);
177        unary_impl(iter, op, lhs.len())
178    } else {
179        let iter = lhs.chunks::<u64>();
180        unary_impl(iter, op, lhs.len())
181    }
182}
183
184// create a new [`Bitmap`] semantically equal to ``bitmap`` but with an offset equal to ``offset``
185pub(crate) fn align(bitmap: &Bitmap, new_offset: usize) -> Bitmap {
186    let length = bitmap.len();
187
188    let bitmap: Bitmap = std::iter::repeat_n(false, new_offset)
189        .chain(bitmap.iter())
190        .collect();
191
192    bitmap.sliced(new_offset, length)
193}
194
195/// Compute bitwise A AND B operation.
196pub fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
197    if lhs.unset_bits() == lhs.len() || rhs.unset_bits() == rhs.len() {
198        assert_eq!(lhs.len(), rhs.len());
199        Bitmap::new_zeroed(lhs.len())
200    } else {
201        binary(lhs, rhs, |x, y| x & y)
202    }
203}
204
205/// Compute bitwise A AND NOT B operation.
206pub fn and_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
207    binary(lhs, rhs, |x, y| x & !y)
208}
209
210/// Compute bitwise A OR B operation.
211pub fn or(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
212    if lhs.unset_bits() == 0 || rhs.unset_bits() == 0 {
213        assert_eq!(lhs.len(), rhs.len());
214        let mut mutable = MutableBitmap::with_capacity(lhs.len());
215        mutable.extend_constant(lhs.len(), true);
216        mutable.into()
217    } else {
218        binary(lhs, rhs, |x, y| x | y)
219    }
220}
221
222/// Compute bitwise A OR NOT B operation.
223pub fn or_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
224    binary(lhs, rhs, |x, y| x | !y)
225}
226
227/// Compute bitwise XOR operation.
228pub fn xor(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
229    let lhs_nulls = lhs.unset_bits();
230    let rhs_nulls = rhs.unset_bits();
231
232    // all false or all true
233    if lhs_nulls == rhs_nulls && rhs_nulls == rhs.len() || lhs_nulls == 0 && rhs_nulls == 0 {
234        assert_eq!(lhs.len(), rhs.len());
235        Bitmap::new_zeroed(rhs.len())
236    }
237    // all false and all true or vice versa
238    else if (lhs_nulls == 0 && rhs_nulls == rhs.len())
239        || (lhs_nulls == lhs.len() && rhs_nulls == 0)
240    {
241        assert_eq!(lhs.len(), rhs.len());
242        let mut mutable = MutableBitmap::with_capacity(lhs.len());
243        mutable.extend_constant(lhs.len(), true);
244        mutable.into()
245    } else {
246        binary(lhs, rhs, |x, y| x ^ y)
247    }
248}
249
250/// Compute bitwise equality (not XOR) operation.
251fn eq(lhs: &Bitmap, rhs: &Bitmap) -> bool {
252    if lhs.len() != rhs.len() {
253        return false;
254    }
255
256    let mut lhs_chunks = lhs.chunks::<u64>();
257    let mut rhs_chunks = rhs.chunks::<u64>();
258
259    let equal_chunks = lhs_chunks
260        .by_ref()
261        .zip(rhs_chunks.by_ref())
262        .all(|(left, right)| left == right);
263
264    if !equal_chunks {
265        return false;
266    }
267    let lhs_remainder = lhs_chunks.remainder_iter();
268    let rhs_remainder = rhs_chunks.remainder_iter();
269    lhs_remainder.zip(rhs_remainder).all(|(x, y)| x == y)
270}
271
272pub fn num_intersections_with(lhs: &Bitmap, rhs: &Bitmap) -> usize {
273    binary_fold(
274        lhs,
275        rhs,
276        |lhs, rhs| (lhs & rhs).count_ones() as usize,
277        0,
278        |lhs, rhs| lhs + rhs,
279    )
280}
281
282pub fn intersects_with_mut(lhs: &MutableBitmap, rhs: &MutableBitmap) -> bool {
283    binary_fold_mut(
284        lhs,
285        rhs,
286        |lhs, rhs| lhs & rhs != 0,
287        false,
288        |lhs, rhs| lhs || rhs,
289    )
290}
291
292pub fn num_edges(lhs: &Bitmap) -> usize {
293    if lhs.is_empty() {
294        return 0;
295    }
296
297    // @TODO: If is probably quite inefficient to do it like this because now either one is not
298    // aligned. Maybe, we can implement a smarter way to do this.
299    binary_fold(
300        &unsafe { lhs.clone().sliced_unchecked(0, lhs.len() - 1) },
301        &unsafe { lhs.clone().sliced_unchecked(1, lhs.len() - 1) },
302        |l, r| (l ^ r).count_ones() as usize,
303        0,
304        |acc, v| acc + v,
305    )
306}
307
308/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy }`.
309pub fn select_constant(selector: &Bitmap, truthy: &Bitmap, falsy: bool) -> Bitmap {
310    let falsy_mask: u64 = if falsy {
311        0xFFFF_FFFF_FFFF_FFFF
312    } else {
313        0x0000_0000_0000_0000
314    };
315
316    binary(selector, truthy, |s, t| (s & t) | (!s & falsy_mask))
317}
318
319/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy[i] }`.
320pub fn select(selector: &Bitmap, truthy: &Bitmap, falsy: &Bitmap) -> Bitmap {
321    ternary(selector, truthy, falsy, |s, t, f| (s & t) | (!s & f))
322}
323
324impl PartialEq for Bitmap {
325    fn eq(&self, other: &Self) -> bool {
326        eq(self, other)
327    }
328}
329
330impl<'b> BitOr<&'b Bitmap> for &Bitmap {
331    type Output = Bitmap;
332
333    fn bitor(self, rhs: &'b Bitmap) -> Bitmap {
334        or(self, rhs)
335    }
336}
337
338impl<'b> BitAnd<&'b Bitmap> for &Bitmap {
339    type Output = Bitmap;
340
341    fn bitand(self, rhs: &'b Bitmap) -> Bitmap {
342        and(self, rhs)
343    }
344}
345
346impl<'b> BitXor<&'b Bitmap> for &Bitmap {
347    type Output = Bitmap;
348
349    fn bitxor(self, rhs: &'b Bitmap) -> Bitmap {
350        xor(self, rhs)
351    }
352}
353
354impl Not for &Bitmap {
355    type Output = Bitmap;
356
357    fn not(self) -> Bitmap {
358        unary(self, |a| !a)
359    }
360}
361
362#[cfg(test)]
363mod tests {
364    use proptest::prelude::*;
365
366    use super::*;
367    use crate::bitmap::proptest::bitmap;
368
369    fn two_equal_length_bitmaps() -> impl Strategy<Value = (Bitmap, Bitmap)> {
370        (1..=250usize).prop_flat_map(|length| {
371            (
372                bitmap(length..300),
373                bitmap(length..300),
374                0..length,
375                0..length,
376            )
377                .prop_flat_map(move |(lhs, rhs, lhs_offset, rhs_offset)| {
378                    (0..usize::min(length - lhs_offset, length - rhs_offset)).prop_map(
379                        move |slice_length| {
380                            (
381                                lhs.clone().sliced(lhs_offset, slice_length),
382                                rhs.clone().sliced(rhs_offset, slice_length),
383                            )
384                        },
385                    )
386                })
387        })
388    }
389
390    proptest! {
391        #[test]
392        fn test_num_intersections_with(
393            (lhs, rhs) in two_equal_length_bitmaps()
394        ) {
395            let kernel_out = num_intersections_with(&lhs, &rhs);
396            let mut reference_out = 0;
397            for (l, r) in lhs.iter().zip(rhs.iter()) {
398                reference_out += usize::from(l & r);
399            }
400
401            prop_assert_eq!(kernel_out, reference_out);
402        }
403    }
404}