vortex_mask/
intersect_by_rank.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use crate::{AllOr, Mask};
5
6impl Mask {
7    /// Take the intersection of the `mask` with the set of true values in `self`.
8    ///
9    /// We are more interested in low selectivity `self` (as indices) with a boolean buffer mask,
10    /// so we don't optimize for other cases, yet.
11    ///
12    /// Note: we might be able to accelerate this function on x86 with BMI, see:
13    /// <https://www.microsoft.com/en-us/research/uploads/prod/2023/06/parquet-select-sigmod23.pdf>
14    ///
15    /// # Examples
16    ///
17    /// Keep the third and fifth set values from mask `m1`:
18    /// ```
19    /// use vortex_mask::Mask;
20    ///
21    /// let m1 = Mask::from_iter([true, false, false, true, true, true, false, true]);
22    /// let m2 = Mask::from_iter([false, false, true, false, true]);
23    /// assert_eq!(
24    ///     m1.intersect_by_rank(&m2),
25    ///     Mask::from_iter([false, false, false, false, true, false, false, true])
26    /// );
27    /// ```
28    pub fn intersect_by_rank(&self, mask: &Mask) -> Mask {
29        assert_eq!(self.true_count(), mask.len());
30
31        match (self.indices(), mask.indices()) {
32            (AllOr::All, _) => mask.clone(),
33            (_, AllOr::All) => self.clone(),
34            (AllOr::None, _) => Self::new_false(0),
35            (_, AllOr::None) => Self::new_false(self.len()),
36            (AllOr::Some(self_indices), AllOr::Some(mask_indices)) => {
37                Self::from_indices(
38                    self.len(),
39                    mask_indices
40                        .iter()
41                        .map(|idx|
42                            // This is verified as safe because we know that the indices are less than the
43                            // mask.len() and we known mask.len() <= self.len(),
44                            // implied by `self.true_count() == mask.len()`.
45                            unsafe{*self_indices.get_unchecked(*idx)})
46                        .collect(),
47                )
48            }
49        }
50    }
51}
52
53#[cfg(test)]
54mod test {
55    use arrow_buffer::BooleanBuffer;
56
57    use crate::Mask;
58
59    #[test]
60    fn mask_bitand_all_as_bit_and() {
61        let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![true, true, true, true, true]));
62        let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![
63            false, true, false, true, true,
64        ]));
65        assert_eq!(
66            this.intersect_by_rank(&mask),
67            Mask::from_indices(5, vec![1, 3, 4])
68        );
69    }
70
71    #[test]
72    fn mask_bitand_all_true() {
73        let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![
74            false, false, true, true, true,
75        ]));
76        let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![true, true, true]));
77        assert_eq!(
78            this.intersect_by_rank(&mask),
79            Mask::from_indices(5, vec![2, 3, 4])
80        );
81    }
82
83    #[test]
84    fn mask_bitand_true() {
85        let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![
86            true, false, false, true, true,
87        ]));
88        let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![true, false, true]));
89        assert_eq!(
90            this.intersect_by_rank(&mask),
91            Mask::from_indices(5, vec![0, 4])
92        );
93    }
94
95    #[test]
96    fn mask_bitand_false() {
97        let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![
98            true, false, false, true, true,
99        ]));
100        let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![false, false, false]));
101        assert_eq!(this.intersect_by_rank(&mask), Mask::from_indices(5, vec![]));
102    }
103}