vortex_mask/
intersect_by_rank.rs

1use crate::{AllOr, Mask};
2
3impl Mask {
4    /// Take the intersection of the `mask` with the set of true values in `self`.
5    ///
6    /// We are more interested in low selectivity `self` (as indices) with a boolean buffer mask,
7    /// so we don't optimize for other cases, yet.
8    ///
9    /// Note: we might be able to accelerate this function on x86 with BMI, see:
10    /// <https://www.microsoft.com/en-us/research/uploads/prod/2023/06/parquet-select-sigmod23.pdf>
11    ///
12    /// # Examples
13    ///
14    /// Keep the third and fifth set values from mask `m1`:
15    /// ```
16    /// use vortex_mask::Mask;
17    ///
18    /// let m1 = Mask::from_iter([true, false, false, true, true, true, false, true]);
19    /// let m2 = Mask::from_iter([false, false, true, false, true]);
20    /// assert_eq!(
21    ///     m1.intersect_by_rank(&m2),
22    ///     Mask::from_iter([false, false, false, false, true, false, false, true])
23    /// );
24    /// ```
25    pub fn intersect_by_rank(&self, mask: &Mask) -> Mask {
26        assert_eq!(self.true_count(), mask.len());
27
28        match (self.indices(), mask.indices()) {
29            (AllOr::All, _) => mask.clone(),
30            (_, AllOr::All) => self.clone(),
31            (AllOr::None, _) => Self::new_false(0),
32            (_, AllOr::None) => Self::new_false(self.len()),
33            (AllOr::Some(self_indices), AllOr::Some(mask_indices)) => {
34                Self::from_indices(
35                    self.len(),
36                    mask_indices
37                        .iter()
38                        .map(|idx|
39                            // This is verified as safe because we know that the indices are less than the
40                            // mask.len() and we known mask.len() <= self.len(),
41                            // implied by `self.true_count() == mask.len()`.
42                            unsafe{*self_indices.get_unchecked(*idx)})
43                        .collect(),
44                )
45            }
46        }
47    }
48}
49
50#[cfg(test)]
51mod test {
52    use arrow_buffer::BooleanBuffer;
53
54    use crate::Mask;
55
56    #[test]
57    fn mask_bitand_all_as_bit_and() {
58        let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![true, true, true, true, true]));
59        let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![
60            false, true, false, true, true,
61        ]));
62        assert_eq!(
63            this.intersect_by_rank(&mask),
64            Mask::from_indices(5, vec![1, 3, 4])
65        );
66    }
67
68    #[test]
69    fn mask_bitand_all_true() {
70        let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![
71            false, false, true, true, true,
72        ]));
73        let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![true, true, true]));
74        assert_eq!(
75            this.intersect_by_rank(&mask),
76            Mask::from_indices(5, vec![2, 3, 4])
77        );
78    }
79
80    #[test]
81    fn mask_bitand_true() {
82        let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![
83            true, false, false, true, true,
84        ]));
85        let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![true, false, true]));
86        assert_eq!(
87            this.intersect_by_rank(&mask),
88            Mask::from_indices(5, vec![0, 4])
89        );
90    }
91
92    #[test]
93    fn mask_bitand_false() {
94        let this = Mask::from_buffer(BooleanBuffer::from_iter(vec![
95            true, false, false, true, true,
96        ]));
97        let mask = Mask::from_buffer(BooleanBuffer::from_iter(vec![false, false, false]));
98        assert_eq!(this.intersect_by_rank(&mask), Mask::from_indices(5, vec![]));
99    }
100}