polars_compute/unique/
boolean.rs

1use arrow::array::{Array, BooleanArray};
2use arrow::bitmap::BitmapBuilder;
3use arrow::datatypes::ArrowDataType;
4
5use super::{GenericUniqueKernel, RangedUniqueKernel};
6
7#[derive(Default, Clone)]
8pub struct BooleanUniqueKernelState {
9    seen: u32,
10}
11
12impl BooleanUniqueKernelState {
13    pub fn new() -> Self {
14        Self::default()
15    }
16}
17
18impl RangedUniqueKernel for BooleanUniqueKernelState {
19    type Array = BooleanArray;
20
21    fn has_seen_all(&self) -> bool {
22        self.seen == 0b111
23    }
24
25    fn append(&mut self, array: &Self::Array) {
26        if array.len() == 0 {
27            return;
28        }
29
30        let null_count = array.null_count();
31        self.seen |= u32::from(null_count > 0) << 2;
32        let num_trues = if null_count > 0 {
33            array
34                .values()
35                .num_intersections_with(array.validity().unwrap())
36        } else {
37            array.values().set_bits()
38        };
39
40        self.seen |= u32::from(num_trues != array.len() - null_count);
41        self.seen |= u32::from(num_trues != 0) << 1;
42    }
43
44    fn append_state(&mut self, other: &Self) {
45        self.seen |= other.seen;
46    }
47
48    fn finalize_unique(self) -> Self::Array {
49        let mut values = BitmapBuilder::with_capacity(self.seen.count_ones() as usize);
50
51        if self.seen & 0b001 != 0 {
52            values.push(false);
53        }
54        if self.seen & 0b010 != 0 {
55            values.push(true);
56        }
57        let validity = if self.seen & 0b100 != 0 {
58            let mut validity = BitmapBuilder::with_capacity(values.len() + 1);
59            validity.extend_constant(values.len(), true);
60            validity.push(false);
61            values.push(false);
62            Some(validity.freeze())
63        } else {
64            None
65        };
66
67        let values = values.freeze();
68        BooleanArray::new(ArrowDataType::Boolean, values, validity)
69    }
70
71    fn finalize_n_unique(&self) -> usize {
72        self.seen.count_ones() as usize
73    }
74
75    fn finalize_n_unique_non_null(&self) -> usize {
76        (self.seen & 0b011).count_ones() as usize
77    }
78}
79
80impl GenericUniqueKernel for BooleanArray {
81    fn unique(&self) -> Self {
82        let mut state = BooleanUniqueKernelState::new();
83        state.append(self);
84        state.finalize_unique()
85    }
86
87    fn n_unique(&self) -> usize {
88        let mut state = BooleanUniqueKernelState::new();
89        state.append(self);
90        state.finalize_n_unique()
91    }
92
93    fn n_unique_non_null(&self) -> usize {
94        let mut state = BooleanUniqueKernelState::new();
95        state.append(self);
96        state.finalize_n_unique_non_null()
97    }
98}
99
100#[cfg(test)]
101mod tests {
102    use arrow::array::{BooleanArray, MutableBooleanArray, boolean_array};
103    use proptest::prelude::*;
104
105    use super::*;
106
107    #[test]
108    fn test_boolean_distinct_count() {
109        use arrow::bitmap::Bitmap;
110        use arrow::datatypes::ArrowDataType;
111
112        macro_rules! assert_bool_dc {
113            ($values:expr, $validity:expr => $dc:expr) => {
114                let validity: Option<Bitmap> =
115                    <Option<Vec<bool>>>::map($validity, |v| Bitmap::from_iter(v));
116                let arr =
117                    BooleanArray::new(ArrowDataType::Boolean, Bitmap::from_iter($values), validity);
118                assert_eq!(arr.n_unique(), $dc);
119            };
120        }
121
122        assert_bool_dc!(vec![], None => 0);
123        assert_bool_dc!(vec![], Some(vec![]) => 0);
124        assert_bool_dc!(vec![true], None => 1);
125        assert_bool_dc!(vec![true], Some(vec![true]) => 1);
126        assert_bool_dc!(vec![true], Some(vec![false]) => 1);
127        assert_bool_dc!(vec![true, false], None => 2);
128        assert_bool_dc!(vec![true, false, false], None => 2);
129        assert_bool_dc!(vec![true, false, false], Some(vec![true, true, false]) => 3);
130
131        // Copied from https://github.com/pola-rs/polars/pull/16765#discussion_r1629426159
132        assert_bool_dc!(vec![true, true, true, true, true], Some(vec![true, false, true, false, false]) => 2);
133        assert_bool_dc!(vec![false, true, false, true, true], Some(vec![true, false, true, false, false]) => 2);
134        assert_bool_dc!(vec![true, false, true, false, true, true], Some(vec![true, true, false, true, false, false]) => 3);
135    }
136
137    proptest! {
138        #[test]
139        fn test_proptest(array in boolean_array(0..100)) {
140            let mut state = BooleanUniqueKernelState::new();
141            state.append(&array);
142
143            let mut has_none = false;
144            let mut has_false = false;
145            let mut has_true = false;
146            for v in array.iter() {
147                match v {
148                    None => has_none |= true,
149                    Some(false) => has_false |= true,
150                    Some(true) => has_true |= true,
151                }
152            }
153
154            let mut unique = MutableBooleanArray::new();
155            if has_false {
156                unique.push(Some(false));
157            }
158            if has_true {
159                unique.push(Some(true));
160            }
161            if has_none {
162                unique.push(None);
163            }
164            let unique = unique.freeze();
165
166            assert_eq!(state.clone().finalize_unique(), unique);
167            assert_eq!(state.clone().finalize_n_unique(), unique.len());
168            assert_eq!(state.clone().finalize_n_unique_non_null(), unique.len() - usize::from(has_none));
169        }
170    }
171}