polars_compute/unique/
boolean.rs1use arrow::array::{Array, BooleanArray};
2use arrow::bitmap::BitmapBuilder;
3use arrow::datatypes::ArrowDataType;
4
5use super::{GenericUniqueKernel, RangedUniqueKernel};
6
7#[derive(Default, Clone)]
8pub struct BooleanUniqueKernelState {
9 seen: u32,
10}
11
12impl BooleanUniqueKernelState {
13 pub fn new() -> Self {
14 Self::default()
15 }
16}
17
18impl RangedUniqueKernel for BooleanUniqueKernelState {
19 type Array = BooleanArray;
20
21 fn has_seen_all(&self) -> bool {
22 self.seen == 0b111
23 }
24
25 fn append(&mut self, array: &Self::Array) {
26 if array.len() == 0 {
27 return;
28 }
29
30 let null_count = array.null_count();
31 self.seen |= u32::from(null_count > 0) << 2;
32 let num_trues = if null_count > 0 {
33 array
34 .values()
35 .num_intersections_with(array.validity().unwrap())
36 } else {
37 array.values().set_bits()
38 };
39
40 self.seen |= u32::from(num_trues != array.len() - null_count);
41 self.seen |= u32::from(num_trues != 0) << 1;
42 }
43
44 fn append_state(&mut self, other: &Self) {
45 self.seen |= other.seen;
46 }
47
48 fn finalize_unique(self) -> Self::Array {
49 let mut values = BitmapBuilder::with_capacity(self.seen.count_ones() as usize);
50
51 if self.seen & 0b001 != 0 {
52 values.push(false);
53 }
54 if self.seen & 0b010 != 0 {
55 values.push(true);
56 }
57 let validity = if self.seen & 0b100 != 0 {
58 let mut validity = BitmapBuilder::with_capacity(values.len() + 1);
59 validity.extend_constant(values.len(), true);
60 validity.push(false);
61 values.push(false);
62 Some(validity.freeze())
63 } else {
64 None
65 };
66
67 let values = values.freeze();
68 BooleanArray::new(ArrowDataType::Boolean, values, validity)
69 }
70
71 fn finalize_n_unique(&self) -> usize {
72 self.seen.count_ones() as usize
73 }
74
75 fn finalize_n_unique_non_null(&self) -> usize {
76 (self.seen & 0b011).count_ones() as usize
77 }
78}
79
80impl GenericUniqueKernel for BooleanArray {
81 fn unique(&self) -> Self {
82 let mut state = BooleanUniqueKernelState::new();
83 state.append(self);
84 state.finalize_unique()
85 }
86
87 fn n_unique(&self) -> usize {
88 let mut state = BooleanUniqueKernelState::new();
89 state.append(self);
90 state.finalize_n_unique()
91 }
92
93 fn n_unique_non_null(&self) -> usize {
94 let mut state = BooleanUniqueKernelState::new();
95 state.append(self);
96 state.finalize_n_unique_non_null()
97 }
98}
99
100#[cfg(test)]
101mod tests {
102 use arrow::array::{BooleanArray, MutableBooleanArray, boolean_array};
103 use proptest::prelude::*;
104
105 use super::*;
106
107 #[test]
108 fn test_boolean_distinct_count() {
109 use arrow::bitmap::Bitmap;
110 use arrow::datatypes::ArrowDataType;
111
112 macro_rules! assert_bool_dc {
113 ($values:expr, $validity:expr => $dc:expr) => {
114 let validity: Option<Bitmap> =
115 <Option<Vec<bool>>>::map($validity, |v| Bitmap::from_iter(v));
116 let arr =
117 BooleanArray::new(ArrowDataType::Boolean, Bitmap::from_iter($values), validity);
118 assert_eq!(arr.n_unique(), $dc);
119 };
120 }
121
122 assert_bool_dc!(vec![], None => 0);
123 assert_bool_dc!(vec![], Some(vec![]) => 0);
124 assert_bool_dc!(vec![true], None => 1);
125 assert_bool_dc!(vec![true], Some(vec![true]) => 1);
126 assert_bool_dc!(vec![true], Some(vec![false]) => 1);
127 assert_bool_dc!(vec![true, false], None => 2);
128 assert_bool_dc!(vec![true, false, false], None => 2);
129 assert_bool_dc!(vec![true, false, false], Some(vec![true, true, false]) => 3);
130
131 assert_bool_dc!(vec![true, true, true, true, true], Some(vec![true, false, true, false, false]) => 2);
133 assert_bool_dc!(vec![false, true, false, true, true], Some(vec![true, false, true, false, false]) => 2);
134 assert_bool_dc!(vec![true, false, true, false, true, true], Some(vec![true, true, false, true, false, false]) => 3);
135 }
136
137 proptest! {
138 #[test]
139 fn test_proptest(array in boolean_array(0..100)) {
140 let mut state = BooleanUniqueKernelState::new();
141 state.append(&array);
142
143 let mut has_none = false;
144 let mut has_false = false;
145 let mut has_true = false;
146 for v in array.iter() {
147 match v {
148 None => has_none |= true,
149 Some(false) => has_false |= true,
150 Some(true) => has_true |= true,
151 }
152 }
153
154 let mut unique = MutableBooleanArray::new();
155 if has_false {
156 unique.push(Some(false));
157 }
158 if has_true {
159 unique.push(Some(true));
160 }
161 if has_none {
162 unique.push(None);
163 }
164 let unique = unique.freeze();
165
166 assert_eq!(state.clone().finalize_unique(), unique);
167 assert_eq!(state.clone().finalize_n_unique(), unique.len());
168 assert_eq!(state.clone().finalize_n_unique_non_null(), unique.len() - usize::from(has_none));
169 }
170 }
171}