Skip to main content

vortex_compressor/stats/
bool.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Bool compression statistics.
5
6use vortex_array::arrays::BoolArray;
7use vortex_array::arrays::bool::BoolArrayExt;
8use vortex_error::VortexResult;
9use vortex_mask::AllOr;
10
/// Summary counts over a boolean array, used when deciding how to compress it.
///
/// Only the counts are kept; the array the stats were generated from is not retained.
/// `true_count` only considers valid (non-null) elements, so it never exceeds `value_count`.
///
/// The [`Default`] value has every count set to zero, which is the same result
/// `BoolStats::generate` produces for an empty array.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct BoolStats {
    /// Number of null values.
    null_count: u32,
    /// Number of non-null values.
    value_count: u32,
    /// Number of `true` values among valid (non-null) elements.
    true_count: u32,
}
21
22impl BoolStats {
23    /// Generates stats, returning an error on failure.
24    ///
25    /// # Errors
26    ///
27    /// Returns an error if getting validity mask fails or values exceed `u32` bounds.
28    pub fn generate(input: &BoolArray) -> VortexResult<Self> {
29        if input.is_empty() {
30            return Ok(Self {
31                null_count: 0,
32                value_count: 0,
33                true_count: 0,
34            });
35        }
36
37        if input.all_invalid()? {
38            return Ok(Self {
39                null_count: u32::try_from(input.len())?,
40                value_count: 0,
41                true_count: 0,
42            });
43        }
44
45        let validity = input.validity_mask()?;
46        let null_count = validity.false_count();
47        let value_count = validity.true_count();
48
49        let bits = input.to_bit_buffer();
50
51        // Count how many true values exist among valid elements.
52        let true_count = match validity.bit_buffer() {
53            AllOr::All => bits.true_count(),
54            AllOr::None => unreachable!("all-invalid handled above"),
55            AllOr::Some(v) => {
56                // AND the bits with validity to only count valid trues.
57                (&bits & v).true_count()
58            }
59        };
60
61        Ok(Self {
62            null_count: u32::try_from(null_count)?,
63            value_count: u32::try_from(value_count)?,
64            true_count: u32::try_from(true_count)?,
65        })
66    }
67
68    /// Returns the number of null values.
69    pub fn null_count(&self) -> u32 {
70        self.null_count
71    }
72
73    /// Returns the number of non-null values.
74    pub fn value_count(&self) -> u32 {
75        self.value_count
76    }
77
78    /// Returns the number of `true` values among valid elements.
79    pub fn true_count(&self) -> u32 {
80        self.true_count
81    }
82
83    /// Returns `true` if all valid values are the same (all-true or all-false).
84    pub fn is_constant(&self) -> bool {
85        self.value_count > 0 && (self.true_count == 0 || self.true_count == self.value_count)
86    }
87}
88
#[cfg(test)]
mod tests {
    use vortex_array::arrays::BoolArray;
    use vortex_array::validity::Validity;
    use vortex_buffer::BitBuffer;
    use vortex_error::VortexResult;

    use super::BoolStats;

    /// A non-nullable array of only `true` values is constant.
    #[test]
    fn test_all_true() -> VortexResult<()> {
        let array = BoolArray::new(
            BitBuffer::from(vec![true, true, true]),
            Validity::NonNullable,
        );
        let stats = BoolStats::generate(&array)?;
        assert_eq!(stats.value_count, 3);
        assert_eq!(stats.null_count, 0);
        assert_eq!(stats.true_count, 3);
        assert!(stats.is_constant());
        Ok(())
    }

    /// A non-nullable array of only `false` values is constant.
    #[test]
    fn test_all_false() -> VortexResult<()> {
        let array = BoolArray::new(
            BitBuffer::from(vec![false, false, false]),
            Validity::NonNullable,
        );
        let stats = BoolStats::generate(&array)?;
        assert_eq!(stats.value_count, 3);
        assert_eq!(stats.null_count, 0);
        assert_eq!(stats.true_count, 0);
        assert!(stats.is_constant());
        Ok(())
    }

    /// A non-nullable array with both values is not constant.
    #[test]
    fn test_mixed() -> VortexResult<()> {
        let array = BoolArray::new(
            BitBuffer::from(vec![true, false, true]),
            Validity::NonNullable,
        );
        let stats = BoolStats::generate(&array)?;
        assert_eq!(stats.value_count, 3);
        assert_eq!(stats.null_count, 0);
        assert_eq!(stats.true_count, 2);
        assert!(!stats.is_constant());
        Ok(())
    }

    /// The only `false` bit is null, so the remaining valid values are all `true`.
    #[test]
    fn test_with_nulls() -> VortexResult<()> {
        let array = BoolArray::new(
            BitBuffer::from(vec![true, false, true]),
            Validity::from_iter([true, false, true]),
        );
        let stats = BoolStats::generate(&array)?;
        assert_eq!(stats.value_count, 2);
        assert_eq!(stats.null_count, 1);
        assert_eq!(stats.true_count, 2);
        assert!(stats.is_constant());
        Ok(())
    }

    /// A `true` bit sitting in a null slot must not be counted as a valid `true`.
    #[test]
    fn test_null_masks_true_bit() -> VortexResult<()> {
        let array = BoolArray::new(
            BitBuffer::from(vec![true, false, true]),
            Validity::from_iter([false, true, true]),
        );
        let stats = BoolStats::generate(&array)?;
        assert_eq!(stats.value_count, 2);
        assert_eq!(stats.null_count, 1);
        assert_eq!(stats.true_count, 1);
        assert!(!stats.is_constant());
        Ok(())
    }

    /// An array with no valid values has no trues and is not reported as constant.
    #[test]
    fn test_all_null() -> VortexResult<()> {
        let array = BoolArray::new(
            BitBuffer::from(vec![true, false, true]),
            Validity::from_iter([false, false, false]),
        );
        let stats = BoolStats::generate(&array)?;
        assert_eq!(stats.value_count, 0);
        assert_eq!(stats.null_count, 3);
        assert_eq!(stats.true_count, 0);
        assert!(!stats.is_constant());
        Ok(())
    }
}
153}