Skip to main content

vortex_compressor/stats/
bool.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Bool compression statistics.
5
6use vortex_array::LEGACY_SESSION;
7use vortex_array::VortexSessionExecute;
8use vortex_array::arrays::BoolArray;
9use vortex_array::arrays::bool::BoolArrayExt;
10use vortex_error::VortexResult;
11use vortex_mask::AllOr;
12
/// Summary statistics of a boolean array, used to drive compression decisions.
///
/// Only counts are stored, each as a `u32`; the array's values themselves are
/// not retained. Instances are produced by `BoolStats::generate` and read
/// through the accessor methods.
///
/// Equality compares all three counts, so two stats values are equal exactly
/// when they describe the same null / non-null / `true` totals.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BoolStats {
    /// Number of null (invalid) elements.
    null_count: u32,
    /// Number of non-null (valid) elements.
    value_count: u32,
    /// Number of `true` values among the valid (non-null) elements.
    ///
    /// Bits stored under null positions are not included in this count.
    true_count: u32,
}
23
24impl BoolStats {
25    /// Generates stats, returning an error on failure.
26    ///
27    /// # Errors
28    ///
29    /// Returns an error if getting validity mask fails or values exceed `u32` bounds.
30    pub fn generate(input: &BoolArray) -> VortexResult<Self> {
31        if input.is_empty() {
32            return Ok(Self {
33                null_count: 0,
34                value_count: 0,
35                true_count: 0,
36            });
37        }
38
39        let mut ctx = LEGACY_SESSION.create_execution_ctx();
40        if input.all_invalid(&mut ctx)? {
41            return Ok(Self {
42                null_count: u32::try_from(input.len())?,
43                value_count: 0,
44                true_count: 0,
45            });
46        }
47
48        let validity = input
49            .as_ref()
50            .validity()?
51            .to_mask(input.as_ref().len(), &mut ctx)?;
52        let null_count = validity.false_count();
53        let value_count = validity.true_count();
54
55        let bits = input.to_bit_buffer();
56
57        // Count how many true values exist among valid elements.
58        let true_count = match validity.bit_buffer() {
59            AllOr::All => bits.true_count(),
60            AllOr::None => unreachable!("all-invalid handled above"),
61            AllOr::Some(v) => {
62                // AND the bits with validity to only count valid trues.
63                (&bits & v).true_count()
64            }
65        };
66
67        Ok(Self {
68            null_count: u32::try_from(null_count)?,
69            value_count: u32::try_from(value_count)?,
70            true_count: u32::try_from(true_count)?,
71        })
72    }
73
74    /// Returns the number of null values.
75    pub fn null_count(&self) -> u32 {
76        self.null_count
77    }
78
79    /// Returns the number of non-null values.
80    pub fn value_count(&self) -> u32 {
81        self.value_count
82    }
83
84    /// Returns the number of `true` values among valid elements.
85    pub fn true_count(&self) -> u32 {
86        self.true_count
87    }
88
89    /// Returns `true` if all valid values are the same (all-true or all-false).
90    pub fn is_constant(&self) -> bool {
91        self.value_count > 0 && (self.true_count == 0 || self.true_count == self.value_count)
92    }
93}
94
#[cfg(test)]
mod tests {
    use vortex_array::arrays::BoolArray;
    use vortex_array::validity::Validity;
    use vortex_buffer::BitBuffer;
    use vortex_error::VortexResult;

    use super::BoolStats;

    /// Generates stats for a bool array built from `values` and `validity`.
    fn stats_for(values: Vec<bool>, validity: Validity) -> VortexResult<BoolStats> {
        let array = BoolArray::new(BitBuffer::from(values), validity);
        BoolStats::generate(&array)
    }

    #[test]
    fn test_empty() -> VortexResult<()> {
        let stats = stats_for(Vec::new(), Validity::NonNullable)?;
        assert_eq!(stats.value_count(), 0);
        assert_eq!(stats.null_count(), 0);
        assert_eq!(stats.true_count(), 0);
        // No valid values means there is nothing to be constant.
        assert!(!stats.is_constant());
        Ok(())
    }

    #[test]
    fn test_all_true() -> VortexResult<()> {
        let stats = stats_for(vec![true, true, true], Validity::NonNullable)?;
        assert_eq!(stats.value_count(), 3);
        assert_eq!(stats.null_count(), 0);
        assert_eq!(stats.true_count(), 3);
        assert!(stats.is_constant());
        Ok(())
    }

    #[test]
    fn test_all_false() -> VortexResult<()> {
        let stats = stats_for(vec![false, false, false], Validity::NonNullable)?;
        assert_eq!(stats.value_count(), 3);
        assert_eq!(stats.null_count(), 0);
        assert_eq!(stats.true_count(), 0);
        assert!(stats.is_constant());
        Ok(())
    }

    #[test]
    fn test_mixed() -> VortexResult<()> {
        let stats = stats_for(vec![true, false, true], Validity::NonNullable)?;
        assert_eq!(stats.value_count(), 3);
        assert_eq!(stats.null_count(), 0);
        assert_eq!(stats.true_count(), 2);
        assert!(!stats.is_constant());
        Ok(())
    }

    #[test]
    fn test_with_nulls() -> VortexResult<()> {
        let stats = stats_for(
            vec![true, false, true],
            Validity::from_iter([true, false, true]),
        )?;
        assert_eq!(stats.value_count(), 2);
        assert_eq!(stats.null_count(), 1);
        assert_eq!(stats.true_count(), 2);
        assert!(stats.is_constant());
        Ok(())
    }

    #[test]
    fn test_all_null() -> VortexResult<()> {
        let stats = stats_for(vec![true, false], Validity::AllInvalid)?;
        assert_eq!(stats.value_count(), 0);
        assert_eq!(stats.null_count(), 2);
        assert_eq!(stats.true_count(), 0);
        assert!(!stats.is_constant());
        Ok(())
    }

    #[test]
    fn test_true_under_null_is_not_counted() -> VortexResult<()> {
        // The middle slot is null but stores `true`; it must be masked out,
        // leaving the valid values `true` and `false`.
        let stats = stats_for(
            vec![true, true, false],
            Validity::from_iter([true, false, true]),
        )?;
        assert_eq!(stats.value_count(), 2);
        assert_eq!(stats.null_count(), 1);
        assert_eq!(stats.true_count(), 1);
        assert!(!stats.is_constant());
        Ok(())
    }

    #[test]
    fn test_true_under_null_does_not_break_constant() -> VortexResult<()> {
        // Valid values are both `false`; the `true` under the null slot must
        // not prevent the array from being reported as constant.
        let stats = stats_for(
            vec![false, true, false],
            Validity::from_iter([true, false, true]),
        )?;
        assert_eq!(stats.value_count(), 2);
        assert_eq!(stats.null_count(), 1);
        assert_eq!(stats.true_count(), 0);
        assert!(stats.is_constant());
        Ok(())
    }
}