Skip to main content

vortex_compressor/stats/
bool.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Bool compression statistics.
5
6use vortex_array::ExecutionCtx;
7use vortex_array::arrays::BoolArray;
8use vortex_array::arrays::bool::BoolArrayExt;
9use vortex_error::VortexResult;
10use vortex_mask::AllOr;
11
/// Summary counts describing a boolean array, gathered to inform compression decisions.
///
/// Only the counts are stored; the array they were computed from is not retained.
#[derive(Debug, Clone)]
pub struct BoolStats {
    /// How many elements are null.
    null_count: u32,
    /// How many elements are valid (non-null).
    value_count: u32,
    /// How many of the valid (non-null) elements are `true`.
    true_count: u32,
}
22
23impl BoolStats {
24    /// Generates stats, returning an error on failure.
25    ///
26    /// # Errors
27    ///
28    /// Returns an error if getting validity mask fails or values exceed `u32` bounds.
29    pub fn generate(input: &BoolArray, ctx: &mut ExecutionCtx) -> VortexResult<Self> {
30        if input.is_empty() {
31            return Ok(Self {
32                null_count: 0,
33                value_count: 0,
34                true_count: 0,
35            });
36        }
37
38        if input.all_invalid(ctx)? {
39            return Ok(Self {
40                null_count: u32::try_from(input.len())?,
41                value_count: 0,
42                true_count: 0,
43            });
44        }
45
46        let validity = input
47            .as_ref()
48            .validity()?
49            .execute_mask(input.as_ref().len(), ctx)?;
50        let null_count = validity.false_count();
51        let value_count = validity.true_count();
52
53        let bits = input.to_bit_buffer();
54
55        // Count how many true values exist among valid elements.
56        let true_count = match validity.bit_buffer() {
57            AllOr::All => bits.true_count(),
58            AllOr::None => unreachable!("all-invalid handled above"),
59            AllOr::Some(v) => {
60                // AND the bits with validity to only count valid trues.
61                (&bits & v).true_count()
62            }
63        };
64
65        Ok(Self {
66            null_count: u32::try_from(null_count)?,
67            value_count: u32::try_from(value_count)?,
68            true_count: u32::try_from(true_count)?,
69        })
70    }
71
72    /// Returns the number of null values.
73    pub fn null_count(&self) -> u32 {
74        self.null_count
75    }
76
77    /// Returns the number of non-null values.
78    pub fn value_count(&self) -> u32 {
79        self.value_count
80    }
81
82    /// Returns the number of `true` values among valid elements.
83    pub fn true_count(&self) -> u32 {
84        self.true_count
85    }
86
87    /// Returns `true` if all valid values are the same (all-true or all-false).
88    pub fn is_constant(&self) -> bool {
89        self.value_count > 0 && (self.true_count == 0 || self.true_count == self.value_count)
90    }
91}
92
#[cfg(test)]
mod tests {
    use vortex_array::LEGACY_SESSION;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::BoolArray;
    use vortex_array::validity::Validity;
    use vortex_buffer::BitBuffer;
    use vortex_error::VortexResult;

    use super::BoolStats;

    /// Non-nullable array where every value is `true`.
    #[test]
    fn test_all_true() -> VortexResult<()> {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let array = BoolArray::new(
            BitBuffer::from(vec![true, true, true]),
            Validity::NonNullable,
        );
        let stats = BoolStats::generate(&array, &mut ctx)?;
        assert_eq!(stats.value_count, 3);
        assert_eq!(stats.null_count, 0);
        assert_eq!(stats.true_count, 3);
        assert!(stats.is_constant());
        Ok(())
    }

    /// Non-nullable array where every value is `false`.
    #[test]
    fn test_all_false() -> VortexResult<()> {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let array = BoolArray::new(
            BitBuffer::from(vec![false, false, false]),
            Validity::NonNullable,
        );
        let stats = BoolStats::generate(&array, &mut ctx)?;
        assert_eq!(stats.value_count, 3);
        assert_eq!(stats.null_count, 0);
        assert_eq!(stats.true_count, 0);
        assert!(stats.is_constant());
        Ok(())
    }

    /// Non-nullable array with both `true` and `false` values is not constant.
    #[test]
    fn test_mixed() -> VortexResult<()> {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let array = BoolArray::new(
            BitBuffer::from(vec![true, false, true]),
            Validity::NonNullable,
        );
        let stats = BoolStats::generate(&array, &mut ctx)?;
        assert_eq!(stats.value_count, 3);
        assert_eq!(stats.null_count, 0);
        assert_eq!(stats.true_count, 2);
        assert!(!stats.is_constant());
        Ok(())
    }

    /// The only `false` sits under a null slot, so the valid values are all `true`.
    #[test]
    fn test_with_nulls() -> VortexResult<()> {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let array = BoolArray::new(
            BitBuffer::from(vec![true, false, true]),
            Validity::from_iter([true, false, true]),
        );
        let stats = BoolStats::generate(&array, &mut ctx)?;
        assert_eq!(stats.value_count, 2);
        assert_eq!(stats.null_count, 1);
        assert_eq!(stats.true_count, 2);
        assert!(stats.is_constant());
        Ok(())
    }

    /// A `true` bit stored under a null slot must not be counted. Without validity masking the
    /// true count would be 2 and the array would wrongly look constant.
    #[test]
    fn test_true_bit_under_null_is_ignored() -> VortexResult<()> {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let array = BoolArray::new(
            BitBuffer::from(vec![true, true, false]),
            Validity::from_iter([true, false, true]),
        );
        let stats = BoolStats::generate(&array, &mut ctx)?;
        assert_eq!(stats.value_count(), 2);
        assert_eq!(stats.null_count(), 1);
        assert_eq!(stats.true_count(), 1);
        assert!(!stats.is_constant());
        Ok(())
    }

    /// An all-null array reports every element as null and is never constant, regardless of the
    /// bits stored underneath.
    #[test]
    fn test_all_null() -> VortexResult<()> {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let array = BoolArray::new(
            BitBuffer::from(vec![true, false, true]),
            Validity::AllInvalid,
        );
        let stats = BoolStats::generate(&array, &mut ctx)?;
        assert_eq!(stats.value_count(), 0);
        assert_eq!(stats.null_count(), 3);
        assert_eq!(stats.true_count(), 0);
        assert!(!stats.is_constant());
        Ok(())
    }

    /// An empty array yields all-zero stats and is not constant.
    #[test]
    fn test_empty() -> VortexResult<()> {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let array = BoolArray::new(
            BitBuffer::from(Vec::<bool>::new()),
            Validity::NonNullable,
        );
        let stats = BoolStats::generate(&array, &mut ctx)?;
        assert_eq!(stats.value_count(), 0);
        assert_eq!(stats.null_count(), 0);
        assert_eq!(stats.true_count(), 0);
        assert!(!stats.is_constant());
        Ok(())
    }
}
163}