Skip to main content

vortex_array/stats/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Traits and utilities to compute and access array statistics.
5
6use arrow_buffer::BooleanBufferBuilder;
7use arrow_buffer::MutableBuffer;
8use arrow_buffer::bit_iterator::BitIterator;
9use enum_iterator::last;
10pub use expr::all_nan;
11pub use expr::all_non_nan;
12pub use expr::all_non_null;
13pub use expr::all_null;
14pub use expr::min_max;
15pub use expr::nan_count;
16pub use expr::null_count;
17pub use expr::stat;
18pub use expr::sum;
19pub use stats_set::*;
20
21mod array;
22pub mod bind;
23pub mod expr;
24pub mod flatbuffers;
25pub mod rewrite;
26pub mod session;
27mod stats_set;
28
29pub use array::*;
30pub use session::*;
31use vortex_error::VortexExpect;
32
33use crate::expr::stats::Stat;
34
35/// Statistics that are used for pruning files (i.e., we want to ensure they are computed when compressing/writing).
36/// Sum is included for boolean arrays.
37pub const PRUNING_STATS: &[Stat] = &[
38    Stat::Min,
39    Stat::Max,
40    Stat::Sum,
41    Stat::NullCount,
42    Stat::NaNCount,
43];
44
45pub fn as_stat_bitset_bytes(stats: &[Stat]) -> Vec<u8> {
46    let max_stat = u8::from(last::<Stat>().vortex_expect("last stat")) as usize + 1;
47    // TODO(ngates): use vortex-buffer::BitBuffer
48    let mut stat_bitset = BooleanBufferBuilder::new_from_buffer(
49        MutableBuffer::from_len_zeroed(max_stat.div_ceil(8)),
50        max_stat,
51    );
52    for stat in stats {
53        stat_bitset.set_bit(u8::from(*stat) as usize, true);
54    }
55
56    stat_bitset
57        .finish()
58        .into_inner()
59        .into_vec()
60        .unwrap_or_else(|b| b.to_vec())
61}
62
63pub fn stats_from_bitset_bytes(bytes: &[u8]) -> Vec<Stat> {
64    BitIterator::new(bytes, 0, bytes.len() * 8)
65        .enumerate()
66        .filter_map(|(i, b)| b.then_some(i))
67        // Filter out indices failing conversion, these are stats written by newer version of library
68        .filter_map(|i| {
69            let Ok(stat) = u8::try_from(i) else {
70                tracing::debug!("invalid stat encountered: {i}");
71                return None;
72            };
73            Stat::try_from(stat).ok()
74        })
75        .collect::<Vec<_>>()
76}