vortex_array/arrays/chunked/
stats.rs

1use vortex_error::VortexResult;
2
3use crate::Array;
4use crate::arrays::ChunkedEncoding;
5use crate::arrays::chunked::ChunkedArray;
6use crate::stats::{Precision, Stat, StatsProviderExt, StatsSet};
7use crate::vtable::StatisticsVTable;
8
9impl StatisticsVTable<&ChunkedArray> for ChunkedEncoding {
10    fn compute_statistics(&self, array: &ChunkedArray, stat: Stat) -> VortexResult<StatsSet> {
11        // for UncompressedSizeInBytes, we end up with sum of chunk uncompressed sizes
12        // this ignores the `chunk_offsets` array child, so it won't exactly match self.nbytes()
13
14        let mut stats: Option<StatsSet> = None;
15
16        for chunk in array.chunks() {
17            let s = chunk.statistics();
18            let chunk_stat = match stat {
19                // We need to know min and max to merge_ordered these stats.
20                Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted => {
21                    let chunk_stats = s.compute_all(&[stat, Stat::Min, Stat::Max])?;
22                    if chunk_stats.get_as::<bool>(stat) == Some(Precision::Exact(false)) {
23                        // exit early
24                        return Ok(StatsSet::of(stat, Precision::exact(false)));
25                    } else {
26                        Some(chunk_stats)
27                    }
28                }
29                _ => s
30                    .compute_stat(stat)?
31                    .map(|s| StatsSet::of(stat, Precision::exact(s))),
32            }
33            .unwrap_or_default();
34
35            stats = stats.map(|s| s.merge_ordered(&chunk_stat, array.dtype()));
36        }
37
38        Ok(stats.unwrap_or_default())
39    }
40}