vortex_layout/layouts/stats/
stats_table.rs1use std::sync::Arc;
2
3use itertools::Itertools;
4use vortex_array::arrays::StructArray;
5use vortex_array::builders::{ArrayBuilder, ArrayBuilderExt, builder_with_capacity};
6use vortex_array::compute::sum;
7use vortex_array::stats::{Precision, Stat, StatsSet};
8use vortex_array::validity::Validity;
9use vortex_array::{Array, ArrayRef, ArrayVariants};
10use vortex_dtype::{DType, Nullability, PType, StructDType};
11use vortex_error::{VortexExpect, VortexResult, vortex_bail};
12
13#[derive(Clone)]
18pub struct StatsTable {
19 array: ArrayRef,
21 stats: Arc<[Stat]>,
23}
24
25impl StatsTable {
26 pub fn try_new(column_dtype: DType, array: ArrayRef, stats: Arc<[Stat]>) -> VortexResult<Self> {
29 if &Self::dtype_for_stats_table(&column_dtype, &stats) != array.dtype() {
30 vortex_bail!("Array dtype does not match expected stats table dtype");
31 }
32 Ok(Self::unchecked_new(array, stats))
33 }
34
35 pub fn unchecked_new(array: ArrayRef, stats: Arc<[Stat]>) -> Self {
37 Self { array, stats }
38 }
39
40 pub fn dtype_for_stats_table(column_dtype: &DType, present_stats: &[Stat]) -> DType {
42 assert!(present_stats.is_sorted(), "Stats must be sorted");
43 DType::Struct(
44 Arc::new(StructDType::from_iter(present_stats.iter().filter_map(
45 |stat| {
46 stat.dtype(column_dtype)
47 .map(|dtype| (stat.name(), dtype.as_nullable()))
48 },
49 ))),
50 Nullability::NonNullable,
51 )
52 }
53
54 pub fn array(&self) -> &ArrayRef {
56 &self.array
57 }
58
59 pub fn present_stats(&self) -> &[Stat] {
61 &self.stats
62 }
63
64 pub fn to_stats_set(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
66 let mut stats_set = StatsSet::default();
67 for stat in stats {
68 let Some(array) = self.get_stat(*stat)? else {
69 continue;
70 };
71
72 match stat {
74 Stat::Min | Stat::Max | Stat::Sum => {
76 if let Some(s) = array.statistics().compute_stat(*stat)? {
77 stats_set.set(*stat, Precision::exact(s))
78 }
79 }
80 Stat::NullCount | Stat::UncompressedSizeInBytes => {
82 let sum = sum(&array)?
83 .cast(&DType::Primitive(PType::U64, Nullability::Nullable))?
84 .into_value();
85 stats_set.set(*stat, Precision::exact(sum));
86 }
87 Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted => {}
89 }
90 }
91 Ok(stats_set)
92 }
93
94 pub fn get_stat(&self, stat: Stat) -> VortexResult<Option<ArrayRef>> {
96 Ok(self
97 .array
98 .as_struct_typed()
99 .vortex_expect("Stats table must be a struct array")
100 .maybe_null_field_by_name(stat.name())
101 .ok())
102 }
103}
104
105pub struct StatsAccumulator {
112 stats: Arc<[Stat]>,
113 builders: Vec<Box<dyn ArrayBuilder>>,
114 length: usize,
115}
116
117impl StatsAccumulator {
118 pub fn new(dtype: DType, stats: &[Stat]) -> Self {
119 let (stats, builders): (Vec<Stat>, _) = stats
120 .iter()
121 .filter_map(|s| {
122 s.dtype(&dtype)
123 .map(|stat_dtype| (*s, builder_with_capacity(&stat_dtype.as_nullable(), 1024)))
124 })
125 .unzip();
126
127 Self {
128 stats: stats.into(),
129 builders,
130 length: 0,
131 }
132 }
133
134 pub fn stats(&self) -> &[Stat] {
135 &self.stats
136 }
137
138 pub fn push_chunk(&mut self, array: &dyn Array) -> VortexResult<()> {
139 for (s, builder) in self.stats.iter().zip_eq(self.builders.iter_mut()) {
140 if let Some(v) = array.statistics().compute_stat(*s)? {
141 builder.append_scalar_value(v)?;
142 } else {
143 builder.append_null();
144 }
145 }
146 self.length += 1;
147 Ok(())
148 }
149
150 pub fn as_stats_table(&mut self) -> Option<StatsTable> {
155 let mut names = Vec::new();
156 let mut fields = Vec::new();
157 let mut stats = Vec::new();
158
159 for (stat, builder) in self
160 .stats
161 .iter()
162 .zip(self.builders.iter_mut())
163 .sorted_unstable_by_key(|&(&s, _)| s)
165 {
166 let values = builder.finish();
167
168 if values
170 .invalid_count()
171 .vortex_expect("failed to get invalid count")
172 == values.len()
173 {
174 continue;
175 }
176
177 stats.push(*stat);
178 names.push(stat.to_string().into());
179 fields.push(values);
180 }
181
182 if names.is_empty() {
183 return None;
184 }
185
186 Some(StatsTable {
187 array: StructArray::try_new(names.into(), fields, self.length, Validity::NonNullable)
188 .vortex_expect("Failed to create stats table")
189 .into_array(),
190 stats: stats.into(),
191 })
192 }
193}