vortex_array/stats/
array.rs1use std::sync::Arc;
7
8use parking_lot::RwLock;
9use vortex_error::VortexError;
10use vortex_error::VortexResult;
11use vortex_error::vortex_panic;
12use vortex_scalar::Scalar;
13use vortex_scalar::ScalarValue;
14
15use super::MutTypedStatsSetRef;
16use super::StatsSet;
17use super::StatsSetIntoIter;
18use super::TypedStatsSetRef;
19use crate::Array;
20use crate::builders::builder_with_capacity;
21use crate::compute::MinMaxResult;
22use crate::compute::is_constant;
23use crate::compute::is_sorted;
24use crate::compute::is_strict_sorted;
25use crate::compute::min_max;
26use crate::compute::nan_count;
27use crate::compute::sum;
28use crate::expr::stats::Precision;
29use crate::expr::stats::Stat;
30use crate::expr::stats::StatsProvider;
31
32#[derive(Clone, Default, Debug)]
35pub struct ArrayStats {
36 inner: Arc<RwLock<StatsSet>>,
37}
38
39pub struct StatsSetRef<'a> {
43 dyn_array_ref: &'a dyn Array,
45 array_stats: &'a ArrayStats,
46}
47
48impl ArrayStats {
49 pub fn to_ref<'a>(&'a self, array: &'a dyn Array) -> StatsSetRef<'a> {
50 StatsSetRef {
51 dyn_array_ref: array,
52 array_stats: self,
53 }
54 }
55
56 pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
57 self.inner.write().set(stat, value);
58 }
59
60 pub fn clear(&self, stat: Stat) {
61 self.inner.write().clear(stat);
62 }
63
64 pub fn retain(&self, stats: &[Stat]) {
65 self.inner.write().retain_only(stats);
66 }
67}
68
69impl From<StatsSet> for ArrayStats {
70 fn from(value: StatsSet) -> Self {
71 Self {
72 inner: Arc::new(RwLock::new(value)),
73 }
74 }
75}
76
77impl From<ArrayStats> for StatsSet {
78 fn from(value: ArrayStats) -> Self {
79 value.inner.read().clone()
80 }
81}
82
83impl StatsSetRef<'_> {
84 pub fn set_iter(&self, iter: StatsSetIntoIter) {
85 let mut guard = self.array_stats.inner.write();
86 for (stat, value) in iter {
87 guard.set(stat, value);
88 }
89 }
90
91 pub fn inherit_from(&self, stats: StatsSetRef<'_>) {
92 if !Arc::ptr_eq(&self.array_stats.inner, &stats.array_stats.inner) {
94 stats.with_iter(|iter| self.inherit(iter));
95 }
96 }
97
98 pub fn inherit<'a>(&self, iter: impl Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) {
99 let mut guard = self.array_stats.inner.write();
100 for (stat, value) in iter {
101 if !value.is_exact() {
102 if !guard.get(*stat).is_some_and(|v| v.is_exact()) {
103 guard.set(*stat, value.clone());
104 }
105 } else {
106 guard.set(*stat, value.clone());
107 }
108 }
109 }
110
111 pub fn with_typed_stats_set<U, F: FnOnce(TypedStatsSetRef) -> U>(&self, apply: F) -> U {
112 apply(
113 self.array_stats
114 .inner
115 .read()
116 .as_typed_ref(self.dyn_array_ref.dtype()),
117 )
118 }
119
120 pub fn with_mut_typed_stats_set<U, F: FnOnce(MutTypedStatsSetRef) -> U>(&self, apply: F) -> U {
121 apply(
122 self.array_stats
123 .inner
124 .write()
125 .as_mut_typed_ref(self.dyn_array_ref.dtype()),
126 )
127 }
128
129 pub fn to_owned(&self) -> StatsSet {
130 self.array_stats.inner.read().clone()
131 }
132
133 pub fn with_iter<
134 F: for<'a> FnOnce(&mut dyn Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) -> R,
135 R,
136 >(
137 &self,
138 f: F,
139 ) -> R {
140 let lock = self.array_stats.inner.read();
141 f(&mut lock.iter())
142 }
143
144 pub fn compute_stat(&self, stat: Stat) -> VortexResult<Option<Scalar>> {
145 if let Some(Precision::Exact(s)) = self.get(stat) {
147 return Ok(Some(s));
148 }
149
150 Ok(match stat {
151 Stat::Min => min_max(self.dyn_array_ref)?.map(|MinMaxResult { min, max: _ }| min),
152 Stat::Max => min_max(self.dyn_array_ref)?.map(|MinMaxResult { min: _, max }| max),
153 Stat::Sum => {
154 Stat::Sum
155 .dtype(self.dyn_array_ref.dtype())
156 .is_some()
157 .then(|| {
158 sum(self.dyn_array_ref)
160 })
161 .transpose()?
162 }
163 Stat::NullCount => Some(self.dyn_array_ref.invalid_count().into()),
164 Stat::IsConstant => {
165 if self.dyn_array_ref.is_empty() {
166 None
167 } else {
168 is_constant(self.dyn_array_ref)?.map(|v| v.into())
169 }
170 }
171 Stat::IsSorted => is_sorted(self.dyn_array_ref)?.map(|v| v.into()),
172 Stat::IsStrictSorted => is_strict_sorted(self.dyn_array_ref)?.map(|v| v.into()),
173 Stat::UncompressedSizeInBytes => {
174 let mut builder =
175 builder_with_capacity(self.dyn_array_ref.dtype(), self.dyn_array_ref.len());
176 unsafe {
177 builder.extend_from_array_unchecked(self.dyn_array_ref);
178 }
179 let nbytes = builder.finish().nbytes();
180 self.set(stat, Precision::exact(nbytes));
181 Some(nbytes.into())
182 }
183 Stat::NaNCount => {
184 Stat::NaNCount
185 .dtype(self.dyn_array_ref.dtype())
186 .is_some()
187 .then(|| {
188 nan_count(self.dyn_array_ref)
190 })
191 .transpose()?
192 .map(|s| s.into())
193 }
194 })
195 }
196
197 pub fn compute_all(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
198 let mut stats_set = StatsSet::default();
199 for &stat in stats {
200 if let Some(s) = self.compute_stat(stat)? {
201 stats_set.set(stat, Precision::exact(s.into_value()))
202 }
203 }
204 Ok(stats_set)
205 }
206}
207
208impl StatsSetRef<'_> {
209 pub fn compute_as<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
210 &self,
211 stat: Stat,
212 ) -> Option<U> {
213 self.compute_stat(stat)
214 .inspect_err(|e| log::warn!("Failed to compute stat {stat}: {e}"))
215 .ok()
216 .flatten()
217 .map(|s| U::try_from(&s))
218 .transpose()
219 .unwrap_or_else(|err| {
220 vortex_panic!(
221 err,
222 "Failed to compute stat {} as {}",
223 stat,
224 std::any::type_name::<U>()
225 )
226 })
227 }
228
229 pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
230 self.array_stats.set(stat, value);
231 }
232
233 pub fn clear(&self, stat: Stat) {
234 self.array_stats.clear(stat);
235 }
236
237 pub fn compute_min<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(&self) -> Option<U> {
238 self.compute_as(Stat::Min)
239 }
240
241 pub fn compute_max<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(&self) -> Option<U> {
242 self.compute_as(Stat::Max)
243 }
244
245 pub fn compute_is_sorted(&self) -> Option<bool> {
246 self.compute_as(Stat::IsSorted)
247 }
248
249 pub fn compute_is_strict_sorted(&self) -> Option<bool> {
250 self.compute_as(Stat::IsStrictSorted)
251 }
252
253 pub fn compute_is_constant(&self) -> Option<bool> {
254 self.compute_as(Stat::IsConstant)
255 }
256
257 pub fn compute_null_count(&self) -> Option<usize> {
258 self.compute_as(Stat::NullCount)
259 }
260
261 pub fn compute_uncompressed_size_in_bytes(&self) -> Option<usize> {
262 self.compute_as(Stat::UncompressedSizeInBytes)
263 }
264}
265
266impl StatsProvider for StatsSetRef<'_> {
267 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
268 self.array_stats
269 .inner
270 .read()
271 .as_typed_ref(self.dyn_array_ref.dtype())
272 .get(stat)
273 }
274
275 fn len(&self) -> usize {
276 self.array_stats.inner.read().len()
277 }
278}