vortex_array/stats/
array.rs

1//! Stats as they are stored on arrays.
2
3use std::sync::Arc;
4
5use parking_lot::RwLock;
6use vortex_error::{VortexError, VortexResult, vortex_panic};
7use vortex_scalar::ScalarValue;
8
9use super::{
10    Precision, Stat, StatType, StatsProvider, StatsProviderExt, StatsSet, StatsSetIntoIter,
11};
12use crate::Array;
13use crate::compute::{
14    MinMaxResult, is_constant, is_sorted, is_strict_sorted, min_max, sum, uncompressed_size,
15};
16
17/// A shared [`StatsSet`] stored in an array. Can be shared by copies of the array and can also be mutated in place.
18// TODO(adamg): This is a very bad name.
19#[derive(Clone, Default, Debug)]
20pub struct ArrayStats {
21    inner: Arc<RwLock<StatsSet>>,
22}
23
24/// Reference to an array's [`StatsSet`]. Can be used to get and mutate the underlying stats.
25pub struct StatsSetRef<'a> {
26    // We need to reference back to the array
27    dyn_array_ref: &'a dyn Array,
28    parent_stats: ArrayStats,
29}
30
31impl ArrayStats {
32    pub fn to_ref<'a>(&self, array: &'a dyn Array) -> StatsSetRef<'a> {
33        StatsSetRef {
34            dyn_array_ref: array,
35            parent_stats: self.clone(),
36        }
37    }
38
39    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
40        self.inner.write().set(stat, value);
41    }
42
43    pub fn clear(&self, stat: Stat) {
44        self.inner.write().clear(stat);
45    }
46
47    pub fn retain(&self, stats: &[Stat]) {
48        self.inner.write().retain_only(stats);
49    }
50}
51
52impl From<StatsSet> for ArrayStats {
53    fn from(value: StatsSet) -> Self {
54        Self {
55            inner: Arc::new(RwLock::new(value)),
56        }
57    }
58}
59
60impl From<ArrayStats> for StatsSet {
61    fn from(value: ArrayStats) -> Self {
62        value.inner.read().clone()
63    }
64}
65
66impl StatsProvider for ArrayStats {
67    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
68        let guard = self.inner.read();
69        guard.get(stat)
70    }
71
72    fn len(&self) -> usize {
73        let guard = self.inner.read();
74        guard.len()
75    }
76}
77
78impl StatsSetRef<'_> {
79    pub fn set_iter(&self, iter: StatsSetIntoIter) {
80        let mut guard = self.parent_stats.inner.write();
81
82        for (stat, value) in iter {
83            guard.set(stat, value);
84        }
85    }
86
87    pub fn inherit(&self, parent_stats: StatsSetRef<'_>) {
88        // TODO(ngates): depending on statistic, this should choose the more precise one
89        self.set_iter(parent_stats.into_iter());
90    }
91
92    // TODO(adamg): potentially problematic name
93    pub fn to_owned(&self) -> StatsSet {
94        self.parent_stats.inner.read().clone()
95    }
96
97    pub fn into_iter(&self) -> StatsSetIntoIter {
98        self.to_owned().into_iter()
99    }
100
101    pub fn compute_stat(&self, stat: Stat) -> VortexResult<Option<ScalarValue>> {
102        // If it's already computed and exact, we can return it.
103        if let Some(Precision::Exact(stat)) = self.get(stat) {
104            return Ok(Some(stat));
105        }
106
107        // NOTE(ngates): this is the beginning of the stats refactor that pushes stats compute into
108        //  regular compute functions.
109        Ok(match stat {
110            Stat::Min => {
111                min_max(self.dyn_array_ref)?.map(|MinMaxResult { min, max: _ }| min.into_value())
112            }
113            Stat::Max => {
114                min_max(self.dyn_array_ref)?.map(|MinMaxResult { min: _, max }| max.into_value())
115            }
116            Stat::Sum => {
117                Stat::Sum
118                    .dtype(self.dyn_array_ref.dtype())
119                    .is_some()
120                    .then(|| {
121                        // Sum is supported for this dtype.
122                        sum(self.dyn_array_ref)
123                    })
124                    .transpose()?
125                    .map(|s| s.into_value())
126            }
127            Stat::NullCount => Some(self.dyn_array_ref.invalid_count()?.into()),
128            Stat::IsConstant => {
129                if self.dyn_array_ref.is_empty() {
130                    None
131                } else {
132                    Some(is_constant(self.dyn_array_ref)?.into())
133                }
134            }
135            Stat::IsSorted => Some(is_sorted(self.dyn_array_ref)?.into()),
136            Stat::IsStrictSorted => Some(is_strict_sorted(self.dyn_array_ref)?.into()),
137            Stat::UncompressedSizeInBytes => Some(uncompressed_size(self.dyn_array_ref)?.into()),
138        })
139    }
140
141    pub fn compute_all(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
142        let mut stats_set = StatsSet::default();
143        for stat in stats {
144            if let Some(s) = self.compute_stat(*stat)? {
145                stats_set.set(*stat, Precision::exact(s))
146            }
147        }
148        Ok(stats_set)
149    }
150}
151
152impl StatsSetRef<'_> {
153    pub fn get_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
154        &self,
155        stat: Stat,
156    ) -> Option<Precision<U>> {
157        StatsProviderExt::get_as::<U>(self, stat)
158    }
159
160    pub fn get_as_bound<S, U>(&self) -> Option<S::Bound>
161    where
162        S: StatType<U>,
163        U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>,
164    {
165        StatsProviderExt::get_as_bound::<S, U>(self)
166    }
167
168    pub fn compute_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
169        &self,
170        stat: Stat,
171    ) -> Option<U> {
172        self.compute_stat(stat)
173            .inspect_err(|e| log::warn!("Failed to compute stat {}: {}", stat, e))
174            .ok()
175            .flatten()
176            .map(|s| U::try_from(&s))
177            .transpose()
178            .unwrap_or_else(|err| {
179                vortex_panic!(
180                    err,
181                    "Failed to compute stat {} as {}",
182                    stat,
183                    std::any::type_name::<U>()
184                )
185            })
186    }
187
188    pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
189        self.parent_stats.set(stat, value);
190    }
191
192    pub fn clear(&self, stat: Stat) {
193        self.parent_stats.clear(stat);
194    }
195
196    pub fn retain(&self, stats: &[Stat]) {
197        self.parent_stats.retain(stats);
198    }
199
200    pub fn compute_min<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
201        &self,
202    ) -> Option<U> {
203        self.compute_as(Stat::Min)
204    }
205
206    pub fn compute_max<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
207        &self,
208    ) -> Option<U> {
209        self.compute_as(Stat::Max)
210    }
211
212    pub fn compute_is_sorted(&self) -> Option<bool> {
213        self.compute_as(Stat::IsSorted)
214    }
215
216    pub fn compute_is_strict_sorted(&self) -> Option<bool> {
217        self.compute_as(Stat::IsStrictSorted)
218    }
219
220    pub fn compute_is_constant(&self) -> Option<bool> {
221        self.compute_as(Stat::IsConstant)
222    }
223
224    pub fn compute_null_count(&self) -> Option<usize> {
225        self.compute_as(Stat::NullCount)
226    }
227
228    pub fn compute_uncompressed_size_in_bytes(&self) -> Option<usize> {
229        self.compute_as(Stat::UncompressedSizeInBytes)
230    }
231}
232
233impl StatsProvider for StatsSetRef<'_> {
234    fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
235        self.parent_stats.get(stat)
236    }
237
238    fn len(&self) -> usize {
239        self.parent_stats.len()
240    }
241}