vortex_array/stats/
array.rs1use std::sync::Arc;
4
5use parking_lot::RwLock;
6use vortex_error::{VortexError, VortexResult, vortex_panic};
7use vortex_scalar::ScalarValue;
8
9use super::{
10 Precision, Stat, StatType, StatsProvider, StatsProviderExt, StatsSet, StatsSetIntoIter,
11};
12use crate::Array;
13use crate::compute::{
14 MinMaxResult, is_constant, is_sorted, is_strict_sorted, min_max, sum, uncompressed_size,
15};
16
/// A shareable, lock-protected container for an array's statistics.
///
/// The underlying [`StatsSet`] lives behind an `Arc<RwLock<_>>`, so cloning an
/// `ArrayStats` produces another handle to the *same* set rather than an
/// independent copy.
#[derive(Clone, Default, Debug)]
pub struct ArrayStats {
    /// The shared statistics; all reads and writes go through this lock.
    inner: Arc<RwLock<StatsSet>>,
}
23
/// A view that pairs a borrowed array with an [`ArrayStats`] handle.
///
/// Statistics are read from and written to `parent_stats`; the array
/// reference is what the `compute_*` methods pass to the compute functions
/// when a statistic has to be calculated.
pub struct StatsSetRef<'a> {
    /// The array that statistics are computed from.
    dyn_array_ref: &'a dyn Array,
    /// Handle to the statistics storage this view reads and writes.
    parent_stats: ArrayStats,
}
30
31impl ArrayStats {
32 pub fn to_ref<'a>(&self, array: &'a dyn Array) -> StatsSetRef<'a> {
33 StatsSetRef {
34 dyn_array_ref: array,
35 parent_stats: self.clone(),
36 }
37 }
38
39 pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
40 self.inner.write().set(stat, value);
41 }
42
43 pub fn clear(&self, stat: Stat) {
44 self.inner.write().clear(stat);
45 }
46
47 pub fn retain(&self, stats: &[Stat]) {
48 self.inner.write().retain_only(stats);
49 }
50}
51
52impl From<StatsSet> for ArrayStats {
53 fn from(value: StatsSet) -> Self {
54 Self {
55 inner: Arc::new(RwLock::new(value)),
56 }
57 }
58}
59
60impl From<ArrayStats> for StatsSet {
61 fn from(value: ArrayStats) -> Self {
62 value.inner.read().clone()
63 }
64}
65
66impl StatsProvider for ArrayStats {
67 fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
68 let guard = self.inner.read();
69 guard.get(stat)
70 }
71
72 fn len(&self) -> usize {
73 let guard = self.inner.read();
74 guard.len()
75 }
76}
77
78impl StatsSetRef<'_> {
79 pub fn set_iter(&self, iter: StatsSetIntoIter) {
80 let mut guard = self.parent_stats.inner.write();
81
82 for (stat, value) in iter {
83 guard.set(stat, value);
84 }
85 }
86
87 pub fn inherit(&self, parent_stats: StatsSetRef<'_>) {
88 self.set_iter(parent_stats.into_iter());
90 }
91
92 pub fn to_owned(&self) -> StatsSet {
94 self.parent_stats.inner.read().clone()
95 }
96
97 pub fn into_iter(&self) -> StatsSetIntoIter {
98 self.to_owned().into_iter()
99 }
100
101 pub fn compute_stat(&self, stat: Stat) -> VortexResult<Option<ScalarValue>> {
102 if let Some(Precision::Exact(stat)) = self.get(stat) {
104 return Ok(Some(stat));
105 }
106
107 Ok(match stat {
110 Stat::Min => {
111 min_max(self.dyn_array_ref)?.map(|MinMaxResult { min, max: _ }| min.into_value())
112 }
113 Stat::Max => {
114 min_max(self.dyn_array_ref)?.map(|MinMaxResult { min: _, max }| max.into_value())
115 }
116 Stat::Sum => {
117 Stat::Sum
118 .dtype(self.dyn_array_ref.dtype())
119 .is_some()
120 .then(|| {
121 sum(self.dyn_array_ref)
123 })
124 .transpose()?
125 .map(|s| s.into_value())
126 }
127 Stat::NullCount => Some(self.dyn_array_ref.invalid_count()?.into()),
128 Stat::IsConstant => {
129 if self.dyn_array_ref.is_empty() {
130 None
131 } else {
132 Some(is_constant(self.dyn_array_ref)?.into())
133 }
134 }
135 Stat::IsSorted => Some(is_sorted(self.dyn_array_ref)?.into()),
136 Stat::IsStrictSorted => Some(is_strict_sorted(self.dyn_array_ref)?.into()),
137 Stat::UncompressedSizeInBytes => Some(uncompressed_size(self.dyn_array_ref)?.into()),
138 })
139 }
140
141 pub fn compute_all(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
142 let mut stats_set = StatsSet::default();
143 for stat in stats {
144 if let Some(s) = self.compute_stat(*stat)? {
145 stats_set.set(*stat, Precision::exact(s))
146 }
147 }
148 Ok(stats_set)
149 }
150}
151
152impl StatsSetRef<'_> {
153 pub fn get_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
154 &self,
155 stat: Stat,
156 ) -> Option<Precision<U>> {
157 StatsProviderExt::get_as::<U>(self, stat)
158 }
159
160 pub fn get_as_bound<S, U>(&self) -> Option<S::Bound>
161 where
162 S: StatType<U>,
163 U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>,
164 {
165 StatsProviderExt::get_as_bound::<S, U>(self)
166 }
167
168 pub fn compute_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
169 &self,
170 stat: Stat,
171 ) -> Option<U> {
172 self.compute_stat(stat)
173 .inspect_err(|e| log::warn!("Failed to compute stat {}: {}", stat, e))
174 .ok()
175 .flatten()
176 .map(|s| U::try_from(&s))
177 .transpose()
178 .unwrap_or_else(|err| {
179 vortex_panic!(
180 err,
181 "Failed to compute stat {} as {}",
182 stat,
183 std::any::type_name::<U>()
184 )
185 })
186 }
187
188 pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
189 self.parent_stats.set(stat, value);
190 }
191
192 pub fn clear(&self, stat: Stat) {
193 self.parent_stats.clear(stat);
194 }
195
196 pub fn retain(&self, stats: &[Stat]) {
197 self.parent_stats.retain(stats);
198 }
199
200 pub fn compute_min<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
201 &self,
202 ) -> Option<U> {
203 self.compute_as(Stat::Min)
204 }
205
206 pub fn compute_max<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
207 &self,
208 ) -> Option<U> {
209 self.compute_as(Stat::Max)
210 }
211
212 pub fn compute_is_sorted(&self) -> Option<bool> {
213 self.compute_as(Stat::IsSorted)
214 }
215
216 pub fn compute_is_strict_sorted(&self) -> Option<bool> {
217 self.compute_as(Stat::IsStrictSorted)
218 }
219
220 pub fn compute_is_constant(&self) -> Option<bool> {
221 self.compute_as(Stat::IsConstant)
222 }
223
224 pub fn compute_null_count(&self) -> Option<usize> {
225 self.compute_as(Stat::NullCount)
226 }
227
228 pub fn compute_uncompressed_size_in_bytes(&self) -> Option<usize> {
229 self.compute_as(Stat::UncompressedSizeInBytes)
230 }
231}
232
233impl StatsProvider for StatsSetRef<'_> {
234 fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
235 self.parent_stats.get(stat)
236 }
237
238 fn len(&self) -> usize {
239 self.parent_stats.len()
240 }
241}