vortex_array/stats/
array.rs1use std::sync::Arc;
4
5use parking_lot::RwLock;
6use vortex_error::{VortexError, VortexResult, vortex_panic};
7use vortex_scalar::ScalarValue;
8
9use super::{
10 Precision, Stat, StatType, StatsProvider, StatsProviderExt, StatsSet, StatsSetIntoIter,
11};
12use crate::Array;
13use crate::compute::{
14 MinMaxResult, is_constant, is_sorted, is_strict_sorted, min_max, nan_count, sum,
15};
16
17#[derive(Clone, Default, Debug)]
20pub struct ArrayStats {
21 inner: Arc<RwLock<StatsSet>>,
22}
23
24pub struct StatsSetRef<'a> {
28 dyn_array_ref: &'a dyn Array,
30 parent_stats: ArrayStats,
31}
32
33impl ArrayStats {
34 pub fn to_ref<'a>(&self, array: &'a dyn Array) -> StatsSetRef<'a> {
35 StatsSetRef {
36 dyn_array_ref: array,
37 parent_stats: self.clone(),
38 }
39 }
40
41 pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
42 self.inner.write().set(stat, value);
43 }
44
45 pub fn clear(&self, stat: Stat) {
46 self.inner.write().clear(stat);
47 }
48
49 pub fn retain(&self, stats: &[Stat]) {
50 self.inner.write().retain_only(stats);
51 }
52}
53
54impl From<StatsSet> for ArrayStats {
55 fn from(value: StatsSet) -> Self {
56 Self {
57 inner: Arc::new(RwLock::new(value)),
58 }
59 }
60}
61
62impl From<ArrayStats> for StatsSet {
63 fn from(value: ArrayStats) -> Self {
64 value.inner.read().clone()
65 }
66}
67
68impl StatsProvider for ArrayStats {
69 fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
70 let guard = self.inner.read();
71 guard.get(stat)
72 }
73
74 fn len(&self) -> usize {
75 let guard = self.inner.read();
76 guard.len()
77 }
78}
79
80impl StatsSetRef<'_> {
81 pub fn set_iter(&self, iter: StatsSetIntoIter) {
82 let mut guard = self.parent_stats.inner.write();
83
84 for (stat, value) in iter {
85 guard.set(stat, value);
86 }
87 }
88
89 pub fn inherit(&self, parent_stats: StatsSetRef<'_>) {
90 self.set_iter(parent_stats.into_iter());
92 }
93
94 pub fn to_owned(&self) -> StatsSet {
96 self.parent_stats.inner.read().clone()
97 }
98
99 pub fn into_iter(&self) -> StatsSetIntoIter {
100 self.to_owned().into_iter()
101 }
102
103 pub fn compute_stat(&self, stat: Stat) -> VortexResult<Option<ScalarValue>> {
104 if let Some(Precision::Exact(stat)) = self.get(stat) {
106 return Ok(Some(stat));
107 }
108
109 Ok(match stat {
110 Stat::Min => {
111 min_max(self.dyn_array_ref)?.map(|MinMaxResult { min, max: _ }| min.into_value())
112 }
113 Stat::Max => {
114 min_max(self.dyn_array_ref)?.map(|MinMaxResult { min: _, max }| max.into_value())
115 }
116 Stat::Sum => {
117 Stat::Sum
118 .dtype(self.dyn_array_ref.dtype())
119 .is_some()
120 .then(|| {
121 sum(self.dyn_array_ref)
123 })
124 .transpose()?
125 .map(|s| s.into_value())
126 }
127 Stat::NullCount => Some(self.dyn_array_ref.invalid_count()?.into()),
128 Stat::IsConstant => {
129 if self.dyn_array_ref.is_empty() {
130 None
131 } else {
132 is_constant(self.dyn_array_ref)?.map(ScalarValue::from)
133 }
134 }
135 Stat::IsSorted => Some(is_sorted(self.dyn_array_ref)?.into()),
136 Stat::IsStrictSorted => Some(is_strict_sorted(self.dyn_array_ref)?.into()),
137 Stat::UncompressedSizeInBytes => {
138 let nbytes: ScalarValue =
139 (self.dyn_array_ref.to_canonical()?.as_ref().nbytes() as u64).into();
140 self.set(stat, Precision::exact(nbytes.clone()));
141 Some(nbytes)
142 }
143 Stat::NaNCount => {
144 Stat::NaNCount
145 .dtype(self.dyn_array_ref.dtype())
146 .is_some()
147 .then(|| {
148 nan_count(self.dyn_array_ref)
150 })
151 .transpose()?
152 .map(|s| s.into())
153 }
154 })
155 }
156
157 pub fn compute_all(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
158 let mut stats_set = StatsSet::default();
159 for &stat in stats {
160 if let Some(s) = self.compute_stat(stat)? {
161 stats_set.set(stat, Precision::exact(s))
162 }
163 }
164 Ok(stats_set)
165 }
166}
167
168impl StatsSetRef<'_> {
169 pub fn get_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
170 &self,
171 stat: Stat,
172 ) -> Option<Precision<U>> {
173 StatsProviderExt::get_as::<U>(self, stat)
174 }
175
176 pub fn get_as_bound<S, U>(&self) -> Option<S::Bound>
177 where
178 S: StatType<U>,
179 U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>,
180 {
181 StatsProviderExt::get_as_bound::<S, U>(self)
182 }
183
184 pub fn compute_as<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
185 &self,
186 stat: Stat,
187 ) -> Option<U> {
188 self.compute_stat(stat)
189 .inspect_err(|e| log::warn!("Failed to compute stat {stat}: {e}"))
190 .ok()
191 .flatten()
192 .map(|s| U::try_from(&s))
193 .transpose()
194 .unwrap_or_else(|err| {
195 vortex_panic!(
196 err,
197 "Failed to compute stat {} as {}",
198 stat,
199 std::any::type_name::<U>()
200 )
201 })
202 }
203
204 pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
205 self.parent_stats.set(stat, value);
206 }
207
208 pub fn clear(&self, stat: Stat) {
209 self.parent_stats.clear(stat);
210 }
211
212 pub fn retain(&self, stats: &[Stat]) {
213 self.parent_stats.retain(stats);
214 }
215
216 pub fn compute_min<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
217 &self,
218 ) -> Option<U> {
219 self.compute_as(Stat::Min)
220 }
221
222 pub fn compute_max<U: for<'a> TryFrom<&'a ScalarValue, Error = VortexError>>(
223 &self,
224 ) -> Option<U> {
225 self.compute_as(Stat::Max)
226 }
227
228 pub fn compute_is_sorted(&self) -> Option<bool> {
229 self.compute_as(Stat::IsSorted)
230 }
231
232 pub fn compute_is_strict_sorted(&self) -> Option<bool> {
233 self.compute_as(Stat::IsStrictSorted)
234 }
235
236 pub fn compute_is_constant(&self) -> Option<bool> {
237 self.compute_as(Stat::IsConstant)
238 }
239
240 pub fn compute_null_count(&self) -> Option<usize> {
241 self.compute_as(Stat::NullCount)
242 }
243
244 pub fn compute_uncompressed_size_in_bytes(&self) -> Option<usize> {
245 self.compute_as(Stat::UncompressedSizeInBytes)
246 }
247}
248
249impl StatsProvider for StatsSetRef<'_> {
250 fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
251 self.parent_stats.get(stat)
252 }
253
254 fn len(&self) -> usize {
255 self.parent_stats.len()
256 }
257}