vortex_array/stats/
array.rs1use std::sync::Arc;
7
8use parking_lot::RwLock;
9use vortex_error::{VortexError, VortexResult, vortex_panic};
10use vortex_scalar::{Scalar, ScalarValue};
11
12use super::{Precision, Stat, StatsProvider, StatsSet, StatsSetIntoIter, TypedStatsSetRef};
13use crate::Array;
14use crate::builders::builder_with_capacity;
15use crate::compute::{
16 MinMaxResult, is_constant, is_sorted, is_strict_sorted, min_max, nan_count, sum,
17};
18
19#[derive(Clone, Default, Debug)]
22pub struct ArrayStats {
23 inner: Arc<RwLock<StatsSet>>,
24}
25
26pub struct StatsSetRef<'a> {
30 dyn_array_ref: &'a dyn Array,
32 array_stats: &'a ArrayStats,
33}
34
35impl ArrayStats {
36 pub fn to_ref<'a>(&'a self, array: &'a dyn Array) -> StatsSetRef<'a> {
37 StatsSetRef {
38 dyn_array_ref: array,
39 array_stats: self,
40 }
41 }
42
43 pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
44 self.inner.write().set(stat, value);
45 }
46
47 pub fn clear(&self, stat: Stat) {
48 self.inner.write().clear(stat);
49 }
50
51 pub fn retain(&self, stats: &[Stat]) {
52 self.inner.write().retain_only(stats);
53 }
54}
55
56impl From<StatsSet> for ArrayStats {
57 fn from(value: StatsSet) -> Self {
58 Self {
59 inner: Arc::new(RwLock::new(value)),
60 }
61 }
62}
63
64impl From<ArrayStats> for StatsSet {
65 fn from(value: ArrayStats) -> Self {
66 value.inner.read().clone()
67 }
68}
69
70impl StatsSetRef<'_> {
71 pub fn set_iter(&self, iter: StatsSetIntoIter) {
72 let mut guard = self.array_stats.inner.write();
73 for (stat, value) in iter {
74 guard.set(stat, value);
75 }
76 }
77
78 pub fn inherit_from(&self, stats: StatsSetRef<'_>) {
79 if !Arc::ptr_eq(&self.array_stats.inner, &stats.array_stats.inner) {
81 stats.with_iter(|iter| self.inherit(iter));
82 }
83 }
84
85 pub fn inherit<'a>(&self, iter: impl Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) {
86 let mut guard = self.array_stats.inner.write();
87 for (stat, value) in iter {
88 if !value.is_exact() {
89 if !guard.get(*stat).is_some_and(|v| v.is_exact()) {
90 guard.set(*stat, value.clone());
91 }
92 } else {
93 guard.set(*stat, value.clone());
94 }
95 }
96 }
97
98 pub fn with_typed_stats_set<U, F: FnOnce(TypedStatsSetRef) -> U>(&self, apply: F) -> U {
99 apply(
100 self.array_stats
101 .inner
102 .read()
103 .as_typed_ref(self.dyn_array_ref.dtype()),
104 )
105 }
106
107 pub fn to_owned(&self) -> StatsSet {
108 self.array_stats.inner.read().clone()
109 }
110
111 pub fn with_iter<
112 F: for<'a> FnOnce(&mut dyn Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) -> R,
113 R,
114 >(
115 &self,
116 f: F,
117 ) -> R {
118 let lock = self.array_stats.inner.read();
119 f(&mut lock.iter())
120 }
121
122 pub fn compute_stat(&self, stat: Stat) -> VortexResult<Option<Scalar>> {
123 if let Some(Precision::Exact(s)) = self.get(stat) {
125 return Ok(Some(s));
126 }
127
128 Ok(match stat {
129 Stat::Min => min_max(self.dyn_array_ref)?.map(|MinMaxResult { min, max: _ }| min),
130 Stat::Max => min_max(self.dyn_array_ref)?.map(|MinMaxResult { min: _, max }| max),
131 Stat::Sum => {
132 Stat::Sum
133 .dtype(self.dyn_array_ref.dtype())
134 .is_some()
135 .then(|| {
136 sum(self.dyn_array_ref)
138 })
139 .transpose()?
140 }
141 Stat::NullCount => Some(self.dyn_array_ref.invalid_count().into()),
142 Stat::IsConstant => {
143 if self.dyn_array_ref.is_empty() {
144 None
145 } else {
146 is_constant(self.dyn_array_ref)?.map(|v| v.into())
147 }
148 }
149 Stat::IsSorted => is_sorted(self.dyn_array_ref)?.map(|v| v.into()),
150 Stat::IsStrictSorted => is_strict_sorted(self.dyn_array_ref)?.map(|v| v.into()),
151 Stat::UncompressedSizeInBytes => {
152 let mut builder =
153 builder_with_capacity(self.dyn_array_ref.dtype(), self.dyn_array_ref.len());
154 unsafe {
155 builder.extend_from_array_unchecked(self.dyn_array_ref);
156 }
157 let nbytes = builder.finish().nbytes();
158 self.set(stat, Precision::exact(nbytes));
159 Some(nbytes.into())
160 }
161 Stat::NaNCount => {
162 Stat::NaNCount
163 .dtype(self.dyn_array_ref.dtype())
164 .is_some()
165 .then(|| {
166 nan_count(self.dyn_array_ref)
168 })
169 .transpose()?
170 .map(|s| s.into())
171 }
172 })
173 }
174
175 pub fn compute_all(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
176 let mut stats_set = StatsSet::default();
177 for &stat in stats {
178 if let Some(s) = self.compute_stat(stat)? {
179 stats_set.set(stat, Precision::exact(s.into_value()))
180 }
181 }
182 Ok(stats_set)
183 }
184}
185
186impl StatsSetRef<'_> {
187 pub fn compute_as<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
188 &self,
189 stat: Stat,
190 ) -> Option<U> {
191 self.compute_stat(stat)
192 .inspect_err(|e| log::warn!("Failed to compute stat {stat}: {e}"))
193 .ok()
194 .flatten()
195 .map(|s| U::try_from(&s))
196 .transpose()
197 .unwrap_or_else(|err| {
198 vortex_panic!(
199 err,
200 "Failed to compute stat {} as {}",
201 stat,
202 std::any::type_name::<U>()
203 )
204 })
205 }
206
207 pub fn set(&self, stat: Stat, value: Precision<ScalarValue>) {
208 self.array_stats.set(stat, value);
209 }
210
211 pub fn clear(&self, stat: Stat) {
212 self.array_stats.clear(stat);
213 }
214
215 pub fn retain(&self, stats: &[Stat]) {
216 self.array_stats.retain(stats);
217 }
218
219 pub fn compute_min<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(&self) -> Option<U> {
220 self.compute_as(Stat::Min)
221 }
222
223 pub fn compute_max<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(&self) -> Option<U> {
224 self.compute_as(Stat::Max)
225 }
226
227 pub fn compute_is_sorted(&self) -> Option<bool> {
228 self.compute_as(Stat::IsSorted)
229 }
230
231 pub fn compute_is_strict_sorted(&self) -> Option<bool> {
232 self.compute_as(Stat::IsStrictSorted)
233 }
234
235 pub fn compute_is_constant(&self) -> Option<bool> {
236 self.compute_as(Stat::IsConstant)
237 }
238
239 pub fn compute_null_count(&self) -> Option<usize> {
240 self.compute_as(Stat::NullCount)
241 }
242
243 pub fn compute_uncompressed_size_in_bytes(&self) -> Option<usize> {
244 self.compute_as(Stat::UncompressedSizeInBytes)
245 }
246}
247
248impl StatsProvider for StatsSetRef<'_> {
249 fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
250 self.array_stats
251 .inner
252 .read()
253 .as_typed_ref(self.dyn_array_ref.dtype())
254 .get(stat)
255 }
256
257 fn len(&self) -> usize {
258 self.array_stats.inner.read().len()
259 }
260}