Skip to main content

vortex_array/stats/
expr.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Expression constructors for statistics backed by aggregate functions.
5
6use crate::aggregate_fn::AggregateFnRef;
7use crate::aggregate_fn::AggregateFnVTableExt;
8use crate::aggregate_fn::EmptyOptions;
9use crate::aggregate_fn::NumericalAggregateOpts;
10use crate::aggregate_fn::fns::all_nan::AllNan;
11use crate::aggregate_fn::fns::all_non_nan::AllNonNan;
12use crate::aggregate_fn::fns::all_non_null::AllNonNull;
13use crate::aggregate_fn::fns::all_null::AllNull;
14use crate::aggregate_fn::fns::min_max::MinMax;
15use crate::aggregate_fn::fns::nan_count::NanCount;
16use crate::aggregate_fn::fns::null_count::NullCount;
17use crate::aggregate_fn::fns::sum::Sum;
18use crate::expr::Expression;
19use crate::scalar_fn::ScalarFnVTableExt;
20pub use crate::scalar_fn::fns::stat::StatFn;
21pub use crate::scalar_fn::fns::stat::StatOptions;
22
23/// Creates an expression that reads a stored aggregate statistic for `expr`.
24///
25/// If the statistic is not available in the current stats scope, evaluating the expression returns
26/// a nullable all-null array with the aggregate return type.
27pub fn stat(expr: Expression, aggregate_fn: AggregateFnRef) -> Expression {
28    StatFn.new_expr(StatOptions::new(aggregate_fn), [expr])
29}
30
31/// Creates `stat(expr, min_max)`, returning a nullable `{ min, max }` struct statistic.
32pub fn min_max(expr: Expression) -> Expression {
33    // Statistics follow NaN-skipping semantics; request it explicitly rather than via the default.
34    stat(expr, MinMax.bind(NumericalAggregateOpts::skip_nans()))
35}
36
37/// Creates `stat(expr, sum)`, returning a nullable sum statistic.
38pub fn sum(expr: Expression) -> Expression {
39    // Statistics follow NaN-skipping semantics; request it explicitly rather than via the default.
40    stat(expr, Sum.bind(NumericalAggregateOpts::skip_nans()))
41}
42
43/// Creates `stat(expr, null_count)`, returning a nullable null-count statistic.
44pub fn null_count(expr: Expression) -> Expression {
45    stat(expr, NullCount.bind(EmptyOptions))
46}
47
48/// Creates `stat(expr, all_null)`, returning a nullable all-null statistic.
49pub fn all_null(expr: Expression) -> Expression {
50    stat(expr, AllNull.bind(EmptyOptions))
51}
52
53/// Creates `stat(expr, all_nan)`, returning a nullable all-NaN statistic.
54pub fn all_nan(expr: Expression) -> Expression {
55    stat(expr, AllNan.bind(EmptyOptions))
56}
57
58/// Creates `stat(expr, all_non_null)`, returning a nullable all-non-null statistic.
59pub fn all_non_null(expr: Expression) -> Expression {
60    stat(expr, AllNonNull.bind(EmptyOptions))
61}
62
63/// Creates `stat(expr, all_non_nan)`, returning a nullable all-non-NaN statistic.
64pub fn all_non_nan(expr: Expression) -> Expression {
65    stat(expr, AllNonNan.bind(EmptyOptions))
66}
67
68/// Creates `stat(expr, nan_count)`, returning a nullable NaN-count statistic.
69pub fn nan_count(expr: Expression) -> Expression {
70    stat(expr, NanCount.bind(EmptyOptions))
71}
72
#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::all_nan;
    use super::all_non_nan;
    use super::all_non_null;
    use super::all_null;
    use super::null_count;
    use super::stat;
    use super::sum;
    use crate::Canonical;
    use crate::IntoArray;
    use crate::VortexSessionExecute;
    use crate::array_session;
    use crate::arrays::Chunked;
    use crate::arrays::ChunkedArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::chunked::ChunkedArrayExt;
    use crate::assert_arrays_eq;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::dtype::PType;
    use crate::expr::root;
    use crate::expr::stats::Precision;
    use crate::expr::stats::Stat;
    use crate::scalar::Scalar;
    use crate::scalar::ScalarValue;
    use crate::validity::Validity;

    /// Session shared by all tests in this module, built on first use.
    static SESSION: LazyLock<VortexSession> = LazyLock::new(array_session);

    /// An exact cached `Sum` of 6 is read back as the constant 6 for each of the three rows.
    #[test]
    fn stat_expr_reads_cached_sum() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached_sum = Scalar::primitive(6i64, Nullability::Nullable)
            .into_value()
            .vortex_expect("non-null sum");
        input.statistics().set(Stat::Sum, Precision::exact(cached_sum));

        let applied = input.apply(&sum(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let six = Scalar::primitive(6i64, Nullability::Nullable);
        let want = ConstantArray::new(six, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// With no cached `Sum`, the expression evaluates to three nulls of nullable `i64`.
    #[test]
    fn stat_expr_returns_null_when_sum_is_missing() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();

        let applied = input.apply(&sum(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let null_i64 = Scalar::null(DType::Primitive(PType::I64, Nullability::Nullable));
        let want = ConstantArray::new(null_i64, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// Only the first chunk carries a cached `Sum`: its rows read 3, the second chunk's rows are
    /// null, and the applied result is still a two-chunk array.
    #[test]
    fn stat_expr_reads_cached_sum_per_chunk() -> VortexResult<()> {
        let first = buffer![1i32, 2].into_array();
        let first_sum = Scalar::primitive(3i64, Nullability::Nullable)
            .into_value()
            .vortex_expect("non-null sum");
        first.statistics().set(Stat::Sum, Precision::exact(first_sum));
        let second = buffer![4i32, 5, 6].into_array();

        let chunk_dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let input = ChunkedArray::try_new(vec![first, second], chunk_dtype)?.into_array();

        let applied = input.apply(&sum(root()))?;

        let nchunks = applied
            .as_opt::<Chunked>()
            .vortex_expect("stat expression should preserve chunked alignment")
            .nchunks();
        assert_eq!(nchunks, 2);

        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let row_validity = Validity::from_iter([true, true, false, false, false]);
        let want = PrimitiveArray::new(buffer![3i64, 3, 0, 0, 0], row_validity).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An exact cached `NullCount` of 2 is read back as the constant 2 for each of the four rows.
    #[test]
    fn stat_expr_reads_cached_null_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), None]).into_array();
        let cached_count = Scalar::primitive(2u64, Nullability::NonNullable)
            .into_value()
            .vortex_expect("non-null null_count");
        input.statistics().set(Stat::NullCount, Precision::exact(cached_count));

        let applied = input.apply(&null_count(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let two = Scalar::primitive(2u64, Nullability::Nullable);
        let want = ConstantArray::new(two, 4).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An exact `NullCount` of 3 on a three-row array makes `all_null` read as `true`.
    #[test]
    fn stat_expr_reads_cached_all_null_from_null_count() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, None, None]).into_array();
        let cached = Precision::exact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NullCount, cached);

        let applied = input.apply(&all_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let want = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact `NullCount` of 2 on a three-row array makes `all_null` read as `false`.
    #[test]
    fn stat_expr_reads_cached_all_null_false_from_inexact_low_null_count() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, Some(2), None]).into_array();
        let cached = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NullCount, cached);

        let applied = input.apply(&all_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let no = Scalar::bool(false, Nullability::Nullable);
        let want = ConstantArray::new(no, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact `NullCount` equal to the array length leaves `all_null` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_full_null_count_as_all_null() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, Some(2), None]).into_array();
        let cached = Precision::inexact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NullCount, cached);

        let applied = input.apply(&all_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let unknown = Scalar::null(DType::Bool(Nullability::Nullable));
        let want = ConstantArray::new(unknown, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An exact `NullCount` of 0 makes `all_non_null` read as `true`.
    #[test]
    fn stat_expr_reads_cached_all_non_null_from_null_count() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached = Precision::exact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NullCount, cached);

        let applied = input.apply(&all_non_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let want = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact `NullCount` of 0 still makes `all_non_null` read as `true`.
    #[test]
    fn stat_expr_reads_cached_all_non_null_true_from_inexact_zero_null_count() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached = Precision::inexact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NullCount, cached);

        let applied = input.apply(&all_non_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let want = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact, non-zero `NullCount` leaves `all_non_null` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_nonzero_null_count_as_all_non_null() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), None]).into_array();
        let cached = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NullCount, cached);

        let applied = input.apply(&all_non_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let unknown = Scalar::null(DType::Bool(Nullability::Nullable));
        let want = ConstantArray::new(unknown, 4).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// Asking for `all_nan` on an `i32` array fails, either when applying or when executing.
    #[test]
    fn stat_expr_rejects_all_nan_for_non_float() -> VortexResult<()> {
        let input = PrimitiveArray::empty::<i32>(Nullability::NonNullable).into_array();
        let mut ctx = SESSION.create_execution_ctx();

        let outcome = match input.apply(&all_nan(root())) {
            Ok(applied) => applied.execute::<Canonical>(&mut ctx),
            Err(err) => Err(err),
        };

        assert!(outcome.is_err());
        Ok(())
    }

    /// An exact `NaNCount` of 3 on a three-row array makes `all_nan` read as `true`.
    #[test]
    fn stat_expr_reads_cached_all_nan_from_nan_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(f32::NAN), Some(f32::NAN), Some(f32::NAN)])
                .into_array();
        let cached = Precision::exact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NaNCount, cached);

        let applied = input.apply(&all_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let want = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact `NaNCount` of 2 on a three-row array makes `all_nan` read as `false`.
    #[test]
    fn stat_expr_reads_cached_all_nan_false_from_inexact_low_nan_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(f32::NAN), Some(1.0f32), Some(f32::NAN)])
                .into_array();
        let cached = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NaNCount, cached);

        let applied = input.apply(&all_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let no = Scalar::bool(false, Nullability::Nullable);
        let want = ConstantArray::new(no, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact `NaNCount` equal to the array length leaves `all_nan` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_full_nan_count_as_all_nan() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(f32::NAN), Some(1.0f32), Some(f32::NAN)])
                .into_array();
        let cached = Precision::inexact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NaNCount, cached);

        let applied = input.apply(&all_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let unknown = Scalar::null(DType::Bool(Nullability::Nullable));
        let want = ConstantArray::new(unknown, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact `NaNCount` of 0 makes `all_non_nan` read as `true`.
    #[test]
    fn stat_expr_reads_cached_all_non_nan_true_from_inexact_zero_nan_count() -> VortexResult<()> {
        let input = buffer![1.0f32, 2.0, 3.0].into_array();
        let cached = Precision::inexact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NaNCount, cached);

        let applied = input.apply(&all_non_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let want = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact, non-zero `NaNCount` leaves `all_non_nan` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_nonzero_nan_count_as_all_non_nan() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter([Some(1.0f32), Some(f32::NAN), Some(3.0)])
            .into_array();
        let cached = Precision::inexact(ScalarValue::from(1u64));
        input.statistics().set(Stat::NaNCount, cached);

        let applied = input.apply(&all_non_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let unknown = Scalar::null(DType::Bool(Nullability::Nullable));
        let want = ConstantArray::new(unknown, 3).into_array();
        assert_arrays_eq!(actual, want, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// Exact cached `Min` and `Max` are each read back through the aggregate function that the
    /// corresponding `Stat` variant reports.
    #[test]
    fn stat_expr_reads_cached_min_and_max() -> VortexResult<()> {
        let input = buffer![3i32, 1, 2].into_array();
        let cached_min = Precision::exact(ScalarValue::from(1i32));
        input.statistics().set(Stat::Min, cached_min);
        let cached_max = Precision::exact(ScalarValue::from(3i32));
        input.statistics().set(Stat::Max, cached_max);

        // `apply` takes the array by value, so the first lookup works on a clone.
        let min_source = input.clone();
        let min_fn = Stat::Min
            .aggregate_fn()
            .vortex_expect("min should have an aggregate function");
        let min_applied = min_source.apply(&stat(root(), min_fn))?;
        let mut min_ctx = SESSION.create_execution_ctx();
        let min_actual = min_applied.execute::<Canonical>(&mut min_ctx)?.into_array();

        let one = Scalar::primitive(1i32, Nullability::Nullable);
        let min_want = ConstantArray::new(one, 3).into_array();
        assert_arrays_eq!(min_actual, min_want, &mut SESSION.create_execution_ctx());

        let max_fn = Stat::Max
            .aggregate_fn()
            .vortex_expect("max should have an aggregate function");
        let max_applied = input.apply(&stat(root(), max_fn))?;
        let mut max_ctx = SESSION.create_execution_ctx();
        let max_actual = max_applied.execute::<Canonical>(&mut max_ctx)?.into_array();

        let three = Scalar::primitive(3i32, Nullability::Nullable);
        let max_want = ConstantArray::new(three, 3).into_array();
        assert_arrays_eq!(max_actual, max_want, &mut SESSION.create_execution_ctx());

        Ok(())
    }
}