Skip to main content

vortex_array/stats/
expr.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Expression constructors for statistics backed by aggregate functions.
5
6use crate::aggregate_fn::AggregateFnRef;
7use crate::aggregate_fn::AggregateFnVTableExt;
8use crate::aggregate_fn::EmptyOptions;
9use crate::aggregate_fn::fns::all_nan::AllNan;
10use crate::aggregate_fn::fns::all_non_nan::AllNonNan;
11use crate::aggregate_fn::fns::all_non_null::AllNonNull;
12use crate::aggregate_fn::fns::all_null::AllNull;
13use crate::aggregate_fn::fns::min_max::MinMax;
14use crate::aggregate_fn::fns::nan_count::NanCount;
15use crate::aggregate_fn::fns::null_count::NullCount;
16use crate::aggregate_fn::fns::sum::Sum;
17use crate::expr::Expression;
18use crate::scalar_fn::ScalarFnVTableExt;
19pub use crate::scalar_fn::fns::stat::StatFn;
20pub use crate::scalar_fn::fns::stat::StatOptions;
21
22/// Creates an expression that reads a stored aggregate statistic for `expr`.
23///
24/// If the statistic is not available in the current stats scope, evaluating the expression returns
25/// a nullable all-null array with the aggregate return type.
26pub fn stat(expr: Expression, aggregate_fn: AggregateFnRef) -> Expression {
27    StatFn.new_expr(StatOptions::new(aggregate_fn), [expr])
28}
29
30/// Creates `stat(expr, min_max)`, returning a nullable `{ min, max }` struct statistic.
31pub fn min_max(expr: Expression) -> Expression {
32    stat(expr, MinMax.bind(EmptyOptions))
33}
34
35/// Creates `stat(expr, sum)`, returning a nullable sum statistic.
36pub fn sum(expr: Expression) -> Expression {
37    stat(expr, Sum.bind(EmptyOptions))
38}
39
40/// Creates `stat(expr, null_count)`, returning a nullable null-count statistic.
41pub fn null_count(expr: Expression) -> Expression {
42    stat(expr, NullCount.bind(EmptyOptions))
43}
44
45/// Creates `stat(expr, all_null)`, returning a nullable all-null statistic.
46pub fn all_null(expr: Expression) -> Expression {
47    stat(expr, AllNull.bind(EmptyOptions))
48}
49
50/// Creates `stat(expr, all_nan)`, returning a nullable all-NaN statistic.
51pub fn all_nan(expr: Expression) -> Expression {
52    stat(expr, AllNan.bind(EmptyOptions))
53}
54
55/// Creates `stat(expr, all_non_null)`, returning a nullable all-non-null statistic.
56pub fn all_non_null(expr: Expression) -> Expression {
57    stat(expr, AllNonNull.bind(EmptyOptions))
58}
59
60/// Creates `stat(expr, all_non_nan)`, returning a nullable all-non-NaN statistic.
61pub fn all_non_nan(expr: Expression) -> Expression {
62    stat(expr, AllNonNan.bind(EmptyOptions))
63}
64
65/// Creates `stat(expr, nan_count)`, returning a nullable NaN-count statistic.
66pub fn nan_count(expr: Expression) -> Expression {
67    stat(expr, NanCount.bind(EmptyOptions))
68}
69
#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::all_nan;
    use super::all_non_nan;
    use super::all_non_null;
    use super::all_null;
    use super::null_count;
    use super::stat;
    use super::sum;
    use crate::Canonical;
    use crate::IntoArray;
    use crate::VortexSessionExecute;
    use crate::arrays::Chunked;
    use crate::arrays::ChunkedArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::chunked::ChunkedArrayExt;
    use crate::assert_arrays_eq;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::dtype::PType;
    use crate::expr::root;
    use crate::expr::stats::Precision;
    use crate::expr::stats::Stat;
    use crate::scalar::Scalar;
    use crate::scalar::ScalarValue;
    use crate::session::ArraySession;
    use crate::validity::Validity;

    /// Session shared by all tests in this module, built lazily on first use from an empty
    /// session with `ArraySession` attached.
    static SESSION: LazyLock<VortexSession> =
        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

    /// An exact cached `Sum` is expected back as a nullable constant array of the input length.
    #[test]
    fn stat_expr_reads_cached_sum() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached = Precision::exact(
            Scalar::primitive(6i64, Nullability::Nullable)
                .into_value()
                .vortex_expect("non-null sum"),
        );
        input.statistics().set(Stat::Sum, cached);

        let expr = sum(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want =
            ConstantArray::new(Scalar::primitive(6i64, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// With no `Sum` cached, the expression is expected to evaluate to nullable `i64` nulls.
    #[test]
    fn stat_expr_returns_null_when_sum_is_missing() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();

        let expr = sum(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let null_sum = Scalar::null(DType::Primitive(PType::I64, Nullability::Nullable));
        let want = ConstantArray::new(null_sum, 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// Only the first chunk has a cached `Sum`; the result must stay chunked, carry that sum
    /// for the first chunk's rows, and be null for the second chunk's rows.
    #[test]
    fn stat_expr_reads_cached_sum_per_chunk() -> VortexResult<()> {
        let first = buffer![1i32, 2].into_array();
        let cached = Precision::exact(
            Scalar::primitive(3i64, Nullability::Nullable)
                .into_value()
                .vortex_expect("non-null sum"),
        );
        first.statistics().set(Stat::Sum, cached);
        let second = buffer![4i32, 5, 6].into_array();

        let chunk_dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let input = ChunkedArray::try_new(vec![first, second], chunk_dtype)?.into_array();

        let expr = sum(root());
        let applied = input.apply(&expr)?;

        let as_chunked = applied
            .as_opt::<Chunked>()
            .vortex_expect("stat expression should preserve chunked alignment");
        assert_eq!(as_chunked.nchunks(), 2);

        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want_validity = Validity::from_iter([true, true, false, false, false]);
        let want = PrimitiveArray::new(buffer![3i64, 3, 0, 0, 0], want_validity).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An exact cached `NullCount` is expected back as a nullable `u64` constant array.
    #[test]
    fn stat_expr_reads_cached_null_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), None]).into_array();
        let cached = Precision::exact(
            Scalar::primitive(2u64, Nullability::NonNullable)
                .into_value()
                .vortex_expect("non-null null_count"),
        );
        input.statistics().set(Stat::NullCount, cached);

        let expr = null_count(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want =
            ConstantArray::new(Scalar::primitive(2u64, Nullability::Nullable), 4).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An exact `NullCount` equal to the length is expected to yield `all_null == true`.
    #[test]
    fn stat_expr_reads_cached_all_null_from_null_count() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, None, None]).into_array();
        let cached = Precision::exact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NullCount, cached);

        let expr = all_null(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want = ConstantArray::new(Scalar::bool(true, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An inexact `NullCount` below the length is expected to yield `all_null == false`.
    #[test]
    fn stat_expr_reads_cached_all_null_false_from_inexact_low_null_count() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, Some(2), None]).into_array();
        let cached = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NullCount, cached);

        let expr = all_null(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want = ConstantArray::new(Scalar::bool(false, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An inexact `NullCount` equal to the length is expected to leave `all_null` null.
    #[test]
    fn stat_expr_returns_null_for_inexact_full_null_count_as_all_null() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, Some(2), None]).into_array();
        let cached = Precision::inexact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NullCount, cached);

        let expr = all_null(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let null_bool = Scalar::null(DType::Bool(Nullability::Nullable));
        let want = ConstantArray::new(null_bool, 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An exact `NullCount` of zero is expected to yield `all_non_null == true`.
    #[test]
    fn stat_expr_reads_cached_all_non_null_from_null_count() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached = Precision::exact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NullCount, cached);

        let expr = all_non_null(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want = ConstantArray::new(Scalar::bool(true, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An inexact `NullCount` of zero is still expected to yield `all_non_null == true`.
    #[test]
    fn stat_expr_reads_cached_all_non_null_true_from_inexact_zero_null_count() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached = Precision::inexact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NullCount, cached);

        let expr = all_non_null(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want = ConstantArray::new(Scalar::bool(true, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An inexact non-zero `NullCount` is expected to leave `all_non_null` null.
    #[test]
    fn stat_expr_returns_null_for_inexact_nonzero_null_count_as_all_non_null() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), None]).into_array();
        let cached = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NullCount, cached);

        let expr = all_non_null(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let null_bool = Scalar::null(DType::Bool(Nullability::Nullable));
        let want = ConstantArray::new(null_bool, 4).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// Applying or executing `all_nan` over an `i32` array is expected to fail.
    #[test]
    fn stat_expr_rejects_all_nan_for_non_float() -> VortexResult<()> {
        let input = PrimitiveArray::empty::<i32>(Nullability::NonNullable).into_array();
        let mut ctx = SESSION.create_execution_ctx();
        let expr = all_nan(root());

        // The error may surface either when applying the expression or when executing it.
        let outcome = match input.apply(&expr) {
            Ok(applied) => applied.execute::<Canonical>(&mut ctx),
            Err(err) => Err(err),
        };

        assert!(outcome.is_err());
        Ok(())
    }

    /// An exact `NaNCount` equal to the length is expected to yield `all_nan == true`.
    #[test]
    fn stat_expr_reads_cached_all_nan_from_nan_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(f32::NAN), Some(f32::NAN), Some(f32::NAN)])
                .into_array();
        let cached = Precision::exact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NaNCount, cached);

        let expr = all_nan(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want = ConstantArray::new(Scalar::bool(true, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An inexact `NaNCount` below the length is expected to yield `all_nan == false`.
    #[test]
    fn stat_expr_reads_cached_all_nan_false_from_inexact_low_nan_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(f32::NAN), Some(1.0f32), Some(f32::NAN)])
                .into_array();
        let cached = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NaNCount, cached);

        let expr = all_nan(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want = ConstantArray::new(Scalar::bool(false, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An inexact `NaNCount` equal to the length is expected to leave `all_nan` null.
    #[test]
    fn stat_expr_returns_null_for_inexact_full_nan_count_as_all_nan() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(f32::NAN), Some(1.0f32), Some(f32::NAN)])
                .into_array();
        let cached = Precision::inexact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NaNCount, cached);

        let expr = all_nan(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let null_bool = Scalar::null(DType::Bool(Nullability::Nullable));
        let want = ConstantArray::new(null_bool, 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An inexact `NaNCount` of zero is expected to yield `all_non_nan == true`.
    #[test]
    fn stat_expr_reads_cached_all_non_nan_true_from_inexact_zero_nan_count() -> VortexResult<()> {
        let input = buffer![1.0f32, 2.0, 3.0].into_array();
        let cached = Precision::inexact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NaNCount, cached);

        let expr = all_non_nan(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want = ConstantArray::new(Scalar::bool(true, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// An inexact non-zero `NaNCount` is expected to leave `all_non_nan` null.
    #[test]
    fn stat_expr_returns_null_for_inexact_nonzero_nan_count_as_all_non_nan() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter([Some(1.0f32), Some(f32::NAN), Some(3.0)])
            .into_array();
        let cached = Precision::inexact(ScalarValue::from(1u64));
        input.statistics().set(Stat::NaNCount, cached);

        let expr = all_non_nan(root());
        let applied = input.apply(&expr)?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let null_bool = Scalar::null(DType::Bool(Nullability::Nullable));
        let want = ConstantArray::new(null_bool, 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// Exact cached `Min` and `Max` are each expected back as nullable `i32` constant arrays
    /// when read through the generic `stat` constructor.
    #[test]
    fn stat_expr_reads_cached_min_and_max() -> VortexResult<()> {
        let input = buffer![3i32, 1, 2].into_array();
        let cached_min = Precision::exact(ScalarValue::from(1i32));
        input.statistics().set(Stat::Min, cached_min);
        let cached_max = Precision::exact(ScalarValue::from(3i32));
        input.statistics().set(Stat::Max, cached_max);

        let min_fn = Stat::Min
            .aggregate_fn()
            .vortex_expect("min should have an aggregate function");
        let min_expr = stat(root(), min_fn);
        // Clone so the same array can be used again for the max lookup below.
        let min_applied = input.clone().apply(&min_expr)?;
        let mut min_ctx = SESSION.create_execution_ctx();
        let actual_min = min_applied.execute::<Canonical>(&mut min_ctx)?.into_array();
        let want_min =
            ConstantArray::new(Scalar::primitive(1i32, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual_min, want_min);

        let max_fn = Stat::Max
            .aggregate_fn()
            .vortex_expect("max should have an aggregate function");
        let max_expr = stat(root(), max_fn);
        let max_applied = input.apply(&max_expr)?;
        let mut max_ctx = SESSION.create_execution_ctx();
        let actual_max = max_applied.execute::<Canonical>(&mut max_ctx)?.into_array();
        let want_max =
            ConstantArray::new(Scalar::primitive(3i32, Nullability::Nullable), 3).into_array();
        assert_arrays_eq!(actual_max, want_max);

        Ok(())
    }
}