Skip to main content

vortex_array/stats/
expr.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Expression constructors for statistics backed by aggregate functions.
5
6use crate::aggregate_fn::AggregateFnRef;
7use crate::aggregate_fn::AggregateFnVTableExt;
8use crate::aggregate_fn::EmptyOptions;
9use crate::aggregate_fn::fns::min_max::MinMax;
10use crate::aggregate_fn::fns::nan_count::NanCount;
11use crate::aggregate_fn::fns::null_count::NullCount;
12use crate::aggregate_fn::fns::sum::Sum;
13use crate::expr::Expression;
14use crate::scalar_fn::ScalarFnVTableExt;
15pub use crate::scalar_fn::fns::stat::StatFn;
16pub use crate::scalar_fn::fns::stat::StatOptions;
17
18/// Creates an expression that reads a stored aggregate statistic for `expr`.
19///
20/// If the statistic is not available in the current stats scope, evaluating the expression returns
21/// a nullable all-null array with the aggregate return type.
22pub fn stat(expr: Expression, aggregate_fn: AggregateFnRef) -> Expression {
23    StatFn.new_expr(StatOptions::new(aggregate_fn), [expr])
24}
25
26/// Creates `stat(expr, min_max)`, returning a nullable `{ min, max }` struct statistic.
27pub fn min_max(expr: Expression) -> Expression {
28    stat(expr, MinMax.bind(EmptyOptions))
29}
30
31/// Creates `stat(expr, sum)`, returning a nullable sum statistic.
32pub fn sum(expr: Expression) -> Expression {
33    stat(expr, Sum.bind(EmptyOptions))
34}
35
36/// Creates `stat(expr, null_count)`, returning a nullable null-count statistic.
37pub fn null_count(expr: Expression) -> Expression {
38    stat(expr, NullCount.bind(EmptyOptions))
39}
40
41/// Creates `stat(expr, nan_count)`, returning a nullable NaN-count statistic.
42pub fn nan_count(expr: Expression) -> Expression {
43    stat(expr, NanCount.bind(EmptyOptions))
44}
45
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;
    use vortex_error::VortexResult;

    use super::stat;
    use crate::Canonical;
    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::aggregate_fn::AggregateFn;
    use crate::aggregate_fn::EmptyOptions;
    use crate::aggregate_fn::fns::sum::Sum;
    use crate::arrays::Chunked;
    use crate::arrays::ChunkedArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::chunked::ChunkedArrayExt;
    use crate::assert_arrays_eq;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::dtype::PType;
    use crate::expr::root;
    use crate::expr::stats::Precision;
    use crate::expr::stats::Stat;
    use crate::scalar::Scalar;
    use crate::validity::Validity;

    /// Builds `stat(root(), sum)` from an explicitly constructed, erased `Sum` aggregate.
    fn root_sum_stat() -> super::Expression {
        stat(root(), AggregateFn::new(Sum, EmptyOptions).erased())
    }

    /// A cached exact sum is returned as a constant nullable value for every row.
    #[test]
    fn stat_expr_reads_cached_sum() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached_sum = Scalar::primitive(6i64, Nullability::Nullable)
            .into_value()
            .vortex_expect("non-null sum");
        input
            .statistics()
            .set(Stat::Sum, Precision::exact(cached_sum));

        let applied = input.apply(&root_sum_stat())?;
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want_scalar = Scalar::primitive(6i64, Nullability::Nullable);
        let want = ConstantArray::new(want_scalar, 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// Without a cached sum, every row of the result is a null of nullable `i64` type.
    #[test]
    fn stat_expr_returns_null_when_sum_is_missing() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();

        let applied = input.apply(&root_sum_stat())?;
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let null_sum = Scalar::null(DType::Primitive(PType::I64, Nullability::Nullable));
        let want = ConstantArray::new(null_sum, 3).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// Only the first chunk carries a cached sum: its rows report that sum, the second chunk's
    /// rows are null, and the result stays chunked with the same chunk count.
    #[test]
    fn stat_expr_reads_cached_sum_per_chunk() -> VortexResult<()> {
        let first_chunk = buffer![1i32, 2].into_array();
        let second_chunk = buffer![4i32, 5, 6].into_array();

        let first_sum = Scalar::primitive(3i64, Nullability::Nullable)
            .into_value()
            .vortex_expect("non-null sum");
        first_chunk
            .statistics()
            .set(Stat::Sum, Precision::exact(first_sum));

        let chunk_dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let input = ChunkedArray::try_new(vec![first_chunk, second_chunk], chunk_dtype)?.into_array();

        let applied = input.apply(&root_sum_stat())?;

        // Check the chunk layout before canonicalizing.
        let nchunks = applied
            .as_opt::<Chunked>()
            .vortex_expect("stat expression should preserve chunked alignment")
            .nchunks();
        assert_eq!(nchunks, 2);

        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want_values = buffer![3i64, 3, 0, 0, 0];
        let want_validity = Validity::from_iter([true, true, false, false, false]);
        let want = PrimitiveArray::new(want_values, want_validity).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }

    /// The cached null count is stored as a non-nullable `u64`; the expression result is expected
    /// to be the same value as a nullable constant.
    #[test]
    fn stat_expr_reads_cached_null_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), None]).into_array();
        let cached_null_count = Scalar::primitive(2u64, Nullability::NonNullable)
            .into_value()
            .vortex_expect("non-null null_count");
        input
            .statistics()
            .set(Stat::NullCount, Precision::exact(cached_null_count));

        let applied = input.apply(&super::null_count(root()))?;
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let want_scalar = Scalar::primitive(2u64, Nullability::Nullable);
        let want = ConstantArray::new(want_scalar, 4).into_array();
        assert_arrays_eq!(actual, want);

        Ok(())
    }
}