1use crate::aggregate_fn::AggregateFnRef;
7use crate::aggregate_fn::AggregateFnVTableExt;
8use crate::aggregate_fn::EmptyOptions;
9use crate::aggregate_fn::fns::all_nan::AllNan;
10use crate::aggregate_fn::fns::all_non_nan::AllNonNan;
11use crate::aggregate_fn::fns::all_non_null::AllNonNull;
12use crate::aggregate_fn::fns::all_null::AllNull;
13use crate::aggregate_fn::fns::min_max::MinMax;
14use crate::aggregate_fn::fns::nan_count::NanCount;
15use crate::aggregate_fn::fns::null_count::NullCount;
16use crate::aggregate_fn::fns::sum::Sum;
17use crate::expr::Expression;
18use crate::scalar_fn::ScalarFnVTableExt;
19pub use crate::scalar_fn::fns::stat::StatFn;
20pub use crate::scalar_fn::fns::stat::StatOptions;
21
22pub fn stat(expr: Expression, aggregate_fn: AggregateFnRef) -> Expression {
27 StatFn.new_expr(StatOptions::new(aggregate_fn), [expr])
28}
29
30pub fn min_max(expr: Expression) -> Expression {
32 stat(expr, MinMax.bind(EmptyOptions))
33}
34
35pub fn sum(expr: Expression) -> Expression {
37 stat(expr, Sum.bind(EmptyOptions))
38}
39
40pub fn null_count(expr: Expression) -> Expression {
42 stat(expr, NullCount.bind(EmptyOptions))
43}
44
45pub fn all_null(expr: Expression) -> Expression {
47 stat(expr, AllNull.bind(EmptyOptions))
48}
49
50pub fn all_nan(expr: Expression) -> Expression {
52 stat(expr, AllNan.bind(EmptyOptions))
53}
54
55pub fn all_non_null(expr: Expression) -> Expression {
57 stat(expr, AllNonNull.bind(EmptyOptions))
58}
59
60pub fn all_non_nan(expr: Expression) -> Expression {
62 stat(expr, AllNonNan.bind(EmptyOptions))
63}
64
65pub fn nan_count(expr: Expression) -> Expression {
67 stat(expr, NanCount.bind(EmptyOptions))
68}
69
#[cfg(test)]
mod tests {
    //! Tests for the stat expression builders.
    //!
    //! Each test seeds (or deliberately omits) a statistic on an array, applies one of the
    //! builders to `root()`, executes the result to canonical form, and compares it with the
    //! expected array.

    use std::sync::LazyLock;

    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::all_nan;
    use super::all_non_nan;
    use super::all_non_null;
    use super::all_null;
    use super::null_count;
    use super::stat;
    use super::sum;
    use crate::Canonical;
    use crate::IntoArray;
    use crate::VortexSessionExecute;
    use crate::arrays::Chunked;
    use crate::arrays::ChunkedArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::chunked::ChunkedArrayExt;
    use crate::assert_arrays_eq;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::dtype::PType;
    use crate::expr::root;
    use crate::expr::stats::Precision;
    use crate::expr::stats::Stat;
    use crate::scalar::Scalar;
    use crate::scalar::ScalarValue;
    use crate::session::ArraySession;
    use crate::validity::Validity;

    /// Session shared by every test; built on first use from an empty session plus
    /// [`ArraySession`].
    static SESSION: LazyLock<VortexSession> =
        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

    /// A cached exact `Sum` is returned as a constant nullable `i64` array.
    #[test]
    fn stat_expr_reads_cached_sum() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached_sum = Scalar::primitive(6i64, Nullability::Nullable)
            .into_value()
            .vortex_expect("non-null sum");
        input
            .statistics()
            .set(Stat::Sum, Precision::exact(cached_sum));

        let applied = input.apply(&sum(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let six = Scalar::primitive(6i64, Nullability::Nullable);
        let expected = ConstantArray::new(six, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// With no `Sum` statistic cached, every row of the result is null.
    #[test]
    fn stat_expr_returns_null_when_sum_is_missing() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();

        let applied = input.apply(&sum(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let null_sum = Scalar::null(DType::Primitive(PType::I64, Nullability::Nullable));
        let expected = ConstantArray::new(null_sum, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// On a chunked input the result stays chunked: rows of the chunk with a cached sum carry
    /// it, rows of the chunk without one are null.
    #[test]
    fn stat_expr_reads_cached_sum_per_chunk() -> VortexResult<()> {
        let first = buffer![1i32, 2].into_array();
        let first_sum = Scalar::primitive(3i64, Nullability::Nullable)
            .into_value()
            .vortex_expect("non-null sum");
        first
            .statistics()
            .set(Stat::Sum, Precision::exact(first_sum));
        // The second chunk intentionally has no cached sum.
        let second = buffer![4i32, 5, 6].into_array();

        let chunk_dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let input = ChunkedArray::try_new(vec![first, second], chunk_dtype)?.into_array();

        let applied = input.apply(&sum(root()))?;

        let nchunks = applied
            .as_opt::<Chunked>()
            .vortex_expect("stat expression should preserve chunked alignment")
            .nchunks();
        assert_eq!(nchunks, 2);

        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let values = buffer![3i64, 3, 0, 0, 0];
        let validity = Validity::from_iter([true, true, false, false, false]);
        let expected = PrimitiveArray::new(values, validity).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// A cached exact `NullCount` is returned as a constant nullable `u64` array.
    #[test]
    fn stat_expr_reads_cached_null_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), None]).into_array();
        let cached_count = Scalar::primitive(2u64, Nullability::NonNullable)
            .into_value()
            .vortex_expect("non-null null_count");
        input
            .statistics()
            .set(Stat::NullCount, Precision::exact(cached_count));

        let applied = input.apply(&null_count(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let two = Scalar::primitive(2u64, Nullability::Nullable);
        let expected = ConstantArray::new(two, 4).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An exact null count equal to the length makes `all_null` true.
    #[test]
    fn stat_expr_reads_cached_all_null_from_null_count() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, None, None]).into_array();
        let cached_count = Precision::exact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NullCount, cached_count);

        let applied = input.apply(&all_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An inexact null count below the length makes `all_null` false.
    #[test]
    fn stat_expr_reads_cached_all_null_false_from_inexact_low_null_count() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, Some(2), None]).into_array();
        let cached_count = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NullCount, cached_count);

        let applied = input.apply(&all_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let no = Scalar::bool(false, Nullability::Nullable);
        let expected = ConstantArray::new(no, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An inexact null count equal to the length leaves `all_null` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_full_null_count_as_all_null() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, Some(2), None]).into_array();
        let cached_count = Precision::inexact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NullCount, cached_count);

        let applied = input.apply(&all_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let unknown = Scalar::null(DType::Bool(Nullability::Nullable));
        let expected = ConstantArray::new(unknown, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An exact null count of zero makes `all_non_null` true.
    #[test]
    fn stat_expr_reads_cached_all_non_null_from_null_count() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached_count = Precision::exact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NullCount, cached_count);

        let applied = input.apply(&all_non_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An inexact null count of zero still makes `all_non_null` true.
    #[test]
    fn stat_expr_reads_cached_all_non_null_true_from_inexact_zero_null_count() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached_count = Precision::inexact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NullCount, cached_count);

        let applied = input.apply(&all_non_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An inexact non-zero null count leaves `all_non_null` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_nonzero_null_count_as_all_non_null() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), None]).into_array();
        let cached_count = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NullCount, cached_count);

        let applied = input.apply(&all_non_null(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let unknown = Scalar::null(DType::Bool(Nullability::Nullable));
        let expected = ConstantArray::new(unknown, 4).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// Applying or executing `all_nan` over an `i32` array must produce an error.
    #[test]
    fn stat_expr_rejects_all_nan_for_non_float() -> VortexResult<()> {
        let input = PrimitiveArray::empty::<i32>(Nullability::NonNullable).into_array();
        let mut ctx = SESSION.create_execution_ctx();

        // The failure may surface at apply time or at execution time; either is accepted.
        let outcome = match input.apply(&all_nan(root())) {
            Ok(applied) => applied.execute::<Canonical>(&mut ctx),
            Err(err) => Err(err),
        };

        assert!(outcome.is_err());
        Ok(())
    }

    /// An exact NaN count equal to the length makes `all_nan` true.
    #[test]
    fn stat_expr_reads_cached_all_nan_from_nan_count() -> VortexResult<()> {
        let nans = [Some(f32::NAN), Some(f32::NAN), Some(f32::NAN)];
        let input = PrimitiveArray::from_option_iter(nans).into_array();
        let cached_count = Precision::exact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NaNCount, cached_count);

        let applied = input.apply(&all_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An inexact NaN count below the length makes `all_nan` false.
    #[test]
    fn stat_expr_reads_cached_all_nan_false_from_inexact_low_nan_count() -> VortexResult<()> {
        let values = [Some(f32::NAN), Some(1.0f32), Some(f32::NAN)];
        let input = PrimitiveArray::from_option_iter(values).into_array();
        let cached_count = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NaNCount, cached_count);

        let applied = input.apply(&all_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let no = Scalar::bool(false, Nullability::Nullable);
        let expected = ConstantArray::new(no, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An inexact NaN count equal to the length leaves `all_nan` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_full_nan_count_as_all_nan() -> VortexResult<()> {
        let values = [Some(f32::NAN), Some(1.0f32), Some(f32::NAN)];
        let input = PrimitiveArray::from_option_iter(values).into_array();
        let cached_count = Precision::inexact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NaNCount, cached_count);

        let applied = input.apply(&all_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let unknown = Scalar::null(DType::Bool(Nullability::Nullable));
        let expected = ConstantArray::new(unknown, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An inexact NaN count of zero makes `all_non_nan` true.
    #[test]
    fn stat_expr_reads_cached_all_non_nan_true_from_inexact_zero_nan_count() -> VortexResult<()> {
        let input = buffer![1.0f32, 2.0, 3.0].into_array();
        let cached_count = Precision::inexact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NaNCount, cached_count);

        let applied = input.apply(&all_non_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let yes = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(yes, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// An inexact non-zero NaN count leaves `all_non_nan` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_nonzero_nan_count_as_all_non_nan() -> VortexResult<()> {
        let values = [Some(1.0f32), Some(f32::NAN), Some(3.0)];
        let input = PrimitiveArray::from_option_iter(values).into_array();
        let cached_count = Precision::inexact(ScalarValue::from(1u64));
        input.statistics().set(Stat::NaNCount, cached_count);

        let applied = input.apply(&all_non_nan(root()))?;
        let mut ctx = SESSION.create_execution_ctx();
        let actual = applied.execute::<Canonical>(&mut ctx)?.into_array();

        let unknown = Scalar::null(DType::Bool(Nullability::Nullable));
        let expected = ConstantArray::new(unknown, 3).into_array();
        assert_arrays_eq!(actual, expected);

        Ok(())
    }

    /// Cached exact `Min` and `Max` are each readable through the generic `stat` builder using
    /// the aggregate function associated with the corresponding `Stat`.
    #[test]
    fn stat_expr_reads_cached_min_and_max() -> VortexResult<()> {
        let input = buffer![3i32, 1, 2].into_array();
        let cached_min = Precision::exact(ScalarValue::from(1i32));
        input.statistics().set(Stat::Min, cached_min);
        let cached_max = Precision::exact(ScalarValue::from(3i32));
        input.statistics().set(Stat::Max, cached_max);

        // Minimum: evaluated on a clone so `input` remains available for the maximum below.
        let min_fn = Stat::Min
            .aggregate_fn()
            .vortex_expect("min should have an aggregate function");
        let min_applied = input.clone().apply(&stat(root(), min_fn))?;
        let mut min_ctx = SESSION.create_execution_ctx();
        let min_result = min_applied
            .execute::<Canonical>(&mut min_ctx)?
            .into_array();
        let one = Scalar::primitive(1i32, Nullability::Nullable);
        let expected_min = ConstantArray::new(one, 3).into_array();
        assert_arrays_eq!(min_result, expected_min);

        // Maximum: consumes the original array.
        let max_fn = Stat::Max
            .aggregate_fn()
            .vortex_expect("max should have an aggregate function");
        let max_applied = input.apply(&stat(root(), max_fn))?;
        let mut max_ctx = SESSION.create_execution_ctx();
        let max_result = max_applied
            .execute::<Canonical>(&mut max_ctx)?
            .into_array();
        let three = Scalar::primitive(3i32, Nullability::Nullable);
        let expected_max = ConstantArray::new(three, 3).into_array();
        assert_arrays_eq!(max_result, expected_max);

        Ok(())
    }
}