1use crate::aggregate_fn::AggregateFnRef;
7use crate::aggregate_fn::AggregateFnVTableExt;
8use crate::aggregate_fn::EmptyOptions;
9use crate::aggregate_fn::NumericalAggregateOpts;
10use crate::aggregate_fn::fns::all_nan::AllNan;
11use crate::aggregate_fn::fns::all_non_nan::AllNonNan;
12use crate::aggregate_fn::fns::all_non_null::AllNonNull;
13use crate::aggregate_fn::fns::all_null::AllNull;
14use crate::aggregate_fn::fns::min_max::MinMax;
15use crate::aggregate_fn::fns::nan_count::NanCount;
16use crate::aggregate_fn::fns::null_count::NullCount;
17use crate::aggregate_fn::fns::sum::Sum;
18use crate::expr::Expression;
19use crate::scalar_fn::ScalarFnVTableExt;
20pub use crate::scalar_fn::fns::stat::StatFn;
21pub use crate::scalar_fn::fns::stat::StatOptions;
22
23pub fn stat(expr: Expression, aggregate_fn: AggregateFnRef) -> Expression {
28 StatFn.new_expr(StatOptions::new(aggregate_fn), [expr])
29}
30
31pub fn min_max(expr: Expression) -> Expression {
33 stat(expr, MinMax.bind(NumericalAggregateOpts::skip_nans()))
35}
36
37pub fn sum(expr: Expression) -> Expression {
39 stat(expr, Sum.bind(NumericalAggregateOpts::skip_nans()))
41}
42
43pub fn null_count(expr: Expression) -> Expression {
45 stat(expr, NullCount.bind(EmptyOptions))
46}
47
48pub fn all_null(expr: Expression) -> Expression {
50 stat(expr, AllNull.bind(EmptyOptions))
51}
52
53pub fn all_nan(expr: Expression) -> Expression {
55 stat(expr, AllNan.bind(EmptyOptions))
56}
57
58pub fn all_non_null(expr: Expression) -> Expression {
60 stat(expr, AllNonNull.bind(EmptyOptions))
61}
62
63pub fn all_non_nan(expr: Expression) -> Expression {
65 stat(expr, AllNonNan.bind(EmptyOptions))
66}
67
68pub fn nan_count(expr: Expression) -> Expression {
70 stat(expr, NanCount.bind(EmptyOptions))
71}
72
#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::all_nan;
    use super::all_non_nan;
    use super::all_non_null;
    use super::all_null;
    use super::null_count;
    use super::stat;
    use super::sum;
    use crate::Canonical;
    use crate::IntoArray;
    use crate::VortexSessionExecute;
    use crate::array_session;
    use crate::arrays::Chunked;
    use crate::arrays::ChunkedArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::chunked::ChunkedArrayExt;
    use crate::assert_arrays_eq;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::dtype::PType;
    use crate::expr::root;
    use crate::expr::stats::Precision;
    use crate::expr::stats::Stat;
    use crate::scalar::Scalar;
    use crate::scalar::ScalarValue;
    use crate::validity::Validity;

    /// Session shared by all tests; every use derives a fresh execution context from it.
    static SESSION: LazyLock<VortexSession> = LazyLock::new(array_session);

    /// An exact cached `Stat::Sum` comes back as a nullable constant spanning the array.
    #[test]
    fn stat_expr_reads_cached_sum() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached_sum = Scalar::primitive(6i64, Nullability::Nullable)
            .into_value()
            .vortex_expect("non-null sum");
        input
            .statistics()
            .set(Stat::Sum, Precision::exact(cached_sum));

        let sum_expr = sum(root());
        let applied = input.apply(&sum_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_scalar = Scalar::primitive(6i64, Nullability::Nullable);
        let expected = ConstantArray::new(expected_scalar, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// Without a cached sum the expression evaluates to a null `i64` for every row.
    #[test]
    fn stat_expr_returns_null_when_sum_is_missing() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();

        let sum_expr = sum(root());
        let applied = input.apply(&sum_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let null_sum = Scalar::null(DType::Primitive(PType::I64, Nullability::Nullable));
        let expected = ConstantArray::new(null_sum, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// On chunked input the result stays chunked, and each chunk reports its own cached sum:
    /// the first chunk has one cached, the second does not and therefore yields nulls.
    #[test]
    fn stat_expr_reads_cached_sum_per_chunk() -> VortexResult<()> {
        let first_chunk = buffer![1i32, 2].into_array();
        let cached_sum = Scalar::primitive(3i64, Nullability::Nullable)
            .into_value()
            .vortex_expect("non-null sum");
        first_chunk
            .statistics()
            .set(Stat::Sum, Precision::exact(cached_sum));
        let second_chunk = buffer![4i32, 5, 6].into_array();

        let chunks = vec![first_chunk, second_chunk];
        let chunk_dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
        let input = ChunkedArray::try_new(chunks, chunk_dtype)?.into_array();

        let sum_expr = sum(root());
        let applied = input.apply(&sum_expr)?;

        let as_chunked = applied
            .as_opt::<Chunked>()
            .vortex_expect("stat expression should preserve chunked alignment");
        assert_eq!(as_chunked.nchunks(), 2);

        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_values = buffer![3i64, 3, 0, 0, 0];
        let expected_validity = Validity::from_iter([true, true, false, false, false]);
        let expected = PrimitiveArray::new(expected_values, expected_validity).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An exact cached `Stat::NullCount` comes back as a nullable `u64` constant.
    #[test]
    fn stat_expr_reads_cached_null_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), None]).into_array();
        let cached_null_count = Scalar::primitive(2u64, Nullability::NonNullable)
            .into_value()
            .vortex_expect("non-null null_count");
        input
            .statistics()
            .set(Stat::NullCount, Precision::exact(cached_null_count));

        let null_count_expr = null_count(root());
        let applied = input.apply(&null_count_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_scalar = Scalar::primitive(2u64, Nullability::Nullable);
        let expected = ConstantArray::new(expected_scalar, 4).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An exact null count equal to the array length makes `all_null` evaluate to `true`.
    #[test]
    fn stat_expr_reads_cached_all_null_from_null_count() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, None, None]).into_array();
        let cached = Precision::exact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NullCount, cached);

        let all_null_expr = all_null(root());
        let applied = input.apply(&all_null_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_scalar = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(expected_scalar, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact null count below the array length makes `all_null` evaluate to `false`.
    #[test]
    fn stat_expr_reads_cached_all_null_false_from_inexact_low_null_count() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, Some(2), None]).into_array();
        let cached = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NullCount, cached);

        let all_null_expr = all_null(root());
        let applied = input.apply(&all_null_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_scalar = Scalar::bool(false, Nullability::Nullable);
        let expected = ConstantArray::new(expected_scalar, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact null count equal to the array length leaves `all_null` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_full_null_count_as_all_null() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter::<i32, _>([None, Some(2), None]).into_array();
        let cached = Precision::inexact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NullCount, cached);

        let all_null_expr = all_null(root());
        let applied = input.apply(&all_null_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let undecided = Scalar::null(DType::Bool(Nullability::Nullable));
        let expected = ConstantArray::new(undecided, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An exact null count of zero makes `all_non_null` evaluate to `true`.
    #[test]
    fn stat_expr_reads_cached_all_non_null_from_null_count() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached = Precision::exact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NullCount, cached);

        let all_non_null_expr = all_non_null(root());
        let applied = input.apply(&all_non_null_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_scalar = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(expected_scalar, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact null count of zero still makes `all_non_null` evaluate to `true`.
    #[test]
    fn stat_expr_reads_cached_all_non_null_true_from_inexact_zero_null_count() -> VortexResult<()> {
        let input = buffer![1i32, 2, 3].into_array();
        let cached = Precision::inexact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NullCount, cached);

        let all_non_null_expr = all_non_null(root());
        let applied = input.apply(&all_non_null_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_scalar = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(expected_scalar, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact non-zero null count leaves `all_non_null` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_nonzero_null_count_as_all_non_null() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(1i32), None, Some(3), None]).into_array();
        let cached = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NullCount, cached);

        let all_non_null_expr = all_non_null(root());
        let applied = input.apply(&all_non_null_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let undecided = Scalar::null(DType::Bool(Nullability::Nullable));
        let expected = ConstantArray::new(undecided, 4).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// Applying and executing `all_nan` over an `i32` array must produce an error.
    #[test]
    fn stat_expr_rejects_all_nan_for_non_float() -> VortexResult<()> {
        let input = PrimitiveArray::empty::<i32>(Nullability::NonNullable).into_array();
        let mut ctx = SESSION.create_execution_ctx();

        let all_nan_expr = all_nan(root());
        let outcome = input
            .apply(&all_nan_expr)
            .and_then(|applied| applied.execute::<Canonical>(&mut ctx));

        assert!(outcome.is_err());
        Ok(())
    }

    /// An exact NaN count equal to the array length makes `all_nan` evaluate to `true`.
    #[test]
    fn stat_expr_reads_cached_all_nan_from_nan_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(f32::NAN), Some(f32::NAN), Some(f32::NAN)])
                .into_array();
        let cached = Precision::exact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NaNCount, cached);

        let all_nan_expr = all_nan(root());
        let applied = input.apply(&all_nan_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_scalar = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(expected_scalar, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact NaN count below the array length makes `all_nan` evaluate to `false`.
    #[test]
    fn stat_expr_reads_cached_all_nan_false_from_inexact_low_nan_count() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(f32::NAN), Some(1.0f32), Some(f32::NAN)])
                .into_array();
        let cached = Precision::inexact(ScalarValue::from(2u64));
        input.statistics().set(Stat::NaNCount, cached);

        let all_nan_expr = all_nan(root());
        let applied = input.apply(&all_nan_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_scalar = Scalar::bool(false, Nullability::Nullable);
        let expected = ConstantArray::new(expected_scalar, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact NaN count equal to the array length leaves `all_nan` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_full_nan_count_as_all_nan() -> VortexResult<()> {
        let input =
            PrimitiveArray::from_option_iter([Some(f32::NAN), Some(1.0f32), Some(f32::NAN)])
                .into_array();
        let cached = Precision::inexact(ScalarValue::from(3u64));
        input.statistics().set(Stat::NaNCount, cached);

        let all_nan_expr = all_nan(root());
        let applied = input.apply(&all_nan_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let undecided = Scalar::null(DType::Bool(Nullability::Nullable));
        let expected = ConstantArray::new(undecided, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact NaN count of zero makes `all_non_nan` evaluate to `true`.
    #[test]
    fn stat_expr_reads_cached_all_non_nan_true_from_inexact_zero_nan_count() -> VortexResult<()> {
        let input = buffer![1.0f32, 2.0, 3.0].into_array();
        let cached = Precision::inexact(ScalarValue::from(0u64));
        input.statistics().set(Stat::NaNCount, cached);

        let all_non_nan_expr = all_non_nan(root());
        let applied = input.apply(&all_non_nan_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let expected_scalar = Scalar::bool(true, Nullability::Nullable);
        let expected = ConstantArray::new(expected_scalar, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// An inexact non-zero NaN count leaves `all_non_nan` undecided (null).
    #[test]
    fn stat_expr_returns_null_for_inexact_nonzero_nan_count_as_all_non_nan() -> VortexResult<()> {
        let input = PrimitiveArray::from_option_iter([Some(1.0f32), Some(f32::NAN), Some(3.0)])
            .into_array();
        let cached = Precision::inexact(ScalarValue::from(1u64));
        input.statistics().set(Stat::NaNCount, cached);

        let all_non_nan_expr = all_non_nan(root());
        let applied = input.apply(&all_non_nan_expr)?;
        let actual = applied
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();

        let undecided = Scalar::null(DType::Bool(Nullability::Nullable));
        let expected = ConstantArray::new(undecided, 3).into_array();
        assert_arrays_eq!(actual, expected, &mut SESSION.create_execution_ctx());

        Ok(())
    }

    /// Exact cached `Stat::Min` / `Stat::Max` values are readable through `stat` when it is
    /// given the aggregate function reported by the corresponding `Stat` variant.
    #[test]
    fn stat_expr_reads_cached_min_and_max() -> VortexResult<()> {
        let input = buffer![3i32, 1, 2].into_array();
        input
            .statistics()
            .set(Stat::Min, Precision::exact(ScalarValue::from(1i32)));
        input
            .statistics()
            .set(Stat::Max, Precision::exact(ScalarValue::from(3i32)));

        // Minimum: evaluated on a clone so `input` remains available for the maximum below.
        let min_fn = Stat::Min
            .aggregate_fn()
            .vortex_expect("min should have an aggregate function");
        let min_expr = stat(root(), min_fn);
        let actual_min = input
            .clone()
            .apply(&min_expr)?
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();
        let min_scalar = Scalar::primitive(1i32, Nullability::Nullable);
        let expected_min = ConstantArray::new(min_scalar, 3).into_array();
        assert_arrays_eq!(
            actual_min,
            expected_min,
            &mut SESSION.create_execution_ctx()
        );

        // Maximum.
        let max_fn = Stat::Max
            .aggregate_fn()
            .vortex_expect("max should have an aggregate function");
        let max_expr = stat(root(), max_fn);
        let actual_max = input
            .apply(&max_expr)?
            .execute::<Canonical>(&mut SESSION.create_execution_ctx())?
            .into_array();
        let max_scalar = Scalar::primitive(3i32, Nullability::Nullable);
        let expected_max = ConstantArray::new(max_scalar, 3).into_array();
        assert_arrays_eq!(
            actual_max,
            expected_max,
            &mut SESSION.create_execution_ctx()
        );

        Ok(())
    }
}