Skip to main content

vortex_array/aggregate_fn/fns/min_max/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod bool;
5mod decimal;
6mod extension;
7mod primitive;
8mod varbin;
9
10use std::sync::LazyLock;
11
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_panic;
16use vortex_session::registry::CachedId;
17
18use self::bool::accumulate_bool;
19use self::decimal::accumulate_decimal;
20use self::extension::accumulate_extension;
21use self::primitive::accumulate_primitive;
22use self::varbin::accumulate_varbinview;
23use crate::ArrayRef;
24use crate::Canonical;
25use crate::Columnar;
26use crate::ExecutionCtx;
27use crate::aggregate_fn::Accumulator;
28use crate::aggregate_fn::AggregateFnId;
29use crate::aggregate_fn::AggregateFnVTable;
30use crate::aggregate_fn::DynAccumulator;
31use crate::aggregate_fn::NumericalAggregateOpts;
32use crate::dtype::DType;
33use crate::dtype::FieldNames;
34use crate::dtype::Nullability;
35use crate::dtype::PType;
36use crate::dtype::StructFields;
37use crate::dtype::half::f16;
38use crate::expr::stats::Precision;
39use crate::expr::stats::Stat;
40use crate::expr::stats::StatsProvider;
41use crate::expr::stats::StatsProviderExt;
42use crate::partial_ord::partial_max;
43use crate::partial_ord::partial_min;
44use crate::scalar::Scalar;
45
46static NAMES: LazyLock<FieldNames> = LazyLock::new(|| FieldNames::from(["min", "max"]));
47
48/// The minimum and maximum non-null values of an array, or `None` if there are no non-null values.
49///
50/// NaN handling for float inputs is controlled by [`NumericalAggregateOpts`]: with `skip_nans` (the
51/// default) NaN values are ignored and the cached `Stat::Min`/`Stat::Max` statistics are consulted
52/// and updated. With `skip_nans=false`, any NaN value in a float array poisons both extrema to
53/// NaN; an exact `Stat::NaNCount` statistic shortcircuits the NaN scan in either direction.
54///
55/// The result scalars have the non-nullable version of the array dtype.
56/// This will update the stats set of the array as a side effect.
57pub fn min_max(
58    array: &ArrayRef,
59    ctx: &mut ExecutionCtx,
60    options: NumericalAggregateOpts,
61) -> VortexResult<Option<MinMaxResult>> {
62    if !options.skip_nans && array.dtype().is_float() {
63        match array.statistics().get_as::<u64>(Stat::NaNCount) {
64            // NaN-free: identical to the NaN-skipping path below, including its stat caching.
65            Precision::Exact(0) => {}
66            // At least one NaN value poisons both extrema.
67            Precision::Exact(_) => return Ok(Some(nan_minmax_result(array.dtype()))),
68            _ => {
69                if array.is_empty() || array.valid_count(ctx)? == 0 {
70                    return Ok(None);
71                }
72                // Compute with NaN-including options; the NaN-skipping `Stat::Min`/`Stat::Max`
73                // caches are neither read nor written.
74                let mut acc = Accumulator::try_new(MinMax, options, array.dtype().clone())?;
75                acc.accumulate(array, ctx)?;
76                return MinMaxResult::from_scalar(acc.finish()?);
77            }
78        }
79    }
80
81    // NaN-skipping path. Also reached for NaN-free not-skipping float arrays and all non-float
82    // arrays, where `skip_nans` has no effect.
83
84    // Short-circuit using cached array statistics.
85    let cached_min = array.statistics().get(Stat::Min).as_exact();
86    let cached_max = array.statistics().get(Stat::Max).as_exact();
87    if let Some((min, max)) = cached_min.zip(cached_max) {
88        let non_nullable_dtype = array.dtype().as_nonnullable();
89        return Ok(Some(MinMaxResult {
90            min: min.cast(&non_nullable_dtype)?,
91            max: max.cast(&non_nullable_dtype)?,
92        }));
93    }
94
95    // Short-circuit for empty arrays or all-null arrays.
96    if array.is_empty() || array.valid_count(ctx)? == 0 {
97        return Ok(None);
98    }
99
100    // Short-circuit for dtypes this helper cannot currently compute.
101    if !minmax_compute_supported_dtype(array.dtype()) {
102        return Ok(None);
103    }
104
105    // Compute using Accumulator<MinMax>.
106    let mut acc = Accumulator::try_new(
107        MinMax,
108        NumericalAggregateOpts::default(),
109        array.dtype().clone(),
110    )?;
111    acc.accumulate(array, ctx)?;
112    let result_scalar = acc.finish()?;
113    let result = MinMaxResult::from_scalar(result_scalar)?;
114
115    // Cache the computed min/max as statistics.
116    if let Some(r) = &result {
117        if let Some(min_value) = r.min.value() {
118            array
119                .statistics()
120                .set(Stat::Min, Precision::Exact(min_value.clone()));
121        }
122        if let Some(max_value) = r.max.value() {
123            array
124                .statistics()
125                .set(Stat::Max, Precision::Exact(max_value.clone()));
126        }
127    }
128
129    Ok(result)
130}
131
132/// A `{min: NaN, max: NaN}` result for a poisoned NaN-including min/max over `dtype`.
133fn nan_minmax_result(dtype: &DType) -> MinMaxResult {
134    let nan = nan_scalar(dtype);
135    MinMaxResult {
136        min: nan.clone(),
137        max: nan,
138    }
139}
140
141/// A non-nullable NaN scalar of the float `dtype`.
142pub(crate) fn nan_scalar(dtype: &DType) -> Scalar {
143    match dtype.as_ptype() {
144        PType::F16 => Scalar::primitive(f16::NAN, Nullability::NonNullable),
145        PType::F32 => Scalar::primitive(f32::NAN, Nullability::NonNullable),
146        PType::F64 => Scalar::primitive(f64::NAN, Nullability::NonNullable),
147        _ => vortex_panic!("NaN scalar requested for non-float dtype {dtype}"),
148    }
149}
150
151/// Whether a scalar holds a primitive float NaN value.
152pub(crate) fn scalar_is_nan(scalar: &Scalar) -> bool {
153    if !scalar.dtype().is_float() {
154        return false;
155    }
156
157    scalar.as_primitive_opt().is_some_and(|p| p.is_nan())
158}
159
160/// The minimum and maximum non-null values of an array.
161#[derive(Debug, Clone, PartialEq, Eq)]
162pub struct MinMaxResult {
163    pub min: Scalar,
164    pub max: Scalar,
165}
166
167impl MinMaxResult {
168    /// Extract a `MinMaxResult` from a struct scalar with `{min, max}` fields.
169    pub fn from_scalar(scalar: Scalar) -> VortexResult<Option<Self>> {
170        if scalar.is_null() {
171            Ok(None)
172        } else {
173            let min = scalar
174                .as_struct()
175                .field_by_idx(0)
176                .vortex_expect("missing min field");
177            let max = scalar
178                .as_struct()
179                .field_by_idx(1)
180                .vortex_expect("missing max field");
181            Ok(Some(MinMaxResult { min, max }))
182        }
183    }
184}
185
/// Aggregate function computing both extrema of an array in one pass.
///
/// The result is a nullable struct scalar `{min: T, max: T}`, with `T` being the non-nullable
/// form of the input dtype. The struct is null when the array is empty or all-null.
///
/// For float inputs, [`NumericalAggregateOpts`] decides how NaNs are treated: `skip_nans` (the
/// default) ignores them, while disabling it lets any NaN poison both extrema to NaN.
#[derive(Debug, Clone)]
pub struct MinMax;
195
196/// Partial accumulator state for min/max.
197pub struct MinMaxPartial {
198    min: Option<Scalar>,
199    max: Option<Scalar>,
200    element_dtype: DType,
201    skip_nans: bool,
202}
203
204impl MinMaxPartial {
205    /// Merge a local `MinMaxResult` into this partial state.
206    fn merge(&mut self, local: Option<MinMaxResult>) {
207        let Some(MinMaxResult { min, max }) = local else {
208            return;
209        };
210
211        // NaN scalars are incomparable under `partial_min`/`partial_max`, so they are handled
212        // explicitly: a NaN extremum poisons the partial state when NaNs participate, and is
213        // dropped when they are skipped.
214        if scalar_is_nan(&min) || scalar_is_nan(&max) || self.is_poisoned() {
215            if !self.skip_nans {
216                self.poison();
217            }
218            return;
219        }
220
221        self.min = Some(match self.min.take() {
222            Some(current) => partial_min(min, current).vortex_expect("incomparable min scalars"),
223            None => min,
224        });
225
226        self.max = Some(match self.max.take() {
227            Some(current) => partial_max(max, current).vortex_expect("incomparable max scalars"),
228            None => max,
229        });
230    }
231
232    /// Poison the partial state to `{min: NaN, max: NaN}`.
233    fn poison(&mut self) {
234        let nan = nan_scalar(&self.element_dtype);
235        self.min = Some(nan.clone());
236        self.max = Some(nan);
237    }
238
239    /// Whether the partial state is poisoned to NaN.
240    fn is_poisoned(&self) -> bool {
241        self.element_dtype.is_float() && self.min.as_ref().is_some_and(scalar_is_nan)
242    }
243}
244
245/// Creates the struct dtype `{min: T, max: T}` (nullable) used for min/max aggregate results.
246pub fn make_minmax_dtype(element_dtype: &DType) -> DType {
247    DType::Struct(
248        StructFields::new(
249            NAMES.clone(),
250            vec![
251                element_dtype.as_nonnullable(),
252                element_dtype.as_nonnullable(),
253            ],
254        ),
255        Nullability::Nullable,
256    )
257}
258
259fn minmax_supported_dtype(input_dtype: &DType) -> bool {
260    match input_dtype {
261        DType::Bool(_)
262        | DType::Primitive(..)
263        | DType::Decimal(..)
264        | DType::Utf8(..)
265        | DType::Binary(..)
266        | DType::Extension(..) => true,
267        DType::List(element_dtype, _) => minmax_supported_dtype(element_dtype),
268        DType::FixedSizeList(element_dtype, ..) => minmax_supported_dtype(element_dtype),
269        _ => false,
270    }
271}
272
273/// Returns whether [`min_max`] can currently compute extrema for this logical dtype.
274///
275/// This is intentionally narrower than [`minmax_supported_dtype`]. List and fixed-size-list
276/// extrema have a defined output dtype for aggregate expression lowering, but the accumulator does
277/// not yet implement lexicographic list comparison.
278fn minmax_compute_supported_dtype(input_dtype: &DType) -> bool {
279    matches!(
280        input_dtype,
281        DType::Bool(_)
282            | DType::Primitive(..)
283            | DType::Decimal(..)
284            | DType::Utf8(..)
285            | DType::Binary(..)
286            | DType::Extension(..)
287    )
288}
289
290impl AggregateFnVTable for MinMax {
291    type Options = NumericalAggregateOpts;
292    type Partial = MinMaxPartial;
293
294    fn id(&self) -> AggregateFnId {
295        static ID: CachedId = CachedId::new("vortex.min_max");
296        *ID
297    }
298
299    fn serialize(&self, _options: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
300        Ok(None)
301    }
302
303    fn return_dtype(&self, _options: &Self::Options, input_dtype: &DType) -> Option<DType> {
304        minmax_supported_dtype(input_dtype).then(|| make_minmax_dtype(input_dtype))
305    }
306
307    fn partial_dtype(&self, options: &Self::Options, input_dtype: &DType) -> Option<DType> {
308        self.return_dtype(options, input_dtype)
309    }
310
311    fn empty_partial(
312        &self,
313        options: &Self::Options,
314        input_dtype: &DType,
315    ) -> VortexResult<Self::Partial> {
316        Ok(MinMaxPartial {
317            min: None,
318            max: None,
319            element_dtype: input_dtype.clone(),
320            skip_nans: options.skip_nans,
321        })
322    }
323
324    fn combine_partials(&self, partial: &mut Self::Partial, other: Scalar) -> VortexResult<()> {
325        let local = MinMaxResult::from_scalar(other)?;
326        partial.merge(local);
327        Ok(())
328    }
329
330    fn to_scalar(&self, partial: &Self::Partial) -> VortexResult<Scalar> {
331        let dtype = make_minmax_dtype(&partial.element_dtype);
332        Ok(match (&partial.min, &partial.max) {
333            (Some(min), Some(max)) => Scalar::struct_(dtype, vec![min.clone(), max.clone()]),
334            _ => Scalar::null(dtype),
335        })
336    }
337
338    fn reset(&self, partial: &mut Self::Partial) {
339        partial.min = None;
340        partial.max = None;
341    }
342
343    #[inline]
344    fn is_saturated(&self, partial: &Self::Partial) -> bool {
345        // A poisoned NaN-including min/max is fully determined.
346        partial.is_poisoned()
347    }
348
349    fn try_accumulate(
350        &self,
351        partial: &mut Self::Partial,
352        batch: &ArrayRef,
353        _ctx: &mut ExecutionCtx,
354    ) -> VortexResult<bool> {
355        // NaN-aware shortcircuits only apply to NaN-including float min/max; everything else
356        // takes the default dispatch path.
357        if partial.skip_nans || !partial.element_dtype.is_float() {
358            return Ok(false);
359        }
360        match batch.statistics().get_as::<u64>(Stat::NaNCount) {
361            Precision::Exact(0) => {
362                // NaN-free batch: the cached NaN-skipping extrema (if any) are valid.
363                let cached_min = batch.statistics().get(Stat::Min).as_exact();
364                let cached_max = batch.statistics().get(Stat::Max).as_exact();
365                if let Some((min, max)) = cached_min.zip(cached_max) {
366                    // Cached float stats carry the (possibly nullable) array dtype; `to_scalar`
367                    // builds a struct with non-nullable fields, so normalise here.
368                    let non_nullable_dtype = partial.element_dtype.as_nonnullable();
369                    partial.merge(Some(MinMaxResult {
370                        min: min.cast(&non_nullable_dtype)?,
371                        max: max.cast(&non_nullable_dtype)?,
372                    }));
373                    return Ok(true);
374                }
375                Ok(false)
376            }
377            Precision::Exact(_) => {
378                // At least one NaN value poisons both extrema without scanning the batch.
379                partial.poison();
380                Ok(true)
381            }
382            _ => Ok(false),
383        }
384    }
385
386    fn accumulate(
387        &self,
388        partial: &mut Self::Partial,
389        batch: &Columnar,
390        ctx: &mut ExecutionCtx,
391    ) -> VortexResult<()> {
392        match batch {
393            Columnar::Constant(c) => {
394                let scalar = c.scalar();
395                if scalar.is_null() {
396                    return Ok(());
397                }
398                // NaN float constants are skipped or poison the extrema, per the options.
399                if scalar_is_nan(scalar) {
400                    if !partial.skip_nans {
401                        partial.poison();
402                    }
403                    return Ok(());
404                }
405                let non_nullable_dtype = scalar.dtype().as_nonnullable();
406                let cast = scalar.cast(&non_nullable_dtype)?;
407                partial.merge(Some(MinMaxResult {
408                    min: cast.clone(),
409                    max: cast,
410                }));
411                Ok(())
412            }
413            Columnar::Canonical(c) => match c {
414                Canonical::Primitive(p) => accumulate_primitive(partial, p, ctx),
415                Canonical::Bool(b) => accumulate_bool(partial, b, ctx),
416                Canonical::VarBinView(v) => accumulate_varbinview(partial, v, ctx),
417                Canonical::Decimal(d) => accumulate_decimal(partial, d, ctx),
418                Canonical::Extension(e) => accumulate_extension(partial, e, ctx),
419                Canonical::Null(_) => Ok(()),
420                Canonical::Struct(_)
421                | Canonical::List(_)
422                | Canonical::FixedSizeList(_)
423                | Canonical::Variant(_) => {
424                    vortex_bail!("Unsupported canonical type for min_max: {}", batch.dtype())
425                }
426            },
427        }
428    }
429
430    fn finalize(&self, partials: ArrayRef) -> VortexResult<ArrayRef> {
431        Ok(partials)
432    }
433
434    fn finalize_scalar(&self, partial: &Self::Partial) -> VortexResult<Scalar> {
435        self.to_scalar(partial)
436    }
437}
438
439#[cfg(test)]
440mod tests {
441    use std::sync::Arc;
442    use std::sync::LazyLock;
443
444    use vortex_buffer::BitBuffer;
445    use vortex_buffer::buffer;
446    use vortex_error::VortexExpect;
447    use vortex_error::VortexResult;
448    use vortex_session::VortexSession;
449
450    use crate::IntoArray as _;
451    use crate::VortexSessionExecute;
452    use crate::aggregate_fn::Accumulator;
453    use crate::aggregate_fn::AggregateFnVTable;
454    use crate::aggregate_fn::DynAccumulator;
455    use crate::aggregate_fn::NumericalAggregateOpts;
456    use crate::aggregate_fn::fns::min_max::MinMax;
457    use crate::aggregate_fn::fns::min_max::MinMaxResult;
458    use crate::aggregate_fn::fns::min_max::make_minmax_dtype;
459    use crate::aggregate_fn::fns::min_max::min_max;
460    use crate::arrays::BoolArray;
461    use crate::arrays::ChunkedArray;
462    use crate::arrays::ConstantArray;
463    use crate::arrays::DecimalArray;
464    use crate::arrays::FixedSizeListArray;
465    use crate::arrays::ListArray;
466    use crate::arrays::NullArray;
467    use crate::arrays::PrimitiveArray;
468    use crate::arrays::VarBinArray;
469    use crate::dtype::DType;
470    use crate::dtype::DecimalDType;
471    use crate::dtype::Nullability;
472    use crate::dtype::PType;
473    use crate::expr::stats::Precision;
474    use crate::expr::stats::Stat;
475    use crate::scalar::DecimalValue;
476    use crate::scalar::Scalar;
477    use crate::scalar::ScalarValue;
478    use crate::validity::Validity;
479
480    static SESSION: LazyLock<VortexSession> = LazyLock::new(vortex_array::array_session);
481
482    #[test]
483    fn test_prim_min_max() -> VortexResult<()> {
484        let p = PrimitiveArray::new(buffer![1, 2, 3], Validity::NonNullable).into_array();
485        let mut ctx = SESSION.create_execution_ctx();
486        assert_eq!(
487            min_max(&p, &mut ctx, NumericalAggregateOpts::default())?,
488            Some(MinMaxResult {
489                min: 1.into(),
490                max: 3.into()
491            })
492        );
493        Ok(())
494    }
495
496    #[test]
497    fn test_prim_min_max_multiple_null_runs() -> VortexResult<()> {
498        // Several disjoint valid runs separated by nulls exercise the per-run fold; the extrema
499        // (min 1, max 9) fall in different runs.
500        let p = PrimitiveArray::from_option_iter([
501            Some(5i32),
502            Some(3),
503            None,
504            None,
505            Some(9),
506            None,
507            Some(1),
508            Some(7),
509        ])
510        .into_array();
511        let mut ctx = SESSION.create_execution_ctx();
512        assert_eq!(
513            min_max(&p, &mut ctx, NumericalAggregateOpts::default())?,
514            Some(MinMaxResult {
515                min: 1.into(),
516                max: 9.into()
517            })
518        );
519        Ok(())
520    }
521
522    #[test]
523    fn test_bool_min_max() -> VortexResult<()> {
524        let mut ctx = SESSION.create_execution_ctx();
525
526        let all_true = BoolArray::new(
527            BitBuffer::from([true, true, true].as_slice()),
528            Validity::NonNullable,
529        )
530        .into_array();
531        assert_eq!(
532            min_max(&all_true, &mut ctx, NumericalAggregateOpts::default())?,
533            Some(MinMaxResult {
534                min: true.into(),
535                max: true.into()
536            })
537        );
538
539        let all_false = BoolArray::new(
540            BitBuffer::from([false, false, false].as_slice()),
541            Validity::NonNullable,
542        )
543        .into_array();
544        assert_eq!(
545            min_max(&all_false, &mut ctx, NumericalAggregateOpts::default())?,
546            Some(MinMaxResult {
547                min: false.into(),
548                max: false.into()
549            })
550        );
551
552        let mixed = BoolArray::new(
553            BitBuffer::from([false, true, false].as_slice()),
554            Validity::NonNullable,
555        )
556        .into_array();
557        assert_eq!(
558            min_max(&mixed, &mut ctx, NumericalAggregateOpts::default())?,
559            Some(MinMaxResult {
560                min: false.into(),
561                max: true.into()
562            })
563        );
564        Ok(())
565    }
566
567    #[test]
568    fn test_null_array() -> VortexResult<()> {
569        let p = NullArray::new(1).into_array();
570        let mut ctx = SESSION.create_execution_ctx();
571        assert_eq!(
572            min_max(&p, &mut ctx, NumericalAggregateOpts::default())?,
573            None
574        );
575        Ok(())
576    }
577
578    #[test]
579    fn test_prim_nan() -> VortexResult<()> {
580        let array = PrimitiveArray::new(
581            buffer![f32::NAN, -f32::NAN, -1.0, 1.0],
582            Validity::NonNullable,
583        );
584        let mut ctx = SESSION.create_execution_ctx();
585        let result = min_max(
586            &array.into_array(),
587            &mut ctx,
588            NumericalAggregateOpts::default(),
589        )?
590        .vortex_expect("should have result");
591        assert_eq!(f32::try_from(&result.min)?, -1.0);
592        assert_eq!(f32::try_from(&result.max)?, 1.0);
593        Ok(())
594    }
595
596    #[test]
597    fn test_prim_inf() -> VortexResult<()> {
598        let array = PrimitiveArray::new(
599            buffer![f32::INFINITY, f32::NEG_INFINITY, -1.0, 1.0],
600            Validity::NonNullable,
601        );
602        let mut ctx = SESSION.create_execution_ctx();
603        let result = min_max(
604            &array.into_array(),
605            &mut ctx,
606            NumericalAggregateOpts::default(),
607        )?
608        .vortex_expect("should have result");
609        assert_eq!(f32::try_from(&result.min)?, f32::NEG_INFINITY);
610        assert_eq!(f32::try_from(&result.max)?, f32::INFINITY);
611        Ok(())
612    }
613
614    #[test]
615    fn test_multi_batch() -> VortexResult<()> {
616        let mut ctx = SESSION.create_execution_ctx();
617        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
618        let mut acc = Accumulator::try_new(MinMax, NumericalAggregateOpts::default(), dtype)?;
619
620        let batch1 = PrimitiveArray::new(buffer![10i32, 20, 5], Validity::NonNullable).into_array();
621        acc.accumulate(&batch1, &mut ctx)?;
622
623        let batch2 = PrimitiveArray::new(buffer![3i32, 25], Validity::NonNullable).into_array();
624        acc.accumulate(&batch2, &mut ctx)?;
625
626        let result = MinMaxResult::from_scalar(acc.finish()?)?.vortex_expect("should have result");
627        assert_eq!(result.min, Scalar::from(3i32));
628        assert_eq!(result.max, Scalar::from(25i32));
629        Ok(())
630    }
631
632    #[test]
633    fn test_finish_resets_state() -> VortexResult<()> {
634        let mut ctx = SESSION.create_execution_ctx();
635        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
636        let mut acc = Accumulator::try_new(MinMax, NumericalAggregateOpts::default(), dtype)?;
637
638        let batch1 = PrimitiveArray::new(buffer![10i32, 20], Validity::NonNullable).into_array();
639        acc.accumulate(&batch1, &mut ctx)?;
640        let result1 = MinMaxResult::from_scalar(acc.finish()?)?.vortex_expect("should have result");
641        assert_eq!(result1.min, Scalar::from(10i32));
642        assert_eq!(result1.max, Scalar::from(20i32));
643
644        let batch2 = PrimitiveArray::new(buffer![3i32, 6, 9], Validity::NonNullable).into_array();
645        acc.accumulate(&batch2, &mut ctx)?;
646        let result2 = MinMaxResult::from_scalar(acc.finish()?)?.vortex_expect("should have result");
647        assert_eq!(result2.min, Scalar::from(3i32));
648        assert_eq!(result2.max, Scalar::from(9i32));
649        Ok(())
650    }
651
652    #[test]
653    fn test_state_merge() -> VortexResult<()> {
654        let dtype = DType::Primitive(PType::I32, Nullability::NonNullable);
655        let mut state = MinMax.empty_partial(&NumericalAggregateOpts::default(), &dtype)?;
656
657        let struct_dtype = make_minmax_dtype(&dtype);
658        let scalar1 = Scalar::struct_(
659            struct_dtype.clone(),
660            vec![Scalar::from(5i32), Scalar::from(15i32)],
661        );
662        MinMax.combine_partials(&mut state, scalar1)?;
663
664        let scalar2 = Scalar::struct_(struct_dtype, vec![Scalar::from(2i32), Scalar::from(10i32)]);
665        MinMax.combine_partials(&mut state, scalar2)?;
666
667        let result = MinMaxResult::from_scalar(MinMax.to_scalar(&state)?)?
668            .vortex_expect("should have result");
669        assert_eq!(result.min, Scalar::from(2i32));
670        assert_eq!(result.max, Scalar::from(15i32));
671        Ok(())
672    }
673
674    #[test]
675    fn test_constant_nan() -> VortexResult<()> {
676        let scalar = Scalar::primitive(f16::NAN, Nullability::NonNullable);
677        let array = ConstantArray::new(scalar, 2).into_array();
678        let mut ctx = SESSION.create_execution_ctx();
679        assert_eq!(
680            min_max(&array, &mut ctx, NumericalAggregateOpts::default())?,
681            None
682        );
683        Ok(())
684    }
685
686    const KEEP_NANS: NumericalAggregateOpts = NumericalAggregateOpts::include_nans();
687
688    fn assert_poisoned(result: Option<MinMaxResult>) -> VortexResult<()> {
689        let result = result.vortex_expect("should have result");
690        assert!(f64::try_from(&result.min.cast(&result.min.dtype().as_nullable())?)?.is_nan());
691        assert!(f64::try_from(&result.max.cast(&result.max.dtype().as_nullable())?)?.is_nan());
692        Ok(())
693    }
694
695    #[test]
696    fn test_prim_nan_not_skipping() -> VortexResult<()> {
697        let array = PrimitiveArray::new(
698            buffer![f32::NAN, -f32::NAN, -1.0, 1.0],
699            Validity::NonNullable,
700        )
701        .into_array();
702        let mut ctx = SESSION.create_execution_ctx();
703        assert_poisoned(min_max(&array, &mut ctx, KEEP_NANS)?)
704    }
705
706    #[test]
707    fn test_prim_no_nan_not_skipping() -> VortexResult<()> {
708        let array =
709            PrimitiveArray::new(buffer![3.0f32, -1.0, 1.0], Validity::NonNullable).into_array();
710        let mut ctx = SESSION.create_execution_ctx();
711        let result = min_max(&array, &mut ctx, KEEP_NANS)?.vortex_expect("should have result");
712        assert_eq!(f32::try_from(&result.min)?, -1.0);
713        assert_eq!(f32::try_from(&result.max)?, 3.0);
714        Ok(())
715    }
716
717    #[test]
718    fn test_constant_nan_not_skipping() -> VortexResult<()> {
719        let scalar = Scalar::primitive(f64::NAN, Nullability::NonNullable);
720        let array = ConstantArray::new(scalar, 2).into_array();
721        let mut ctx = SESSION.create_execution_ctx();
722        assert_poisoned(min_max(&array, &mut ctx, KEEP_NANS)?)
723    }
724
725    #[test]
726    fn test_not_skipping_shortcircuits_on_exact_nan_count_stat() -> VortexResult<()> {
727        // The array has no NaNs; a planted exact NaNCount stat proves the poisoning came from
728        // the stat rather than a scan.
729        let array =
730            PrimitiveArray::new(buffer![1.0f64, 2.0, 3.0], Validity::NonNullable).into_array();
731        array
732            .statistics()
733            .set(Stat::NaNCount, Precision::Exact(ScalarValue::from(2u64)));
734        let mut ctx = SESSION.create_execution_ctx();
735        assert_poisoned(min_max(&array, &mut ctx, KEEP_NANS)?)
736    }
737
738    #[test]
739    fn test_not_skipping_uses_cached_stats_when_nan_free() -> VortexResult<()> {
740        // With an exact NaNCount of zero, the planted exact Min/Max stats are usable as-is.
741        let array =
742            PrimitiveArray::new(buffer![1.0f64, 2.0, 3.0], Validity::NonNullable).into_array();
743        array
744            .statistics()
745            .set(Stat::NaNCount, Precision::Exact(ScalarValue::from(0u64)));
746        array
747            .statistics()
748            .set(Stat::Min, Precision::Exact(ScalarValue::from(-10.0f64)));
749        array
750            .statistics()
751            .set(Stat::Max, Precision::Exact(ScalarValue::from(10.0f64)));
752        let mut ctx = SESSION.create_execution_ctx();
753        let result = min_max(&array, &mut ctx, KEEP_NANS)?.vortex_expect("should have result");
754        assert_eq!(f64::try_from(&result.min)?, -10.0);
755        assert_eq!(f64::try_from(&result.max)?, 10.0);
756        Ok(())
757    }
758
759    #[test]
760    fn test_accumulator_nan_including_nullable_cached_stats() -> VortexResult<()> {
761        // A nullable float array's cached Min/Max stats are reconstructed as nullable scalars.
762        // The NaN-including accumulator shortcircuit must normalise them to the non-nullable
763        // struct field dtype before building the result scalar.
764        let mut ctx = SESSION.create_execution_ctx();
765        let array =
766            PrimitiveArray::from_option_iter([Some(1.0f64), Some(2.0), Some(3.0)]).into_array();
767        array
768            .statistics()
769            .set(Stat::NaNCount, Precision::Exact(ScalarValue::from(0u64)));
770        array
771            .statistics()
772            .set(Stat::Min, Precision::Exact(ScalarValue::from(1.0f64)));
773        array
774            .statistics()
775            .set(Stat::Max, Precision::Exact(ScalarValue::from(3.0f64)));
776
777        let mut acc = Accumulator::try_new(MinMax, KEEP_NANS, array.dtype().clone())?;
778        acc.accumulate(&array, &mut ctx)?;
779        let result = MinMaxResult::from_scalar(acc.finish()?)?.vortex_expect("should have result");
780        assert_eq!(f64::try_from(&result.min)?, 1.0);
781        assert_eq!(f64::try_from(&result.max)?, 3.0);
782        Ok(())
783    }
784
785    #[test]
786    fn test_multi_batch_nan_poisoning() -> VortexResult<()> {
787        let mut ctx = SESSION.create_execution_ctx();
788        let dtype = DType::Primitive(PType::F64, Nullability::NonNullable);
789        let mut acc = Accumulator::try_new(MinMax, KEEP_NANS, dtype)?;
790
791        let batch1 = PrimitiveArray::new(buffer![1.0f64, 2.0], Validity::NonNullable).into_array();
792        acc.accumulate(&batch1, &mut ctx)?;
793        assert!(!acc.is_saturated());
794
795        let batch2 = PrimitiveArray::new(buffer![f64::NAN], Validity::NonNullable).into_array();
796        acc.accumulate(&batch2, &mut ctx)?;
797        assert!(acc.is_saturated());
798
799        assert_poisoned(MinMaxResult::from_scalar(acc.finish()?)?)
800    }
801
802    #[test]
803    fn test_chunked() -> VortexResult<()> {
804        let chunk1 = PrimitiveArray::from_option_iter([Some(5i32), None, Some(1)]);
805        let chunk2 = PrimitiveArray::from_option_iter([Some(10i32), Some(3), None]);
806        let dtype = chunk1.dtype().clone();
807        let chunked = ChunkedArray::try_new(vec![chunk1.into_array(), chunk2.into_array()], dtype)?;
808        let mut ctx = SESSION.create_execution_ctx();
809        let result = min_max(
810            &chunked.into_array(),
811            &mut ctx,
812            NumericalAggregateOpts::default(),
813        )?
814        .vortex_expect("should have result");
815        assert_eq!(result.min, Scalar::from(1i32));
816        assert_eq!(result.max, Scalar::from(10i32));
817        Ok(())
818    }
819
820    #[test]
821    fn test_all_null() -> VortexResult<()> {
822        let p = PrimitiveArray::from_option_iter::<i32, _>([None, None, None]);
823        let mut ctx = SESSION.create_execution_ctx();
824        assert_eq!(
825            min_max(&p.into_array(), &mut ctx, NumericalAggregateOpts::default())?,
826            None
827        );
828        Ok(())
829    }
830
831    #[test]
832    fn test_varbin() -> VortexResult<()> {
833        let array = VarBinArray::from_iter(
834            vec![
835                Some("hello world"),
836                None,
837                Some("hello world this is a long string"),
838                None,
839            ],
840            DType::Utf8(Nullability::Nullable),
841        );
842        let mut ctx = SESSION.create_execution_ctx();
843        let result = min_max(
844            &array.into_array(),
845            &mut ctx,
846            NumericalAggregateOpts::default(),
847        )?
848        .vortex_expect("should have result");
849        assert_eq!(
850            result.min,
851            Scalar::utf8("hello world", Nullability::NonNullable)
852        );
853        assert_eq!(
854            result.max,
855            Scalar::utf8(
856                "hello world this is a long string",
857                Nullability::NonNullable
858            )
859        );
860        Ok(())
861    }
862
863    #[test]
864    fn test_decimal() -> VortexResult<()> {
865        let decimal = DecimalArray::new(
866            buffer![100i32, 2000i32, 200i32],
867            DecimalDType::new(4, 2),
868            Validity::from_iter([true, false, true]),
869        );
870        let mut ctx = SESSION.create_execution_ctx();
871        let result = min_max(
872            &decimal.into_array(),
873            &mut ctx,
874            NumericalAggregateOpts::default(),
875        )?
876        .vortex_expect("should have result");
877
878        let non_nullable_dtype = DType::Decimal(DecimalDType::new(4, 2), Nullability::NonNullable);
879        let expected_min = Scalar::try_new(
880            non_nullable_dtype.clone(),
881            Some(ScalarValue::from(DecimalValue::from(100i32))),
882        )?;
883        let expected_max = Scalar::try_new(
884            non_nullable_dtype,
885            Some(ScalarValue::from(DecimalValue::from(200i32))),
886        )?;
887        assert_eq!(result.min, expected_min);
888        assert_eq!(result.max, expected_max);
889        Ok(())
890    }
891
892    #[test]
893    fn list_and_fixed_size_list_return_dtype() {
894        let element_dtype = DType::Primitive(PType::I32, Nullability::Nullable);
895        let list_dtype = DType::List(Arc::new(element_dtype.clone()), Nullability::Nullable);
896        let fixed_size_list_dtype =
897            DType::FixedSizeList(Arc::new(element_dtype), 1, Nullability::Nullable);
898
899        assert_eq!(
900            MinMax.return_dtype(&NumericalAggregateOpts::default(), &list_dtype),
901            Some(make_minmax_dtype(&list_dtype))
902        );
903        assert_eq!(
904            MinMax.return_dtype(&NumericalAggregateOpts::default(), &fixed_size_list_dtype),
905            Some(make_minmax_dtype(&fixed_size_list_dtype))
906        );
907    }
908
909    #[test]
910    fn list_and_fixed_size_list_min_max_returns_none() -> VortexResult<()> {
911        let mut ctx = SESSION.create_execution_ctx();
912
913        let list_array = ListArray::try_new(
914            buffer![1i32, 2, 3].into_array(),
915            buffer![0u32, 2, 3].into_array(),
916            Validity::NonNullable,
917        )?
918        .into_array();
919        assert_eq!(
920            min_max(&list_array, &mut ctx, NumericalAggregateOpts::default())?,
921            None
922        );
923
924        let fixed_size_list_array = FixedSizeListArray::try_new(
925            buffer![1i32, 2, 3, 4].into_array(),
926            2,
927            Validity::NonNullable,
928            2,
929        )?
930        .into_array();
931        assert_eq!(
932            min_max(
933                &fixed_size_list_array,
934                &mut ctx,
935                NumericalAggregateOpts::default()
936            )?,
937            None
938        );
939
940        Ok(())
941    }
942
943    use crate::dtype::half::f16;
944
945    #[test]
946    fn test_bool_with_nulls() -> VortexResult<()> {
947        let mut ctx = SESSION.create_execution_ctx();
948
949        let result = min_max(
950            &BoolArray::from_iter(vec![Some(true), Some(true), None, None]).into_array(),
951            &mut ctx,
952            NumericalAggregateOpts::default(),
953        )?;
954        assert_eq!(
955            result,
956            Some(MinMaxResult {
957                min: Scalar::bool(true, Nullability::NonNullable),
958                max: Scalar::bool(true, Nullability::NonNullable),
959            })
960        );
961
962        let result = min_max(
963            &BoolArray::from_iter(vec![None, Some(true), Some(true)]).into_array(),
964            &mut ctx,
965            NumericalAggregateOpts::default(),
966        )?;
967        assert_eq!(
968            result,
969            Some(MinMaxResult {
970                min: Scalar::bool(true, Nullability::NonNullable),
971                max: Scalar::bool(true, Nullability::NonNullable),
972            })
973        );
974
975        let result = min_max(
976            &BoolArray::from_iter(vec![None, Some(true), Some(true), None]).into_array(),
977            &mut ctx,
978            NumericalAggregateOpts::default(),
979        )?;
980        assert_eq!(
981            result,
982            Some(MinMaxResult {
983                min: Scalar::bool(true, Nullability::NonNullable),
984                max: Scalar::bool(true, Nullability::NonNullable),
985            })
986        );
987
988        let result = min_max(
989            &BoolArray::from_iter(vec![Some(false), Some(false), None, None]).into_array(),
990            &mut ctx,
991            NumericalAggregateOpts::default(),
992        )?;
993        assert_eq!(
994            result,
995            Some(MinMaxResult {
996                min: Scalar::bool(false, Nullability::NonNullable),
997                max: Scalar::bool(false, Nullability::NonNullable),
998            })
999        );
1000        Ok(())
1001    }
1002
1003    /// Regression test for <https://github.com/vortex-data/vortex/issues/7074>.
1004    ///
1005    /// A chunked all-true bool array with an empty first chunk returned min=false because
1006    /// `accumulate_bool` on the empty chunk incorrectly merged min=false,max=false into the
1007    /// partial state.
1008    #[test]
1009    fn test_bool_chunked_with_empty_chunk() -> VortexResult<()> {
1010        let mut ctx = SESSION.create_execution_ctx();
1011
1012        let empty = BoolArray::new(BitBuffer::from([].as_slice()), Validity::NonNullable);
1013        let chunk1 = BoolArray::new(
1014            BitBuffer::from([true, true].as_slice()),
1015            Validity::NonNullable,
1016        );
1017        let chunk2 = BoolArray::new(
1018            BitBuffer::from([true, true, true].as_slice()),
1019            Validity::NonNullable,
1020        );
1021        let chunked = ChunkedArray::try_new(
1022            vec![empty.into_array(), chunk1.into_array(), chunk2.into_array()],
1023            DType::Bool(Nullability::NonNullable),
1024        )?;
1025
1026        let result = min_max(
1027            &chunked.into_array(),
1028            &mut ctx,
1029            NumericalAggregateOpts::default(),
1030        )?;
1031        assert_eq!(
1032            result,
1033            Some(MinMaxResult {
1034                min: Scalar::bool(true, Nullability::NonNullable),
1035                max: Scalar::bool(true, Nullability::NonNullable),
1036            })
1037        );
1038        Ok(())
1039    }
1040
1041    /// Regression test for <https://github.com/vortex-data/vortex/issues/8145>.
1042    ///
1043    /// A chunked array whose first chunk is an *empty* constant array — as produced by
1044    /// `fill_null` on an empty all-null chunk — returned `max = u32::MAX` because
1045    /// `ChunkedArrayAggregate` accumulated the empty chunk, folding its fill scalar into the
1046    /// running min/max. Empty chunks are now skipped during chunked aggregation.
1047    #[test]
1048    fn test_chunked_with_empty_constant_chunk() -> VortexResult<()> {
1049        let mut ctx = SESSION.create_execution_ctx();
1050
1051        let empty = ConstantArray::new(Scalar::primitive(u32::MAX, Nullability::NonNullable), 0)
1052            .into_array();
1053        let chunk1 = PrimitiveArray::new(buffer![7631471u32], Validity::NonNullable).into_array();
1054        let chunk2 = PrimitiveArray::new(buffer![0u32], Validity::NonNullable).into_array();
1055        let chunked = ChunkedArray::try_new(
1056            vec![empty, chunk1, chunk2],
1057            DType::Primitive(PType::U32, Nullability::NonNullable),
1058        )?;
1059
1060        assert_eq!(
1061            min_max(
1062                &chunked.into_array(),
1063                &mut ctx,
1064                NumericalAggregateOpts::default()
1065            )?,
1066            Some(MinMaxResult {
1067                min: Scalar::primitive(0u32, Nullability::NonNullable),
1068                max: Scalar::primitive(7631471u32, Nullability::NonNullable),
1069            })
1070        );
1071        Ok(())
1072    }
1073
1074    #[test]
1075    fn test_varbin_all_nulls() -> VortexResult<()> {
1076        let array = VarBinArray::from_iter(
1077            vec![Option::<&str>::None, None, None],
1078            DType::Utf8(Nullability::Nullable),
1079        );
1080        let mut ctx = SESSION.create_execution_ctx();
1081        assert_eq!(
1082            min_max(
1083                &array.into_array(),
1084                &mut ctx,
1085                NumericalAggregateOpts::default()
1086            )?,
1087            None
1088        );
1089        Ok(())
1090    }
1091}