vortex_array/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::LazyLock;
5
6use arcref::ArcRef;
7use vortex_dtype::{DType, Nullability, StructFields};
8use vortex_error::{VortexExpect, VortexResult, vortex_bail};
9use vortex_scalar::Scalar;
10
11use crate::Array;
12use crate::arrays::ConstantVTable;
13use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output, UnaryArgs};
14use crate::stats::{Precision, Stat, StatsProvider};
15use crate::vtable::VTable;
16
17static MIN_MAX_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
18    let compute = ComputeFn::new("min_max".into(), ArcRef::new_ref(&MinMax));
19    for kernel in inventory::iter::<MinMaxKernelRef> {
20        compute.register_kernel(kernel.0.clone());
21    }
22    compute
23});
24
25pub(crate) fn warm_up_vtable() -> usize {
26    MIN_MAX_FN.kernels().len()
27}
28
29/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
30///
31/// The return value dtype is the non-nullable version of the array dtype.
32///
33/// This will update the stats set of this array (as a side effect).
34pub fn min_max(array: &dyn Array) -> VortexResult<Option<MinMaxResult>> {
35    let scalar = MIN_MAX_FN
36        .invoke(&InvocationArgs {
37            inputs: &[array.into()],
38            options: &(),
39        })?
40        .unwrap_scalar()?;
41    MinMaxResult::from_scalar(scalar)
42}
43
44#[derive(Debug, Clone, PartialEq, Eq)]
45pub struct MinMaxResult {
46    pub min: Scalar,
47    pub max: Scalar,
48}
49
50impl MinMaxResult {
51    pub fn from_scalar(scalar: Scalar) -> VortexResult<Option<Self>> {
52        if scalar.is_null() {
53            Ok(None)
54        } else {
55            let min = scalar
56                .as_struct()
57                .field_by_idx(0)
58                .vortex_expect("missing min field");
59            let max = scalar
60                .as_struct()
61                .field_by_idx(1)
62                .vortex_expect("missing max field");
63            Ok(Some(MinMaxResult { min, max }))
64        }
65    }
66}
67
68pub struct MinMax;
69
70impl ComputeFnVTable for MinMax {
71    fn invoke(
72        &self,
73        args: &InvocationArgs,
74        kernels: &[ArcRef<dyn Kernel>],
75    ) -> VortexResult<Output> {
76        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
77
78        let return_dtype = self.return_dtype(args)?;
79
80        match min_max_impl(array, kernels)? {
81            None => Ok(Scalar::null(return_dtype).into()),
82            Some(MinMaxResult { min, max }) => {
83                assert!(
84                    min <= max,
85                    "min > max: min={} max={} encoding={}",
86                    min,
87                    max,
88                    array.encoding_id()
89                );
90
91                // Update the stats set with the computed min/max
92                array
93                    .statistics()
94                    .set(Stat::Min, Precision::Exact(min.value().clone()));
95                array
96                    .statistics()
97                    .set(Stat::Max, Precision::Exact(max.value().clone()));
98
99                // Return the min/max as a struct scalar
100                Ok(Scalar::struct_(return_dtype, vec![min, max]).into())
101            }
102        }
103    }
104
105    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
106        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
107
108        // We return a min/max struct scalar, where the overall struct is nullable in the case
109        // that the array is all null or empty.
110        Ok(DType::Struct(
111            StructFields::new(
112                ["min", "max"].into(),
113                vec![
114                    array.dtype().as_nonnullable(),
115                    array.dtype().as_nonnullable(),
116                ],
117            ),
118            Nullability::Nullable,
119        ))
120    }
121
122    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
123        Ok(1)
124    }
125
126    fn is_elementwise(&self) -> bool {
127        false
128    }
129}
130
131fn min_max_impl(
132    array: &dyn Array,
133    kernels: &[ArcRef<dyn Kernel>],
134) -> VortexResult<Option<MinMaxResult>> {
135    if array.is_empty() || array.valid_count() == 0 {
136        return Ok(None);
137    }
138
139    if let Some(array) = array.as_opt::<ConstantVTable>() {
140        return ConstantVTable.min_max(array);
141    }
142
143    let min = array
144        .statistics()
145        .get(Stat::Min)
146        .and_then(Precision::as_exact);
147    let max = array
148        .statistics()
149        .get(Stat::Max)
150        .and_then(Precision::as_exact);
151
152    if let Some((min, max)) = min.zip(max) {
153        let non_nullable_dtype = array.dtype().as_nonnullable();
154        return Ok(Some(MinMaxResult {
155            min: min.cast(&non_nullable_dtype)?,
156            max: max.cast(&non_nullable_dtype)?,
157        }));
158    }
159
160    let args = InvocationArgs {
161        inputs: &[array.into()],
162        options: &(),
163    };
164    for kernel in kernels {
165        if let Some(output) = kernel.invoke(&args)? {
166            return MinMaxResult::from_scalar(output.unwrap_scalar()?);
167        }
168    }
169    if let Some(output) = array.invoke(&MIN_MAX_FN, &args)? {
170        return MinMaxResult::from_scalar(output.unwrap_scalar()?);
171    }
172
173    if !array.is_canonical() {
174        let array = array.to_canonical();
175        return min_max(array.as_ref());
176    }
177
178    vortex_bail!(NotImplemented: "min_max", array.encoding_id());
179}
180
181/// The minimum and maximum non-null values of an array, or None if there are no non-null/or non-nan
182/// values.
183pub trait MinMaxKernel: VTable {
184    fn min_max(&self, array: &Self::Array) -> VortexResult<Option<MinMaxResult>>;
185}
186
187pub struct MinMaxKernelRef(ArcRef<dyn Kernel>);
188inventory::collect!(MinMaxKernelRef);
189
190#[derive(Debug)]
191pub struct MinMaxKernelAdapter<V: VTable>(pub V);
192
193impl<V: VTable + MinMaxKernel> MinMaxKernelAdapter<V> {
194    pub const fn lift(&'static self) -> MinMaxKernelRef {
195        MinMaxKernelRef(ArcRef::new_ref(self))
196    }
197}
198
199impl<V: VTable + MinMaxKernel> Kernel for MinMaxKernelAdapter<V> {
200    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
201        let inputs = UnaryArgs::<()>::try_from(args)?;
202        let Some(array) = inputs.array.as_opt::<V>() else {
203            return Ok(None);
204        };
205        let non_nullable_dtype = array.dtype().as_nonnullable();
206        let dtype = DType::Struct(
207            StructFields::new(
208                ["min", "max"].into(),
209                vec![non_nullable_dtype.clone(), non_nullable_dtype],
210            ),
211            Nullability::Nullable,
212        );
213        Ok(Some(match V::min_max(&self.0, array)? {
214            None => Scalar::null(dtype).into(),
215            Some(MinMaxResult { min, max }) => Scalar::struct_(dtype, vec![min, max]).into(),
216        }))
217    }
218}
219
220#[cfg(test)]
221mod tests {
222    use vortex_buffer::{BitBuffer, buffer};
223
224    use crate::arrays::{BoolArray, NullArray, PrimitiveArray};
225    use crate::compute::{MinMaxResult, min_max};
226    use crate::validity::Validity;
227
228    #[test]
229    fn test_prim_max() {
230        let p = PrimitiveArray::new(buffer![1, 2, 3], Validity::NonNullable);
231        assert_eq!(
232            min_max(p.as_ref()).unwrap(),
233            Some(MinMaxResult {
234                min: 1.into(),
235                max: 3.into()
236            })
237        );
238    }
239
240    #[test]
241    fn test_bool_max() {
242        let p = BoolArray::from_bit_buffer(
243            BitBuffer::from([true, true, true].as_slice()),
244            Validity::NonNullable,
245        );
246        assert_eq!(
247            min_max(p.as_ref()).unwrap(),
248            Some(MinMaxResult {
249                min: true.into(),
250                max: true.into()
251            })
252        );
253
254        let p = BoolArray::from_bit_buffer(
255            BitBuffer::from([false, false, false].as_slice()),
256            Validity::NonNullable,
257        );
258        assert_eq!(
259            min_max(p.as_ref()).unwrap(),
260            Some(MinMaxResult {
261                min: false.into(),
262                max: false.into()
263            })
264        );
265
266        let p = BoolArray::from_bit_buffer(
267            BitBuffer::from([false, true, false].as_slice()),
268            Validity::NonNullable,
269        );
270        assert_eq!(
271            min_max(p.as_ref()).unwrap(),
272            Some(MinMaxResult {
273                min: false.into(),
274                max: true.into()
275            })
276        );
277    }
278
279    #[test]
280    fn test_null() {
281        let p = NullArray::new(1);
282        assert_eq!(min_max(p.as_ref()).unwrap(), None);
283    }
284}