Skip to main content

vortex_array/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::LazyLock;
5
6use arcref::ArcRef;
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10
11use crate::Array;
12use crate::ArrayRef;
13use crate::IntoArray as _;
14use crate::arrays::ConstantVTable;
15use crate::compute::ComputeFn;
16use crate::compute::ComputeFnVTable;
17use crate::compute::InvocationArgs;
18use crate::compute::Kernel;
19use crate::compute::Output;
20use crate::compute::UnaryArgs;
21use crate::dtype::DType;
22use crate::dtype::FieldNames;
23use crate::dtype::Nullability;
24use crate::dtype::StructFields;
25use crate::expr::stats::Precision;
26use crate::expr::stats::Stat;
27use crate::expr::stats::StatsProvider;
28use crate::scalar::Scalar;
29use crate::vtable::VTable;
30
31static NAMES: LazyLock<FieldNames> = LazyLock::new(|| FieldNames::from(["min", "max"]));
32
33static MIN_MAX_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
34    let compute = ComputeFn::new("min_max".into(), ArcRef::new_ref(&MinMax));
35    for kernel in inventory::iter::<MinMaxKernelRef> {
36        compute.register_kernel(kernel.0.clone());
37    }
38    compute
39});
40
41pub(crate) fn warm_up_vtable() -> usize {
42    MIN_MAX_FN.kernels().len()
43}
44
45/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
46///
47/// The return value dtype is the non-nullable version of the array dtype.
48///
49/// This will update the stats set of this array (as a side effect).
50pub fn min_max(array: &ArrayRef) -> VortexResult<Option<MinMaxResult>> {
51    let scalar = MIN_MAX_FN
52        .invoke(&InvocationArgs {
53            inputs: &[array.into()],
54            options: &(),
55        })?
56        .unwrap_scalar()?;
57    MinMaxResult::from_scalar(scalar)
58}
59
60#[derive(Debug, Clone, PartialEq, Eq)]
61pub struct MinMaxResult {
62    pub min: Scalar,
63    pub max: Scalar,
64}
65
66impl MinMaxResult {
67    pub fn from_scalar(scalar: Scalar) -> VortexResult<Option<Self>> {
68        if scalar.is_null() {
69            Ok(None)
70        } else {
71            let min = scalar
72                .as_struct()
73                .field_by_idx(0)
74                .vortex_expect("missing min field");
75            let max = scalar
76                .as_struct()
77                .field_by_idx(1)
78                .vortex_expect("missing max field");
79            Ok(Some(MinMaxResult { min, max }))
80        }
81    }
82}
83
84pub struct MinMax;
85
86impl ComputeFnVTable for MinMax {
87    fn invoke(
88        &self,
89        args: &InvocationArgs,
90        kernels: &[ArcRef<dyn Kernel>],
91    ) -> VortexResult<Output> {
92        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
93        let array = array.to_array();
94
95        let return_dtype = self.return_dtype(args)?;
96
97        match min_max_impl(&array, kernels)? {
98            None => Ok(Scalar::null(return_dtype).into()),
99            Some(MinMaxResult { min, max }) => {
100                assert!(
101                    min <= max,
102                    "min > max: min={} max={} encoding={}",
103                    min,
104                    max,
105                    array.encoding_id()
106                );
107
108                // Update the stats set with the computed min/max.
109                if let Some(min_value) = min.value() {
110                    array
111                        .statistics()
112                        .set(Stat::Min, Precision::Exact(min_value.clone()));
113                }
114                if let Some(max_value) = max.value() {
115                    array
116                        .statistics()
117                        .set(Stat::Max, Precision::Exact(max_value.clone()));
118                }
119
120                // Return the min/max as a struct scalar
121                Ok(Scalar::struct_(return_dtype, vec![min, max]).into())
122            }
123        }
124    }
125
126    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
127        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
128
129        // We return a min/max struct scalar, where the overall struct is nullable in the case
130        // that the array is all null or empty.
131        Ok(DType::Struct(
132            StructFields::new(
133                NAMES.clone(),
134                vec![
135                    array.dtype().as_nonnullable(),
136                    array.dtype().as_nonnullable(),
137                ],
138            ),
139            Nullability::Nullable,
140        ))
141    }
142
143    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
144        Ok(1)
145    }
146
147    fn is_elementwise(&self) -> bool {
148        false
149    }
150}
151
152fn min_max_impl(
153    array: &ArrayRef,
154    kernels: &[ArcRef<dyn Kernel>],
155) -> VortexResult<Option<MinMaxResult>> {
156    if array.is_empty() || array.valid_count()? == 0 {
157        return Ok(None);
158    }
159
160    if let Some(array) = array.as_opt::<ConstantVTable>() {
161        return ConstantVTable.min_max(array);
162    }
163
164    let min = array
165        .statistics()
166        .get(Stat::Min)
167        .and_then(Precision::as_exact);
168    let max = array
169        .statistics()
170        .get(Stat::Max)
171        .and_then(Precision::as_exact);
172
173    if let Some((min, max)) = min.zip(max) {
174        let non_nullable_dtype = array.dtype().as_nonnullable();
175        return Ok(Some(MinMaxResult {
176            min: min.cast(&non_nullable_dtype)?,
177            max: max.cast(&non_nullable_dtype)?,
178        }));
179    }
180
181    let args = InvocationArgs {
182        inputs: &[array.into()],
183        options: &(),
184    };
185    for kernel in kernels {
186        if let Some(output) = kernel.invoke(&args)? {
187            return MinMaxResult::from_scalar(output.unwrap_scalar()?);
188        }
189    }
190
191    if !array.is_canonical() {
192        let array = array.to_canonical()?.into_array();
193        return min_max(&array);
194    }
195
196    vortex_bail!(NotImplemented: "min_max", array.encoding_id());
197}
198
199/// The minimum and maximum non-null values of an array, or None if there are no non-null/or non-nan
200/// values.
201pub trait MinMaxKernel: VTable {
202    fn min_max(&self, array: &Self::Array) -> VortexResult<Option<MinMaxResult>>;
203}
204
205pub struct MinMaxKernelRef(ArcRef<dyn Kernel>);
206inventory::collect!(MinMaxKernelRef);
207
208#[derive(Debug)]
209pub struct MinMaxKernelAdapter<V: VTable>(pub V);
210
211impl<V: VTable + MinMaxKernel> MinMaxKernelAdapter<V> {
212    pub const fn lift(&'static self) -> MinMaxKernelRef {
213        MinMaxKernelRef(ArcRef::new_ref(self))
214    }
215}
216
217impl<V: VTable + MinMaxKernel> Kernel for MinMaxKernelAdapter<V> {
218    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
219        let inputs = UnaryArgs::<()>::try_from(args)?;
220        let Some(array) = inputs.array.as_opt::<V>() else {
221            return Ok(None);
222        };
223        let non_nullable_dtype = array.dtype().as_nonnullable();
224        let dtype = DType::Struct(
225            StructFields::new(
226                NAMES.clone(),
227                vec![non_nullable_dtype.clone(), non_nullable_dtype],
228            ),
229            Nullability::Nullable,
230        );
231        Ok(Some(match V::min_max(&self.0, array)? {
232            None => Scalar::null(dtype).into(),
233            Some(MinMaxResult { min, max }) => Scalar::struct_(dtype, vec![min, max]).into(),
234        }))
235    }
236}
237
238#[cfg(test)]
239mod tests {
240    use vortex_buffer::BitBuffer;
241    use vortex_buffer::buffer;
242
243    use crate::IntoArray as _;
244    use crate::arrays::BoolArray;
245    use crate::arrays::NullArray;
246    use crate::arrays::PrimitiveArray;
247    use crate::compute::MinMaxResult;
248    use crate::compute::min_max;
249    use crate::validity::Validity;
250
251    #[test]
252    fn test_prim_max() {
253        let p = PrimitiveArray::new(buffer![1, 2, 3], Validity::NonNullable).into_array();
254        assert_eq!(
255            min_max(&p).unwrap(),
256            Some(MinMaxResult {
257                min: 1.into(),
258                max: 3.into()
259            })
260        );
261    }
262
263    #[test]
264    fn test_bool_max() {
265        let p = BoolArray::new(
266            BitBuffer::from([true, true, true].as_slice()),
267            Validity::NonNullable,
268        )
269        .into_array();
270        assert_eq!(
271            min_max(&p).unwrap(),
272            Some(MinMaxResult {
273                min: true.into(),
274                max: true.into()
275            })
276        );
277
278        let p = BoolArray::new(
279            BitBuffer::from([false, false, false].as_slice()),
280            Validity::NonNullable,
281        )
282        .into_array();
283        assert_eq!(
284            min_max(&p).unwrap(),
285            Some(MinMaxResult {
286                min: false.into(),
287                max: false.into()
288            })
289        );
290
291        let p = BoolArray::new(
292            BitBuffer::from([false, true, false].as_slice()),
293            Validity::NonNullable,
294        )
295        .into_array();
296        assert_eq!(
297            min_max(&p).unwrap(),
298            Some(MinMaxResult {
299                min: false.into(),
300                max: true.into()
301            })
302        );
303    }
304
305    #[test]
306    fn test_null() {
307        let p = NullArray::new(1).into_array();
308        assert_eq!(min_max(&p).unwrap(), None);
309    }
310}