vortex_array/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::LazyLock;
5
6use arcref::ArcRef;
7use vortex_dtype::{DType, Nullability, StructFields};
8use vortex_error::{VortexExpect, VortexResult, vortex_bail};
9use vortex_scalar::Scalar;
10
11use crate::Array;
12use crate::arrays::ConstantVTable;
13use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output, UnaryArgs};
14use crate::stats::{Precision, Stat, StatsProvider};
15use crate::vtable::VTable;
16
17static MIN_MAX_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
18    let compute = ComputeFn::new("min_max".into(), ArcRef::new_ref(&MinMax));
19    for kernel in inventory::iter::<MinMaxKernelRef> {
20        compute.register_kernel(kernel.0.clone());
21    }
22    compute
23});
24
25/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
26///
27/// The return value dtype is the non-nullable version of the array dtype.
28///
29/// This will update the stats set of this array (as a side effect).
30pub fn min_max(array: &dyn Array) -> VortexResult<Option<MinMaxResult>> {
31    let scalar = MIN_MAX_FN
32        .invoke(&InvocationArgs {
33            inputs: &[array.into()],
34            options: &(),
35        })?
36        .unwrap_scalar()?;
37    MinMaxResult::from_scalar(scalar)
38}
39
40#[derive(Debug, Clone, PartialEq, Eq)]
41pub struct MinMaxResult {
42    pub min: Scalar,
43    pub max: Scalar,
44}
45
46impl MinMaxResult {
47    pub fn from_scalar(scalar: Scalar) -> VortexResult<Option<Self>> {
48        if scalar.is_null() {
49            Ok(None)
50        } else {
51            let min = scalar
52                .as_struct()
53                .field_by_idx(0)
54                .vortex_expect("missing min field");
55            let max = scalar
56                .as_struct()
57                .field_by_idx(1)
58                .vortex_expect("missing max field");
59            Ok(Some(MinMaxResult { min, max }))
60        }
61    }
62}
63
64pub struct MinMax;
65
66impl ComputeFnVTable for MinMax {
67    fn invoke(
68        &self,
69        args: &InvocationArgs,
70        kernels: &[ArcRef<dyn Kernel>],
71    ) -> VortexResult<Output> {
72        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
73
74        let return_dtype = self.return_dtype(args)?;
75
76        match min_max_impl(array, kernels)? {
77            None => Ok(Scalar::null(return_dtype).into()),
78            Some(MinMaxResult { min, max }) => {
79                assert!(
80                    min <= max,
81                    "min > max: min={} max={} encoding={}",
82                    min,
83                    max,
84                    array.encoding_id()
85                );
86
87                // Update the stats set with the computed min/max
88                array
89                    .statistics()
90                    .set(Stat::Min, Precision::Exact(min.value().clone()));
91                array
92                    .statistics()
93                    .set(Stat::Max, Precision::Exact(max.value().clone()));
94
95                // Return the min/max as a struct scalar
96                Ok(Scalar::struct_(return_dtype, vec![min, max]).into())
97            }
98        }
99    }
100
101    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
102        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
103
104        // We return a min/max struct scalar, where the overall struct is nullable in the case
105        // that the array is all null or empty.
106        Ok(DType::Struct(
107            StructFields::new(
108                ["min", "max"].into(),
109                vec![array.dtype().clone(), array.dtype().clone()],
110            ),
111            Nullability::Nullable,
112        ))
113    }
114
115    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
116        Ok(1)
117    }
118
119    fn is_elementwise(&self) -> bool {
120        false
121    }
122}
123
124fn min_max_impl(
125    array: &dyn Array,
126    kernels: &[ArcRef<dyn Kernel>],
127) -> VortexResult<Option<MinMaxResult>> {
128    if array.is_empty() || array.valid_count()? == 0 {
129        return Ok(None);
130    }
131
132    if let Some(array) = array.as_opt::<ConstantVTable>()
133        && !array.scalar().is_null()
134    {
135        return Ok(Some(MinMaxResult {
136            min: array.scalar().clone(),
137            max: array.scalar().clone(),
138        }));
139    }
140
141    let min = array
142        .statistics()
143        .get(Stat::Min)
144        .and_then(Precision::as_exact);
145    let max = array
146        .statistics()
147        .get(Stat::Max)
148        .and_then(Precision::as_exact);
149
150    if let Some((min, max)) = min.zip(max) {
151        return Ok(Some(MinMaxResult { min, max }));
152    }
153
154    let args = InvocationArgs {
155        inputs: &[array.into()],
156        options: &(),
157    };
158    for kernel in kernels {
159        if let Some(output) = kernel.invoke(&args)? {
160            return MinMaxResult::from_scalar(output.unwrap_scalar()?);
161        }
162    }
163    if let Some(output) = array.invoke(&MIN_MAX_FN, &args)? {
164        return MinMaxResult::from_scalar(output.unwrap_scalar()?);
165    }
166
167    if !array.is_canonical() {
168        let array = array.to_canonical()?;
169        return min_max(array.as_ref());
170    }
171
172    vortex_bail!(NotImplemented: "min_max", array.encoding_id());
173}
174
175/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
176pub trait MinMaxKernel: VTable {
177    fn min_max(&self, array: &Self::Array) -> VortexResult<Option<MinMaxResult>>;
178}
179
180pub struct MinMaxKernelRef(ArcRef<dyn Kernel>);
181inventory::collect!(MinMaxKernelRef);
182
183#[derive(Debug)]
184pub struct MinMaxKernelAdapter<V: VTable>(pub V);
185
186impl<V: VTable + MinMaxKernel> MinMaxKernelAdapter<V> {
187    pub const fn lift(&'static self) -> MinMaxKernelRef {
188        MinMaxKernelRef(ArcRef::new_ref(self))
189    }
190}
191
192impl<V: VTable + MinMaxKernel> Kernel for MinMaxKernelAdapter<V> {
193    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
194        let inputs = UnaryArgs::<()>::try_from(args)?;
195        let Some(array) = inputs.array.as_opt::<V>() else {
196            return Ok(None);
197        };
198        let dtype = DType::Struct(
199            StructFields::new(
200                ["min", "max"].into(),
201                vec![array.dtype().clone(), array.dtype().clone()],
202            ),
203            Nullability::Nullable,
204        );
205        Ok(Some(match V::min_max(&self.0, array)? {
206            None => Scalar::null(dtype).into(),
207            Some(MinMaxResult { min, max }) => Scalar::struct_(dtype, vec![min, max]).into(),
208        }))
209    }
210}
211
212#[cfg(test)]
213mod tests {
214    use arrow_buffer::BooleanBuffer;
215    use vortex_buffer::buffer;
216
217    use crate::arrays::{BoolArray, NullArray, PrimitiveArray};
218    use crate::compute::{MinMaxResult, min_max};
219    use crate::validity::Validity;
220
221    #[test]
222    fn test_prim_max() {
223        let p = PrimitiveArray::new(buffer![1, 2, 3], Validity::NonNullable);
224        assert_eq!(
225            min_max(p.as_ref()).unwrap(),
226            Some(MinMaxResult {
227                min: 1.into(),
228                max: 3.into()
229            })
230        );
231    }
232
233    #[test]
234    fn test_bool_max() {
235        let p = BoolArray::new(
236            BooleanBuffer::from([true, true, true].as_slice()),
237            Validity::NonNullable,
238        );
239        assert_eq!(
240            min_max(p.as_ref()).unwrap(),
241            Some(MinMaxResult {
242                min: true.into(),
243                max: true.into()
244            })
245        );
246
247        let p = BoolArray::new(
248            BooleanBuffer::from([false, false, false].as_slice()),
249            Validity::NonNullable,
250        );
251        assert_eq!(
252            min_max(p.as_ref()).unwrap(),
253            Some(MinMaxResult {
254                min: false.into(),
255                max: false.into()
256            })
257        );
258
259        let p = BoolArray::new(
260            BooleanBuffer::from([false, true, false].as_slice()),
261            Validity::NonNullable,
262        );
263        assert_eq!(
264            min_max(p.as_ref()).unwrap(),
265            Some(MinMaxResult {
266                min: false.into(),
267                max: true.into()
268            })
269        );
270    }
271
272    #[test]
273    fn test_null() {
274        let p = NullArray::new(1);
275        assert_eq!(min_max(p.as_ref()).unwrap(), None);
276    }
277}