vortex_array/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::LazyLock;
5
6use arcref::ArcRef;
7use vortex_dtype::{DType, Nullability, StructFields};
8use vortex_error::{VortexExpect, VortexResult, vortex_bail};
9use vortex_scalar::Scalar;
10
11use crate::Array;
12use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output, UnaryArgs};
13use crate::stats::{Precision, Stat, StatsProviderExt};
14use crate::vtable::VTable;
15
16/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
17///
18/// The return value dtype is the non-nullable version of the array dtype.
19///
20/// This will update the stats set of this array (as a side effect).
21pub fn min_max(array: &dyn Array) -> VortexResult<Option<MinMaxResult>> {
22    let scalar = MIN_MAX_FN
23        .invoke(&InvocationArgs {
24            inputs: &[array.into()],
25            options: &(),
26        })?
27        .unwrap_scalar()?;
28    MinMaxResult::from_scalar(scalar)
29}
30
31#[derive(Debug, Clone, PartialEq, Eq)]
32pub struct MinMaxResult {
33    pub min: Scalar,
34    pub max: Scalar,
35}
36
37impl MinMaxResult {
38    pub fn from_scalar(scalar: Scalar) -> VortexResult<Option<Self>> {
39        if scalar.is_null() {
40            Ok(None)
41        } else {
42            let min = scalar
43                .as_struct()
44                .field_by_idx(0)
45                .vortex_expect("missing min field");
46            let max = scalar
47                .as_struct()
48                .field_by_idx(1)
49                .vortex_expect("missing max field");
50            Ok(Some(MinMaxResult { min, max }))
51        }
52    }
53}
54
55pub struct MinMax;
56
57impl ComputeFnVTable for MinMax {
58    fn invoke(
59        &self,
60        args: &InvocationArgs,
61        kernels: &[ArcRef<dyn Kernel>],
62    ) -> VortexResult<Output> {
63        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
64
65        let return_dtype = self.return_dtype(args)?;
66
67        match min_max_impl(array, kernels)? {
68            None => Ok(Scalar::null(return_dtype).into()),
69            Some(MinMaxResult { min, max }) => {
70                assert!(
71                    min <= max,
72                    "min > max: min={} max={} encoding={}",
73                    min,
74                    max,
75                    array.encoding_id()
76                );
77
78                // Update the stats set with the computed min/max
79                array
80                    .statistics()
81                    .set(Stat::Min, Precision::Exact(min.value().clone()));
82                array
83                    .statistics()
84                    .set(Stat::Max, Precision::Exact(max.value().clone()));
85
86                // Return the min/max as a struct scalar
87                Ok(Scalar::struct_(return_dtype, vec![min, max]).into())
88            }
89        }
90    }
91
92    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
93        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
94
95        // We return a min/max struct scalar, where the overall struct is nullable in the case
96        // that the array is all null or empty.
97        Ok(DType::Struct(
98            StructFields::new(
99                ["min", "max"].into(),
100                vec![array.dtype().clone(), array.dtype().clone()],
101            ),
102            Nullability::Nullable,
103        ))
104    }
105
106    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
107        Ok(1)
108    }
109
110    fn is_elementwise(&self) -> bool {
111        false
112    }
113}
114
115fn min_max_impl(
116    array: &dyn Array,
117    kernels: &[ArcRef<dyn Kernel>],
118) -> VortexResult<Option<MinMaxResult>> {
119    if array.is_empty() || array.valid_count()? == 0 {
120        return Ok(None);
121    }
122
123    let min = array
124        .statistics()
125        .get_scalar(Stat::Min, array.dtype())
126        .and_then(Precision::as_exact);
127    let max = array
128        .statistics()
129        .get_scalar(Stat::Max, array.dtype())
130        .and_then(Precision::as_exact);
131
132    if let Some((min, max)) = min.zip(max) {
133        return Ok(Some(MinMaxResult { min, max }));
134    }
135
136    let args = InvocationArgs {
137        inputs: &[array.into()],
138        options: &(),
139    };
140    for kernel in kernels {
141        if let Some(output) = kernel.invoke(&args)? {
142            return MinMaxResult::from_scalar(output.unwrap_scalar()?);
143        }
144    }
145    if let Some(output) = array.invoke(&MIN_MAX_FN, &args)? {
146        return MinMaxResult::from_scalar(output.unwrap_scalar()?);
147    }
148
149    if !array.is_canonical() {
150        let array = array.to_canonical()?;
151        return min_max(array.as_ref());
152    }
153
154    vortex_bail!(NotImplemented: "min_max", array.encoding_id());
155}
156
157/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
158pub trait MinMaxKernel: VTable {
159    fn min_max(&self, array: &Self::Array) -> VortexResult<Option<MinMaxResult>>;
160}
161
162pub struct MinMaxKernelRef(ArcRef<dyn Kernel>);
163inventory::collect!(MinMaxKernelRef);
164
165#[derive(Debug)]
166pub struct MinMaxKernelAdapter<V: VTable>(pub V);
167
168impl<V: VTable + MinMaxKernel> MinMaxKernelAdapter<V> {
169    pub const fn lift(&'static self) -> MinMaxKernelRef {
170        MinMaxKernelRef(ArcRef::new_ref(self))
171    }
172}
173
174impl<V: VTable + MinMaxKernel> Kernel for MinMaxKernelAdapter<V> {
175    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
176        let inputs = UnaryArgs::<()>::try_from(args)?;
177        let Some(array) = inputs.array.as_opt::<V>() else {
178            return Ok(None);
179        };
180        let dtype = DType::Struct(
181            StructFields::new(
182                ["min", "max"].into(),
183                vec![array.dtype().clone(), array.dtype().clone()],
184            ),
185            Nullability::Nullable,
186        );
187        Ok(Some(match V::min_max(&self.0, array)? {
188            None => Scalar::null(dtype).into(),
189            Some(MinMaxResult { min, max }) => Scalar::struct_(dtype, vec![min, max]).into(),
190        }))
191    }
192}
193
194pub static MIN_MAX_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
195    let compute = ComputeFn::new("min_max".into(), ArcRef::new_ref(&MinMax));
196    for kernel in inventory::iter::<MinMaxKernelRef> {
197        compute.register_kernel(kernel.0.clone());
198    }
199    compute
200});
201
202#[cfg(test)]
203mod tests {
204    use arrow_buffer::BooleanBuffer;
205    use vortex_buffer::buffer;
206
207    use crate::arrays::{BoolArray, NullArray, PrimitiveArray};
208    use crate::compute::{MinMaxResult, min_max};
209    use crate::validity::Validity;
210
211    #[test]
212    fn test_prim_max() {
213        let p = PrimitiveArray::new(buffer![1, 2, 3], Validity::NonNullable);
214        assert_eq!(
215            min_max(p.as_ref()).unwrap(),
216            Some(MinMaxResult {
217                min: 1.into(),
218                max: 3.into()
219            })
220        );
221    }
222
223    #[test]
224    fn test_bool_max() {
225        let p = BoolArray::new(
226            BooleanBuffer::from([true, true, true].as_slice()),
227            Validity::NonNullable,
228        );
229        assert_eq!(
230            min_max(p.as_ref()).unwrap(),
231            Some(MinMaxResult {
232                min: true.into(),
233                max: true.into()
234            })
235        );
236
237        let p = BoolArray::new(
238            BooleanBuffer::from([false, false, false].as_slice()),
239            Validity::NonNullable,
240        );
241        assert_eq!(
242            min_max(p.as_ref()).unwrap(),
243            Some(MinMaxResult {
244                min: false.into(),
245                max: false.into()
246            })
247        );
248
249        let p = BoolArray::new(
250            BooleanBuffer::from([false, true, false].as_slice()),
251            Validity::NonNullable,
252        );
253        assert_eq!(
254            min_max(p.as_ref()).unwrap(),
255            Some(MinMaxResult {
256                min: false.into(),
257                max: true.into()
258            })
259        );
260    }
261
262    #[test]
263    fn test_null() {
264        let p = NullArray::new(1);
265        assert_eq!(min_max(p.as_ref()).unwrap(), None);
266    }
267}