vortex_array/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::LazyLock;
5
6use arcref::ArcRef;
7use vortex_dtype::{DType, Nullability, StructFields};
8use vortex_error::{VortexExpect, VortexResult, vortex_bail};
9use vortex_scalar::Scalar;
10
11use crate::Array;
12use crate::arrays::ConstantVTable;
13use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output, UnaryArgs};
14use crate::stats::{Precision, Stat, StatsProvider};
15use crate::vtable::VTable;
16
17static MIN_MAX_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
18    let compute = ComputeFn::new("min_max".into(), ArcRef::new_ref(&MinMax));
19    for kernel in inventory::iter::<MinMaxKernelRef> {
20        compute.register_kernel(kernel.0.clone());
21    }
22    compute
23});
24
25pub(crate) fn warm_up_vtable() -> usize {
26    MIN_MAX_FN.kernels().len()
27}
28
29/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
30///
31/// The return value dtype is the non-nullable version of the array dtype.
32///
33/// This will update the stats set of this array (as a side effect).
34pub fn min_max(array: &dyn Array) -> VortexResult<Option<MinMaxResult>> {
35    let scalar = MIN_MAX_FN
36        .invoke(&InvocationArgs {
37            inputs: &[array.into()],
38            options: &(),
39        })?
40        .unwrap_scalar()?;
41    MinMaxResult::from_scalar(scalar)
42}
43
/// Pair of minimum and maximum scalars produced by [`min_max`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MinMaxResult {
    /// The minimum value.
    pub min: Scalar,
    /// The maximum value.
    pub max: Scalar,
}
49
50impl MinMaxResult {
51    pub fn from_scalar(scalar: Scalar) -> VortexResult<Option<Self>> {
52        if scalar.is_null() {
53            Ok(None)
54        } else {
55            let min = scalar
56                .as_struct()
57                .field_by_idx(0)
58                .vortex_expect("missing min field");
59            let max = scalar
60                .as_struct()
61                .field_by_idx(1)
62                .vortex_expect("missing max field");
63            Ok(Some(MinMaxResult { min, max }))
64        }
65    }
66}
67
/// Vtable type backing the `min_max` compute function; the behavior lives in its
/// [`ComputeFnVTable`] implementation.
pub struct MinMax;
69
70impl ComputeFnVTable for MinMax {
71    fn invoke(
72        &self,
73        args: &InvocationArgs,
74        kernels: &[ArcRef<dyn Kernel>],
75    ) -> VortexResult<Output> {
76        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
77
78        let return_dtype = self.return_dtype(args)?;
79
80        match min_max_impl(array, kernels)? {
81            None => Ok(Scalar::null(return_dtype).into()),
82            Some(MinMaxResult { min, max }) => {
83                assert!(
84                    min <= max,
85                    "min > max: min={} max={} encoding={}",
86                    min,
87                    max,
88                    array.encoding_id()
89                );
90
91                // Update the stats set with the computed min/max
92                array
93                    .statistics()
94                    .set(Stat::Min, Precision::Exact(min.value().clone()));
95                array
96                    .statistics()
97                    .set(Stat::Max, Precision::Exact(max.value().clone()));
98
99                // Return the min/max as a struct scalar
100                Ok(Scalar::struct_(return_dtype, vec![min, max]).into())
101            }
102        }
103    }
104
105    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
106        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
107
108        // We return a min/max struct scalar, where the overall struct is nullable in the case
109        // that the array is all null or empty.
110        Ok(DType::Struct(
111            StructFields::new(
112                ["min", "max"].into(),
113                vec![array.dtype().clone(), array.dtype().clone()],
114            ),
115            Nullability::Nullable,
116        ))
117    }
118
119    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
120        Ok(1)
121    }
122
123    fn is_elementwise(&self) -> bool {
124        false
125    }
126}
127
128fn min_max_impl(
129    array: &dyn Array,
130    kernels: &[ArcRef<dyn Kernel>],
131) -> VortexResult<Option<MinMaxResult>> {
132    if array.is_empty() || array.valid_count() == 0 {
133        return Ok(None);
134    }
135
136    if let Some(array) = array.as_opt::<ConstantVTable>()
137        && !array.scalar().is_null()
138    {
139        return Ok(Some(MinMaxResult {
140            min: array.scalar().clone(),
141            max: array.scalar().clone(),
142        }));
143    }
144
145    let min = array
146        .statistics()
147        .get(Stat::Min)
148        .and_then(Precision::as_exact);
149    let max = array
150        .statistics()
151        .get(Stat::Max)
152        .and_then(Precision::as_exact);
153
154    if let Some((min, max)) = min.zip(max) {
155        return Ok(Some(MinMaxResult { min, max }));
156    }
157
158    let args = InvocationArgs {
159        inputs: &[array.into()],
160        options: &(),
161    };
162    for kernel in kernels {
163        if let Some(output) = kernel.invoke(&args)? {
164            return MinMaxResult::from_scalar(output.unwrap_scalar()?);
165        }
166    }
167    if let Some(output) = array.invoke(&MIN_MAX_FN, &args)? {
168        return MinMaxResult::from_scalar(output.unwrap_scalar()?);
169    }
170
171    if !array.is_canonical() {
172        let array = array.to_canonical();
173        return min_max(array.as_ref());
174    }
175
176    vortex_bail!(NotImplemented: "min_max", array.encoding_id());
177}
178
/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
///
/// Implemented on a vtable type; [`MinMaxKernelAdapter`] exposes an implementation through the
/// generic [`Kernel`] interface.
pub trait MinMaxKernel: VTable {
    /// Computes the min/max of `array`, an array of this vtable's own array type.
    ///
    /// Returning `Ok(None)` signals that there is no result; the adapter turns that into a
    /// null scalar.
    fn min_max(&self, array: &Self::Array) -> VortexResult<Option<MinMaxResult>>;
}
183
/// Newtype around a type-erased [`Kernel`] so that min/max kernels can be collected with
/// `inventory`.
///
/// Every collected value is registered on the `min_max` compute function when that function
/// is first built.
pub struct MinMaxKernelRef(ArcRef<dyn Kernel>);
// Declares `MinMaxKernelRef` as a collectable type for `inventory::iter`.
inventory::collect!(MinMaxKernelRef);
186
/// Adapts a vtable implementing [`MinMaxKernel`] to the generic [`Kernel`] interface.
///
/// The wrapped vtable is stored in the public tuple field.
#[derive(Debug)]
pub struct MinMaxKernelAdapter<V: VTable>(pub V);
189
impl<V: VTable + MinMaxKernel> MinMaxKernelAdapter<V> {
    /// Wraps a `'static` reference to this adapter in a [`MinMaxKernelRef`].
    ///
    /// The function is `const`, so it can be evaluated in constant contexts.
    // NOTE(review): presumably intended for use with `inventory::submit!` — confirm at call sites.
    pub const fn lift(&'static self) -> MinMaxKernelRef {
        MinMaxKernelRef(ArcRef::new_ref(self))
    }
}
195
196impl<V: VTable + MinMaxKernel> Kernel for MinMaxKernelAdapter<V> {
197    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
198        let inputs = UnaryArgs::<()>::try_from(args)?;
199        let Some(array) = inputs.array.as_opt::<V>() else {
200            return Ok(None);
201        };
202        let dtype = DType::Struct(
203            StructFields::new(
204                ["min", "max"].into(),
205                vec![array.dtype().clone(), array.dtype().clone()],
206            ),
207            Nullability::Nullable,
208        );
209        Ok(Some(match V::min_max(&self.0, array)? {
210            None => Scalar::null(dtype).into(),
211            Some(MinMaxResult { min, max }) => Scalar::struct_(dtype, vec![min, max]).into(),
212        }))
213    }
214}
215
#[cfg(test)]
mod tests {
    use arrow_buffer::BooleanBuffer;
    use vortex_buffer::buffer;

    use crate::arrays::{BoolArray, NullArray, PrimitiveArray};
    use crate::compute::{MinMaxResult, min_max};
    use crate::validity::Validity;

    /// Builds a non-nullable `BoolArray` holding `values`.
    fn bool_array(values: &[bool]) -> BoolArray {
        BoolArray::from_bool_buffer(BooleanBuffer::from(values), Validity::NonNullable)
    }

    #[test]
    fn test_prim_max() {
        let ints = PrimitiveArray::new(buffer![1, 2, 3], Validity::NonNullable);
        let expected = MinMaxResult {
            min: 1.into(),
            max: 3.into(),
        };
        assert_eq!(min_max(ints.as_ref()).unwrap(), Some(expected));
    }

    #[test]
    fn test_bool_max() {
        // (input values, expected min, expected max)
        let cases: [(&[bool], bool, bool); 3] = [
            (&[true, true, true], true, true),
            (&[false, false, false], false, false),
            (&[false, true, false], false, true),
        ];

        for (values, min, max) in cases {
            let bools = bool_array(values);
            let expected = MinMaxResult {
                min: min.into(),
                max: max.into(),
            };
            assert_eq!(min_max(bools.as_ref()).unwrap(), Some(expected));
        }
    }

    #[test]
    fn test_null() {
        // An all-null array has no non-null values, hence no min/max.
        let nulls = NullArray::new(1);
        assert_eq!(min_max(nulls.as_ref()).unwrap(), None);
    }
}