vortex_array/compute/
min_max.rs

1use std::sync::LazyLock;
2
3use arcref::ArcRef;
4use vortex_dtype::{DType, Nullability, StructFields};
5use vortex_error::{VortexExpect, VortexResult, vortex_bail};
6use vortex_scalar::Scalar;
7
8use crate::Array;
9use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output, UnaryArgs};
10use crate::stats::{Precision, Stat, StatsProviderExt};
11use crate::vtable::VTable;
12
13/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
14///
15/// The return value dtype is the non-nullable version of the array dtype.
16///
17/// This will update the stats set of this array (as a side effect).
18pub fn min_max(array: &dyn Array) -> VortexResult<Option<MinMaxResult>> {
19    let scalar = MIN_MAX_FN
20        .invoke(&InvocationArgs {
21            inputs: &[array.into()],
22            options: &(),
23        })?
24        .unwrap_scalar()?;
25    MinMaxResult::from_scalar(scalar)
26}
27
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct MinMaxResult {
30    pub min: Scalar,
31    pub max: Scalar,
32}
33
34impl MinMaxResult {
35    pub fn from_scalar(scalar: Scalar) -> VortexResult<Option<Self>> {
36        if scalar.is_null() {
37            Ok(None)
38        } else {
39            let min = scalar
40                .as_struct()
41                .field_by_idx(0)
42                .vortex_expect("missing min field");
43            let max = scalar
44                .as_struct()
45                .field_by_idx(1)
46                .vortex_expect("missing max field");
47            Ok(Some(MinMaxResult { min, max }))
48        }
49    }
50}
51
52pub struct MinMax;
53
54impl ComputeFnVTable for MinMax {
55    fn invoke(
56        &self,
57        args: &InvocationArgs,
58        kernels: &[ArcRef<dyn Kernel>],
59    ) -> VortexResult<Output> {
60        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
61
62        let return_dtype = self.return_dtype(args)?;
63
64        match min_max_impl(array, kernels)? {
65            None => Ok(Scalar::null(return_dtype).into()),
66            Some(MinMaxResult { min, max }) => {
67                assert!(
68                    min <= max,
69                    "min > max: min={} max={} encoding={}",
70                    min,
71                    max,
72                    array.encoding_id()
73                );
74
75                // Update the stats set with the computed min/max
76                array
77                    .statistics()
78                    .set(Stat::Min, Precision::Exact(min.value().clone()));
79                array
80                    .statistics()
81                    .set(Stat::Max, Precision::Exact(max.value().clone()));
82
83                // Return the min/max as a struct scalar
84                Ok(Scalar::struct_(return_dtype, vec![min, max]).into())
85            }
86        }
87    }
88
89    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
90        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
91
92        // We return a min/max struct scalar, where the overall struct is nullable in the case
93        // that the array is all null or empty.
94        Ok(DType::Struct(
95            StructFields::new(
96                ["min", "max"].into(),
97                vec![array.dtype().clone(), array.dtype().clone()],
98            ),
99            Nullability::Nullable,
100        ))
101    }
102
103    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
104        Ok(1)
105    }
106
107    fn is_elementwise(&self) -> bool {
108        false
109    }
110}
111
112fn min_max_impl(
113    array: &dyn Array,
114    kernels: &[ArcRef<dyn Kernel>],
115) -> VortexResult<Option<MinMaxResult>> {
116    if array.is_empty() || array.valid_count()? == 0 {
117        return Ok(None);
118    }
119
120    let min = array
121        .statistics()
122        .get_scalar(Stat::Min, array.dtype())
123        .and_then(Precision::as_exact);
124    let max = array
125        .statistics()
126        .get_scalar(Stat::Max, array.dtype())
127        .and_then(Precision::as_exact);
128
129    if let Some((min, max)) = min.zip(max) {
130        return Ok(Some(MinMaxResult { min, max }));
131    }
132
133    let args = InvocationArgs {
134        inputs: &[array.into()],
135        options: &(),
136    };
137    for kernel in kernels {
138        if let Some(output) = kernel.invoke(&args)? {
139            return MinMaxResult::from_scalar(output.unwrap_scalar()?);
140        }
141    }
142    if let Some(output) = array.invoke(&MIN_MAX_FN, &args)? {
143        return MinMaxResult::from_scalar(output.unwrap_scalar()?);
144    }
145
146    if !array.is_canonical() {
147        let array = array.to_canonical()?;
148        return min_max(array.as_ref());
149    }
150
151    vortex_bail!(NotImplemented: "min_max", array.encoding_id());
152}
153
154/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
155pub trait MinMaxKernel: VTable {
156    fn min_max(&self, array: &Self::Array) -> VortexResult<Option<MinMaxResult>>;
157}
158
159pub struct MinMaxKernelRef(ArcRef<dyn Kernel>);
160inventory::collect!(MinMaxKernelRef);
161
162#[derive(Debug)]
163pub struct MinMaxKernelAdapter<V: VTable>(pub V);
164
165impl<V: VTable + MinMaxKernel> MinMaxKernelAdapter<V> {
166    pub const fn lift(&'static self) -> MinMaxKernelRef {
167        MinMaxKernelRef(ArcRef::new_ref(self))
168    }
169}
170
171impl<V: VTable + MinMaxKernel> Kernel for MinMaxKernelAdapter<V> {
172    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
173        let inputs = UnaryArgs::<()>::try_from(args)?;
174        let Some(array) = inputs.array.as_opt::<V>() else {
175            return Ok(None);
176        };
177        let dtype = DType::Struct(
178            StructFields::new(
179                ["min", "max"].into(),
180                vec![array.dtype().clone(), array.dtype().clone()],
181            ),
182            Nullability::Nullable,
183        );
184        Ok(Some(match V::min_max(&self.0, array)? {
185            None => Scalar::null(dtype).into(),
186            Some(MinMaxResult { min, max }) => Scalar::struct_(dtype, vec![min, max]).into(),
187        }))
188    }
189}
190
191pub static MIN_MAX_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
192    let compute = ComputeFn::new("min_max".into(), ArcRef::new_ref(&MinMax));
193    for kernel in inventory::iter::<MinMaxKernelRef> {
194        compute.register_kernel(kernel.0.clone());
195    }
196    compute
197});
198
199#[cfg(test)]
200mod tests {
201    use arrow_buffer::BooleanBuffer;
202    use vortex_buffer::buffer;
203
204    use crate::arrays::{BoolArray, NullArray, PrimitiveArray};
205    use crate::compute::{MinMaxResult, min_max};
206    use crate::validity::Validity;
207
208    #[test]
209    fn test_prim_max() {
210        let p = PrimitiveArray::new(buffer![1, 2, 3], Validity::NonNullable);
211        assert_eq!(
212            min_max(p.as_ref()).unwrap(),
213            Some(MinMaxResult {
214                min: 1.into(),
215                max: 3.into()
216            })
217        );
218    }
219
220    #[test]
221    fn test_bool_max() {
222        let p = BoolArray::new(
223            BooleanBuffer::from([true, true, true].as_slice()),
224            Validity::NonNullable,
225        );
226        assert_eq!(
227            min_max(p.as_ref()).unwrap(),
228            Some(MinMaxResult {
229                min: true.into(),
230                max: true.into()
231            })
232        );
233
234        let p = BoolArray::new(
235            BooleanBuffer::from([false, false, false].as_slice()),
236            Validity::NonNullable,
237        );
238        assert_eq!(
239            min_max(p.as_ref()).unwrap(),
240            Some(MinMaxResult {
241                min: false.into(),
242                max: false.into()
243            })
244        );
245
246        let p = BoolArray::new(
247            BooleanBuffer::from([false, true, false].as_slice()),
248            Validity::NonNullable,
249        );
250        assert_eq!(
251            min_max(p.as_ref()).unwrap(),
252            Some(MinMaxResult {
253                min: false.into(),
254                max: true.into()
255            })
256        );
257    }
258
259    #[test]
260    fn test_null() {
261        let p = NullArray::new(1);
262        assert_eq!(min_max(p.as_ref()).unwrap(), None);
263    }
264}