vortex_array/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::LazyLock;
5
6use arcref::ArcRef;
7use vortex_dtype::DType;
8use vortex_dtype::Nullability;
9use vortex_dtype::StructFields;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_scalar::Scalar;
14
15use crate::Array;
16use crate::arrays::ConstantVTable;
17use crate::compute::ComputeFn;
18use crate::compute::ComputeFnVTable;
19use crate::compute::InvocationArgs;
20use crate::compute::Kernel;
21use crate::compute::Output;
22use crate::compute::UnaryArgs;
23use crate::expr::stats::Precision;
24use crate::expr::stats::Stat;
25use crate::expr::stats::StatsProvider;
26use crate::vtable::VTable;
27
28static MIN_MAX_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
29    let compute = ComputeFn::new("min_max".into(), ArcRef::new_ref(&MinMax));
30    for kernel in inventory::iter::<MinMaxKernelRef> {
31        compute.register_kernel(kernel.0.clone());
32    }
33    compute
34});
35
36pub(crate) fn warm_up_vtable() -> usize {
37    MIN_MAX_FN.kernels().len()
38}
39
40/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
41///
42/// The return value dtype is the non-nullable version of the array dtype.
43///
44/// This will update the stats set of this array (as a side effect).
45pub fn min_max(array: &dyn Array) -> VortexResult<Option<MinMaxResult>> {
46    let scalar = MIN_MAX_FN
47        .invoke(&InvocationArgs {
48            inputs: &[array.into()],
49            options: &(),
50        })?
51        .unwrap_scalar()?;
52    MinMaxResult::from_scalar(scalar)
53}
54
55#[derive(Debug, Clone, PartialEq, Eq)]
56pub struct MinMaxResult {
57    pub min: Scalar,
58    pub max: Scalar,
59}
60
61impl MinMaxResult {
62    pub fn from_scalar(scalar: Scalar) -> VortexResult<Option<Self>> {
63        if scalar.is_null() {
64            Ok(None)
65        } else {
66            let min = scalar
67                .as_struct()
68                .field_by_idx(0)
69                .vortex_expect("missing min field");
70            let max = scalar
71                .as_struct()
72                .field_by_idx(1)
73                .vortex_expect("missing max field");
74            Ok(Some(MinMaxResult { min, max }))
75        }
76    }
77}
78
79pub struct MinMax;
80
81impl ComputeFnVTable for MinMax {
82    fn invoke(
83        &self,
84        args: &InvocationArgs,
85        kernels: &[ArcRef<dyn Kernel>],
86    ) -> VortexResult<Output> {
87        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
88
89        let return_dtype = self.return_dtype(args)?;
90
91        match min_max_impl(array, kernels)? {
92            None => Ok(Scalar::null(return_dtype).into()),
93            Some(MinMaxResult { min, max }) => {
94                assert!(
95                    min <= max,
96                    "min > max: min={} max={} encoding={}",
97                    min,
98                    max,
99                    array.encoding_id()
100                );
101
102                // Update the stats set with the computed min/max
103                array
104                    .statistics()
105                    .set(Stat::Min, Precision::Exact(min.value().clone()));
106                array
107                    .statistics()
108                    .set(Stat::Max, Precision::Exact(max.value().clone()));
109
110                // Return the min/max as a struct scalar
111                Ok(Scalar::struct_(return_dtype, vec![min, max]).into())
112            }
113        }
114    }
115
116    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
117        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
118
119        // We return a min/max struct scalar, where the overall struct is nullable in the case
120        // that the array is all null or empty.
121        Ok(DType::Struct(
122            StructFields::new(
123                ["min", "max"].into(),
124                vec![
125                    array.dtype().as_nonnullable(),
126                    array.dtype().as_nonnullable(),
127                ],
128            ),
129            Nullability::Nullable,
130        ))
131    }
132
133    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
134        Ok(1)
135    }
136
137    fn is_elementwise(&self) -> bool {
138        false
139    }
140}
141
142fn min_max_impl(
143    array: &dyn Array,
144    kernels: &[ArcRef<dyn Kernel>],
145) -> VortexResult<Option<MinMaxResult>> {
146    if array.is_empty() || array.valid_count() == 0 {
147        return Ok(None);
148    }
149
150    if let Some(array) = array.as_opt::<ConstantVTable>() {
151        return ConstantVTable.min_max(array);
152    }
153
154    let min = array
155        .statistics()
156        .get(Stat::Min)
157        .and_then(Precision::as_exact);
158    let max = array
159        .statistics()
160        .get(Stat::Max)
161        .and_then(Precision::as_exact);
162
163    if let Some((min, max)) = min.zip(max) {
164        let non_nullable_dtype = array.dtype().as_nonnullable();
165        return Ok(Some(MinMaxResult {
166            min: min.cast(&non_nullable_dtype)?,
167            max: max.cast(&non_nullable_dtype)?,
168        }));
169    }
170
171    let args = InvocationArgs {
172        inputs: &[array.into()],
173        options: &(),
174    };
175    for kernel in kernels {
176        if let Some(output) = kernel.invoke(&args)? {
177            return MinMaxResult::from_scalar(output.unwrap_scalar()?);
178        }
179    }
180    if let Some(output) = array.invoke(&MIN_MAX_FN, &args)? {
181        return MinMaxResult::from_scalar(output.unwrap_scalar()?);
182    }
183
184    if !array.is_canonical() {
185        let array = array.to_canonical();
186        return min_max(array.as_ref());
187    }
188
189    vortex_bail!(NotImplemented: "min_max", array.encoding_id());
190}
191
192/// The minimum and maximum non-null values of an array, or None if there are no non-null/or non-nan
193/// values.
194pub trait MinMaxKernel: VTable {
195    fn min_max(&self, array: &Self::Array) -> VortexResult<Option<MinMaxResult>>;
196}
197
198pub struct MinMaxKernelRef(ArcRef<dyn Kernel>);
199inventory::collect!(MinMaxKernelRef);
200
201#[derive(Debug)]
202pub struct MinMaxKernelAdapter<V: VTable>(pub V);
203
204impl<V: VTable + MinMaxKernel> MinMaxKernelAdapter<V> {
205    pub const fn lift(&'static self) -> MinMaxKernelRef {
206        MinMaxKernelRef(ArcRef::new_ref(self))
207    }
208}
209
210impl<V: VTable + MinMaxKernel> Kernel for MinMaxKernelAdapter<V> {
211    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
212        let inputs = UnaryArgs::<()>::try_from(args)?;
213        let Some(array) = inputs.array.as_opt::<V>() else {
214            return Ok(None);
215        };
216        let non_nullable_dtype = array.dtype().as_nonnullable();
217        let dtype = DType::Struct(
218            StructFields::new(
219                ["min", "max"].into(),
220                vec![non_nullable_dtype.clone(), non_nullable_dtype],
221            ),
222            Nullability::Nullable,
223        );
224        Ok(Some(match V::min_max(&self.0, array)? {
225            None => Scalar::null(dtype).into(),
226            Some(MinMaxResult { min, max }) => Scalar::struct_(dtype, vec![min, max]).into(),
227        }))
228    }
229}
230
231#[cfg(test)]
232mod tests {
233    use vortex_buffer::BitBuffer;
234    use vortex_buffer::buffer;
235
236    use crate::arrays::BoolArray;
237    use crate::arrays::NullArray;
238    use crate::arrays::PrimitiveArray;
239    use crate::compute::MinMaxResult;
240    use crate::compute::min_max;
241    use crate::validity::Validity;
242
243    #[test]
244    fn test_prim_max() {
245        let p = PrimitiveArray::new(buffer![1, 2, 3], Validity::NonNullable);
246        assert_eq!(
247            min_max(p.as_ref()).unwrap(),
248            Some(MinMaxResult {
249                min: 1.into(),
250                max: 3.into()
251            })
252        );
253    }
254
255    #[test]
256    fn test_bool_max() {
257        let p = BoolArray::from_bit_buffer(
258            BitBuffer::from([true, true, true].as_slice()),
259            Validity::NonNullable,
260        );
261        assert_eq!(
262            min_max(p.as_ref()).unwrap(),
263            Some(MinMaxResult {
264                min: true.into(),
265                max: true.into()
266            })
267        );
268
269        let p = BoolArray::from_bit_buffer(
270            BitBuffer::from([false, false, false].as_slice()),
271            Validity::NonNullable,
272        );
273        assert_eq!(
274            min_max(p.as_ref()).unwrap(),
275            Some(MinMaxResult {
276                min: false.into(),
277                max: false.into()
278            })
279        );
280
281        let p = BoolArray::from_bit_buffer(
282            BitBuffer::from([false, true, false].as_slice()),
283            Validity::NonNullable,
284        );
285        assert_eq!(
286            min_max(p.as_ref()).unwrap(),
287            Some(MinMaxResult {
288                min: false.into(),
289                max: true.into()
290            })
291        );
292    }
293
294    #[test]
295    fn test_null() {
296        let p = NullArray::new(1);
297        assert_eq!(min_max(p.as_ref()).unwrap(), None);
298    }
299}