Skip to main content

vortex_array/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::LazyLock;
5
6use arcref::ArcRef;
7use vortex_dtype::DType;
8use vortex_dtype::FieldNames;
9use vortex_dtype::Nullability;
10use vortex_dtype::StructFields;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14
15use crate::Array;
16use crate::arrays::ConstantVTable;
17use crate::compute::ComputeFn;
18use crate::compute::ComputeFnVTable;
19use crate::compute::InvocationArgs;
20use crate::compute::Kernel;
21use crate::compute::Output;
22use crate::compute::UnaryArgs;
23use crate::expr::stats::Precision;
24use crate::expr::stats::Stat;
25use crate::expr::stats::StatsProvider;
26use crate::scalar::Scalar;
27use crate::vtable::VTable;
28
29static NAMES: LazyLock<FieldNames> = LazyLock::new(|| FieldNames::from(["min", "max"]));
30
31static MIN_MAX_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
32    let compute = ComputeFn::new("min_max".into(), ArcRef::new_ref(&MinMax));
33    for kernel in inventory::iter::<MinMaxKernelRef> {
34        compute.register_kernel(kernel.0.clone());
35    }
36    compute
37});
38
39pub(crate) fn warm_up_vtable() -> usize {
40    MIN_MAX_FN.kernels().len()
41}
42
43/// The minimum and maximum non-null values of an array, or None if there are no non-null values.
44///
45/// The return value dtype is the non-nullable version of the array dtype.
46///
47/// This will update the stats set of this array (as a side effect).
48pub fn min_max(array: &dyn Array) -> VortexResult<Option<MinMaxResult>> {
49    let scalar = MIN_MAX_FN
50        .invoke(&InvocationArgs {
51            inputs: &[array.into()],
52            options: &(),
53        })?
54        .unwrap_scalar()?;
55    MinMaxResult::from_scalar(scalar)
56}
57
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct MinMaxResult {
60    pub min: Scalar,
61    pub max: Scalar,
62}
63
64impl MinMaxResult {
65    pub fn from_scalar(scalar: Scalar) -> VortexResult<Option<Self>> {
66        if scalar.is_null() {
67            Ok(None)
68        } else {
69            let min = scalar
70                .as_struct()
71                .field_by_idx(0)
72                .vortex_expect("missing min field");
73            let max = scalar
74                .as_struct()
75                .field_by_idx(1)
76                .vortex_expect("missing max field");
77            Ok(Some(MinMaxResult { min, max }))
78        }
79    }
80}
81
82pub struct MinMax;
83
84impl ComputeFnVTable for MinMax {
85    fn invoke(
86        &self,
87        args: &InvocationArgs,
88        kernels: &[ArcRef<dyn Kernel>],
89    ) -> VortexResult<Output> {
90        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
91
92        let return_dtype = self.return_dtype(args)?;
93
94        match min_max_impl(array, kernels)? {
95            None => Ok(Scalar::null(return_dtype).into()),
96            Some(MinMaxResult { min, max }) => {
97                assert!(
98                    min <= max,
99                    "min > max: min={} max={} encoding={}",
100                    min,
101                    max,
102                    array.encoding_id()
103                );
104
105                // Update the stats set with the computed min/max.
106                if let Some(min_value) = min.value() {
107                    array
108                        .statistics()
109                        .set(Stat::Min, Precision::Exact(min_value.clone()));
110                }
111                if let Some(max_value) = max.value() {
112                    array
113                        .statistics()
114                        .set(Stat::Max, Precision::Exact(max_value.clone()));
115                }
116
117                // Return the min/max as a struct scalar
118                Ok(Scalar::struct_(return_dtype, vec![min, max]).into())
119            }
120        }
121    }
122
123    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
124        let UnaryArgs { array, .. } = UnaryArgs::<()>::try_from(args)?;
125
126        // We return a min/max struct scalar, where the overall struct is nullable in the case
127        // that the array is all null or empty.
128        Ok(DType::Struct(
129            StructFields::new(
130                NAMES.clone(),
131                vec![
132                    array.dtype().as_nonnullable(),
133                    array.dtype().as_nonnullable(),
134                ],
135            ),
136            Nullability::Nullable,
137        ))
138    }
139
140    fn return_len(&self, _args: &InvocationArgs) -> VortexResult<usize> {
141        Ok(1)
142    }
143
144    fn is_elementwise(&self) -> bool {
145        false
146    }
147}
148
149fn min_max_impl(
150    array: &dyn Array,
151    kernels: &[ArcRef<dyn Kernel>],
152) -> VortexResult<Option<MinMaxResult>> {
153    if array.is_empty() || array.valid_count()? == 0 {
154        return Ok(None);
155    }
156
157    if let Some(array) = array.as_opt::<ConstantVTable>() {
158        return ConstantVTable.min_max(array);
159    }
160
161    let min = array
162        .statistics()
163        .get(Stat::Min)
164        .and_then(Precision::as_exact);
165    let max = array
166        .statistics()
167        .get(Stat::Max)
168        .and_then(Precision::as_exact);
169
170    if let Some((min, max)) = min.zip(max) {
171        let non_nullable_dtype = array.dtype().as_nonnullable();
172        return Ok(Some(MinMaxResult {
173            min: min.cast(&non_nullable_dtype)?,
174            max: max.cast(&non_nullable_dtype)?,
175        }));
176    }
177
178    let args = InvocationArgs {
179        inputs: &[array.into()],
180        options: &(),
181    };
182    for kernel in kernels {
183        if let Some(output) = kernel.invoke(&args)? {
184            return MinMaxResult::from_scalar(output.unwrap_scalar()?);
185        }
186    }
187
188    if !array.is_canonical() {
189        let array = array.to_canonical()?;
190        return min_max(array.as_ref());
191    }
192
193    vortex_bail!(NotImplemented: "min_max", array.encoding_id());
194}
195
196/// The minimum and maximum non-null values of an array, or None if there are no non-null/or non-nan
197/// values.
198pub trait MinMaxKernel: VTable {
199    fn min_max(&self, array: &Self::Array) -> VortexResult<Option<MinMaxResult>>;
200}
201
202pub struct MinMaxKernelRef(ArcRef<dyn Kernel>);
203inventory::collect!(MinMaxKernelRef);
204
205#[derive(Debug)]
206pub struct MinMaxKernelAdapter<V: VTable>(pub V);
207
208impl<V: VTable + MinMaxKernel> MinMaxKernelAdapter<V> {
209    pub const fn lift(&'static self) -> MinMaxKernelRef {
210        MinMaxKernelRef(ArcRef::new_ref(self))
211    }
212}
213
214impl<V: VTable + MinMaxKernel> Kernel for MinMaxKernelAdapter<V> {
215    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
216        let inputs = UnaryArgs::<()>::try_from(args)?;
217        let Some(array) = inputs.array.as_opt::<V>() else {
218            return Ok(None);
219        };
220        let non_nullable_dtype = array.dtype().as_nonnullable();
221        let dtype = DType::Struct(
222            StructFields::new(
223                NAMES.clone(),
224                vec![non_nullable_dtype.clone(), non_nullable_dtype],
225            ),
226            Nullability::Nullable,
227        );
228        Ok(Some(match V::min_max(&self.0, array)? {
229            None => Scalar::null(dtype).into(),
230            Some(MinMaxResult { min, max }) => Scalar::struct_(dtype, vec![min, max]).into(),
231        }))
232    }
233}
234
235#[cfg(test)]
236mod tests {
237    use vortex_buffer::BitBuffer;
238    use vortex_buffer::buffer;
239
240    use crate::arrays::BoolArray;
241    use crate::arrays::NullArray;
242    use crate::arrays::PrimitiveArray;
243    use crate::compute::MinMaxResult;
244    use crate::compute::min_max;
245    use crate::validity::Validity;
246
247    #[test]
248    fn test_prim_max() {
249        let p = PrimitiveArray::new(buffer![1, 2, 3], Validity::NonNullable);
250        assert_eq!(
251            min_max(p.as_ref()).unwrap(),
252            Some(MinMaxResult {
253                min: 1.into(),
254                max: 3.into()
255            })
256        );
257    }
258
259    #[test]
260    fn test_bool_max() {
261        let p = BoolArray::new(
262            BitBuffer::from([true, true, true].as_slice()),
263            Validity::NonNullable,
264        );
265        assert_eq!(
266            min_max(p.as_ref()).unwrap(),
267            Some(MinMaxResult {
268                min: true.into(),
269                max: true.into()
270            })
271        );
272
273        let p = BoolArray::new(
274            BitBuffer::from([false, false, false].as_slice()),
275            Validity::NonNullable,
276        );
277        assert_eq!(
278            min_max(p.as_ref()).unwrap(),
279            Some(MinMaxResult {
280                min: false.into(),
281                max: false.into()
282            })
283        );
284
285        let p = BoolArray::new(
286            BitBuffer::from([false, true, false].as_slice()),
287            Validity::NonNullable,
288        );
289        assert_eq!(
290            min_max(p.as_ref()).unwrap(),
291            Some(MinMaxResult {
292                min: false.into(),
293                max: true.into()
294            })
295        );
296    }
297
298    #[test]
299    fn test_null() {
300        let p = NullArray::new(1);
301        assert_eq!(min_max(p.as_ref()).unwrap(), None);
302    }
303}