vortex_array/arrays/varbin/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_dtype::DType;
6use vortex_dtype::Nullability::NonNullable;
7use vortex_error::{VortexResult, vortex_panic};
8use vortex_scalar::Scalar;
9
10use crate::accessor::ArrayAccessor;
11use crate::arrays::{VarBinArray, VarBinVTable};
12use crate::compute::{MinMaxKernel, MinMaxKernelAdapter, MinMaxResult};
13use crate::register_kernel;
14
15impl MinMaxKernel for VarBinVTable {
16    fn min_max(&self, array: &VarBinArray) -> VortexResult<Option<MinMaxResult>> {
17        Ok(varbin_compute_min_max(array, array.dtype()))
18    }
19}
20
21register_kernel!(MinMaxKernelAdapter(VarBinVTable).lift());
22
23/// Compute the min and max of VarBin like array.
24pub(crate) fn varbin_compute_min_max<T: ArrayAccessor<[u8]>>(
25    array: &T,
26    dtype: &DType,
27) -> Option<MinMaxResult> {
28    array.with_iterator(|iter| match iter.flatten().minmax() {
29        itertools::MinMaxResult::NoElements => None,
30        itertools::MinMaxResult::OneElement(value) => {
31            let scalar = make_scalar(dtype, value);
32            Some(MinMaxResult {
33                min: scalar.clone(),
34                max: scalar,
35            })
36        }
37        itertools::MinMaxResult::MinMax(min, max) => Some(MinMaxResult {
38            min: make_scalar(dtype, min),
39            max: make_scalar(dtype, max),
40        }),
41    })
42}
43
44/// Helper function to make sure that min/max has the right [`Scalar`] type.
45fn make_scalar(dtype: &DType, value: &[u8]) -> Scalar {
46    match dtype {
47        DType::Binary(_) => Scalar::binary(value.to_vec(), NonNullable),
48        DType::Utf8(_) => {
49            // SAFETY: We only call `compute_min_max` within `varbin/`, in which we always validate
50            // the arrays, and we always pass `array.dtype()` in as the `dtype` argument.
51            let value = unsafe { str::from_utf8_unchecked(value) };
52            Scalar::utf8(value, NonNullable)
53        }
54        _ => vortex_panic!("cannot make Scalar from bytes with dtype {dtype}"),
55    }
56}
57
#[cfg(test)]
mod tests {
    use vortex_buffer::BufferString;
    use vortex_dtype::DType::Utf8;
    use vortex_dtype::Nullability::{NonNullable, Nullable};
    use vortex_scalar::Scalar;

    use crate::arrays::VarBinArray;
    use crate::compute::{MinMaxResult, min_max};
    use crate::stats::{Stat, StatsProvider};

    /// Nulls are ignored and the reported extremes are non-nullable UTF-8 scalars.
    #[test]
    fn some_nulls() {
        let array = VarBinArray::from_iter(
            vec![
                Some("hello world"),
                None,
                Some("hello world this is a long string"),
                None,
            ],
            Utf8(Nullable),
        );
        let MinMaxResult { min, max } = min_max(array.as_ref()).unwrap().unwrap();

        // The shorter string is a strict prefix of the longer one, so it sorts first.
        assert_eq!(
            min,
            Scalar::new(
                Utf8(NonNullable),
                BufferString::from("hello world".to_string()).into(),
            )
        );
        assert_eq!(
            max,
            Scalar::new(
                Utf8(NonNullable),
                BufferString::from("hello world this is a long string".to_string()).into()
            )
        );
    }

    /// An array holding only nulls has no min/max at all.
    #[test]
    fn all_nulls() {
        let array = VarBinArray::from_iter(vec![Option::<&str>::None, None, None], Utf8(Nullable));
        let stats = array.statistics();
        assert!(stats.get(Stat::Min).is_none());
        assert!(stats.get(Stat::Max).is_none());

        // Also go through the compute function itself, so the test does not rely solely on the
        // statistics lookup above.
        assert!(min_max(array.as_ref()).unwrap().is_none());
    }
}
105}