vortex_array/arrays/varbin/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_dtype::DType;
6use vortex_dtype::Nullability::NonNullable;
7use vortex_error::VortexResult;
8use vortex_error::vortex_panic;
9use vortex_scalar::Scalar;
10
11use crate::accessor::ArrayAccessor;
12use crate::arrays::VarBinArray;
13use crate::arrays::VarBinVTable;
14use crate::compute::MinMaxKernel;
15use crate::compute::MinMaxKernelAdapter;
16use crate::compute::MinMaxResult;
17use crate::register_kernel;
18
19impl MinMaxKernel for VarBinVTable {
20    fn min_max(&self, array: &VarBinArray) -> VortexResult<Option<MinMaxResult>> {
21        Ok(varbin_compute_min_max(array, array.dtype()))
22    }
23}
24
// Hands the `MinMaxKernelAdapter` wrapping `VarBinVTable` to `register_kernel!`.
// NOTE(review): presumably this is what makes the `MinMaxKernel` impl above reachable from the
// generic `min_max` compute function — confirm against the macro definition.
register_kernel!(MinMaxKernelAdapter(VarBinVTable).lift());
26
27/// Compute the min and max of VarBin like array.
28pub(crate) fn varbin_compute_min_max<T: ArrayAccessor<[u8]>>(
29    array: &T,
30    dtype: &DType,
31) -> Option<MinMaxResult> {
32    array.with_iterator(|iter| match iter.flatten().minmax() {
33        itertools::MinMaxResult::NoElements => None,
34        itertools::MinMaxResult::OneElement(value) => {
35            let scalar = make_scalar(dtype, value);
36            Some(MinMaxResult {
37                min: scalar.clone(),
38                max: scalar,
39            })
40        }
41        itertools::MinMaxResult::MinMax(min, max) => Some(MinMaxResult {
42            min: make_scalar(dtype, min),
43            max: make_scalar(dtype, max),
44        }),
45    })
46}
47
48/// Helper function to make sure that min/max has the right [`Scalar`] type.
49fn make_scalar(dtype: &DType, value: &[u8]) -> Scalar {
50    match dtype {
51        DType::Binary(_) => Scalar::binary(value.to_vec(), NonNullable),
52        DType::Utf8(_) => {
53            // SAFETY: We only call `compute_min_max` within `varbin/`, in which we always validate
54            // the arrays, and we always pass `array.dtype()` in as the `dtype` argument.
55            let value = unsafe { str::from_utf8_unchecked(value) };
56            Scalar::utf8(value, NonNullable)
57        }
58        _ => vortex_panic!("cannot make Scalar from bytes with dtype {dtype}"),
59    }
60}
61
#[cfg(test)]
mod tests {
    use vortex_buffer::BufferString;
    use vortex_dtype::DType::Utf8;
    use vortex_dtype::Nullability::NonNullable;
    use vortex_dtype::Nullability::Nullable;
    use vortex_scalar::Scalar;

    use crate::arrays::VarBinArray;
    use crate::compute::MinMaxResult;
    use crate::compute::min_max;
    use crate::expr::stats::Stat;
    use crate::expr::stats::StatsProvider;

    /// Null entries are skipped and the extremes come back as non-nullable UTF-8 scalars.
    #[test]
    fn some_nulls() {
        let array = VarBinArray::from_iter(
            vec![
                Some("hello world"),
                None,
                Some("hello world this is a long string"),
                None,
            ],
            Utf8(Nullable),
        );
        let MinMaxResult { min, max } = min_max(array.as_ref()).unwrap().unwrap();

        // The shorter string is a strict prefix of the longer one, so it sorts first.
        assert_eq!(
            min,
            Scalar::new(
                Utf8(NonNullable),
                BufferString::from("hello world".to_string()).into(),
            )
        );
        assert_eq!(
            max,
            Scalar::new(
                Utf8(NonNullable),
                BufferString::from("hello world this is a long string".to_string()).into()
            )
        );
    }

    /// An array containing only nulls has no min/max.
    #[test]
    fn all_nulls() {
        let array = VarBinArray::from_iter(vec![Option::<&str>::None, None, None], Utf8(Nullable));
        let stats = array.statistics();
        assert!(stats.get(Stat::Min).is_none());
        assert!(stats.get(Stat::Max).is_none());

        // Also go through the same `min_max` entry point as `some_nulls`, so the all-null case
        // is checked against the computation itself and not only the statistics lookup.
        assert!(min_max(array.as_ref()).unwrap().is_none());
    }
}