vortex_array/arrays/varbin/compute/
min_max.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_dtype::DType;
6use vortex_error::VortexResult;
7use vortex_scalar::Scalar;
8
9use crate::accessor::ArrayAccessor;
10use crate::arrays::{VarBinArray, VarBinVTable};
11use crate::compute::{MinMaxKernel, MinMaxKernelAdapter, MinMaxResult};
12use crate::register_kernel;
13
14impl MinMaxKernel for VarBinVTable {
15    fn min_max(&self, array: &VarBinArray) -> VortexResult<Option<MinMaxResult>> {
16        compute_min_max(array, array.dtype())
17    }
18}
19
20register_kernel!(MinMaxKernelAdapter(VarBinVTable).lift());
21
22/// Compute the min and max of VarBin like array.
23pub fn compute_min_max<T: ArrayAccessor<[u8]>>(
24    array: &T,
25    dtype: &DType,
26) -> VortexResult<Option<MinMaxResult>> {
27    let minmax = array.with_iterator(|iter| match iter.flatten().minmax() {
28        itertools::MinMaxResult::NoElements => None,
29        itertools::MinMaxResult::OneElement(value) => {
30            let scalar = make_scalar(dtype, value);
31            Some(MinMaxResult {
32                min: scalar.clone(),
33                max: scalar,
34            })
35        }
36        itertools::MinMaxResult::MinMax(min, max) => Some(MinMaxResult {
37            min: make_scalar(dtype, min),
38            max: make_scalar(dtype, max),
39        }),
40    })?;
41
42    Ok(minmax)
43}
44
45/// Helper function to make sure that min/max has the right [`ScalarValue`] type.
46fn make_scalar(dtype: &DType, value: &[u8]) -> Scalar {
47    match dtype {
48        DType::Binary(_) => Scalar::new(dtype.clone(), value.into()),
49        DType::Utf8(_) => {
50            // Safety:
51            // We trust the array's dtype here
52            let value = unsafe { str::from_utf8_unchecked(value) };
53            Scalar::new(dtype.clone(), value.into())
54        }
55        _ => unreachable!(),
56    }
57}
58
#[cfg(test)]
mod tests {
    use vortex_buffer::BufferString;
    use vortex_dtype::DType::Utf8;
    use vortex_dtype::Nullability::Nullable;
    use vortex_scalar::Scalar;

    use crate::arrays::VarBinArray;
    use crate::compute::{MinMaxResult, min_max};
    use crate::stats::{Stat, StatsProvider};

    /// Nulls are skipped, and the shorter string is the minimum because it is a prefix of the
    /// longer one.
    #[test]
    fn some_nulls() {
        let shorter = "hello world";
        let longer = "hello world this is a long string";
        let array = VarBinArray::from_iter(
            vec![Some(shorter), None, Some(longer), None],
            Utf8(Nullable),
        );

        let MinMaxResult {
            min: actual_min,
            max: actual_max,
        } = min_max(array.as_ref()).unwrap().unwrap();

        let expected_min = Scalar::new(
            Utf8(Nullable),
            BufferString::from(shorter.to_string()).into(),
        );
        let expected_max = Scalar::new(
            Utf8(Nullable),
            BufferString::from(longer.to_string()).into(),
        );

        assert_eq!(actual_min, expected_min);
        assert_eq!(actual_max, expected_max);
    }

    /// With no non-null values, neither a `Min` nor a `Max` statistic is available.
    #[test]
    fn all_nulls() {
        let only_nulls = vec![None::<&str>; 3];
        let array = VarBinArray::from_iter(only_nulls, Utf8(Nullable));

        let statistics = array.statistics();
        let min_stat = statistics.get(Stat::Min);
        let max_stat = statistics.get(Stat::Max);

        assert!(min_stat.is_none());
        assert!(max_stat.is_none());
    }
}