vortex_array/arrays/varbin/compute/
min_max.rs

1use itertools::Itertools;
2use vortex_dtype::DType;
3use vortex_error::VortexResult;
4use vortex_scalar::Scalar;
5
6use crate::Array;
7use crate::accessor::ArrayAccessor;
8use crate::arrays::{VarBinArray, VarBinEncoding};
9use crate::compute::{MinMaxFn, MinMaxResult};
10
11impl MinMaxFn<&VarBinArray> for VarBinEncoding {
12    fn min_max(&self, array: &VarBinArray) -> VortexResult<Option<MinMaxResult>> {
13        compute_min_max(array, array.dtype())
14    }
15}
16
17/// Compute the min and max of VarBin like array.
18pub fn compute_min_max<T: ArrayAccessor<[u8]>>(
19    array: &T,
20    dtype: &DType,
21) -> VortexResult<Option<MinMaxResult>> {
22    let minmax = array.with_iterator(|iter| match iter.flatten().minmax() {
23        itertools::MinMaxResult::NoElements => None,
24        itertools::MinMaxResult::OneElement(value) => {
25            let scalar = make_scalar(dtype, value);
26            Some(MinMaxResult {
27                min: scalar.clone(),
28                max: scalar,
29            })
30        }
31        itertools::MinMaxResult::MinMax(min, max) => Some(MinMaxResult {
32            min: make_scalar(dtype, min),
33            max: make_scalar(dtype, max),
34        }),
35    })?;
36
37    Ok(minmax)
38}
39
40/// Helper function to make sure that min/max has the right [`ScalarValue`] type.
41fn make_scalar(dtype: &DType, value: &[u8]) -> Scalar {
42    match dtype {
43        DType::Binary(_) => Scalar::new(dtype.clone(), value.into()),
44        DType::Utf8(_) => {
45            // Safety:
46            // We trust the array's dtype here
47            let value = unsafe { str::from_utf8_unchecked(value) };
48            Scalar::new(dtype.clone(), value.into())
49        }
50        _ => unreachable!(),
51    }
52}
53
#[cfg(test)]
mod tests {
    use vortex_buffer::BufferString;
    use vortex_dtype::DType::Utf8;
    use vortex_dtype::Nullability::Nullable;
    use vortex_scalar::Scalar;

    use crate::Array;
    use crate::arrays::VarBinArray;
    use crate::compute::{MinMaxResult, min_max};
    use crate::stats::{Stat, StatsProvider};

    /// Null entries are ignored: min and max come from the two non-null strings.
    #[test]
    fn some_nulls() {
        let array = VarBinArray::from_iter(
            vec![
                Some("hello world"),
                None,
                Some("hello world this is a long string"),
                None,
            ],
            Utf8(Nullable),
        );
        let MinMaxResult { min, max } = min_max(&array).unwrap().unwrap();

        assert_eq!(
            min,
            Scalar::new(
                Utf8(Nullable),
                BufferString::from("hello world".to_string()).into(),
            )
        );
        assert_eq!(
            max,
            Scalar::new(
                Utf8(Nullable),
                BufferString::from("hello world this is a long string".to_string()).into()
            )
        );
    }

    /// An array with only null entries has no min or max.
    #[test]
    fn all_nulls() {
        let array = VarBinArray::from_iter(vec![Option::<&str>::None, None, None], Utf8(Nullable));

        // Call the kernel directly so the all-null path is exercised regardless of whether
        // the statistics lookup below triggers any computation.
        assert!(min_max(&array).unwrap().is_none());

        let stats = array.statistics();
        assert!(stats.get(Stat::Min).is_none());
        assert!(stats.get(Stat::Max).is_none());
    }
}