use itertools::Itertools;
use vortex_error::VortexResult;
use vortex_error::vortex_panic;
use crate::accessor::ArrayAccessor;
use crate::arrays::VarBinArray;
use crate::arrays::VarBinVTable;
use crate::compute::MinMaxKernel;
use crate::compute::MinMaxKernelAdapter;
use crate::compute::MinMaxResult;
use crate::dtype::DType;
use crate::dtype::Nullability::NonNullable;
use crate::register_kernel;
use crate::scalar::Scalar;
/// Min/max kernel implementation for [`VarBinArray`].
///
/// Delegates to [`varbin_compute_min_max`], passing the array together with its own dtype.
impl MinMaxKernel for VarBinVTable {
    /// Returns the minimum and maximum of `array`, or `Ok(None)` when the helper finds no
    /// values to compare. This implementation itself never produces an `Err`.
    fn min_max(&self, array: &VarBinArray) -> VortexResult<Option<MinMaxResult>> {
        let dtype = array.dtype();
        let extremes = varbin_compute_min_max(array, dtype);
        Ok(extremes)
    }
}
// Hands the `VarBinVTable` min/max kernel, wrapped in `MinMaxKernelAdapter` and lifted, to
// `register_kernel!`.
// NOTE(review): presumably this is what makes the kernel discoverable by the generic `min_max`
// compute function — confirm against the macro definition.
register_kernel!(MinMaxKernelAdapter(VarBinVTable).lift());
/// Computes the smallest and largest values of a byte-slice-backed array.
///
/// Entries the accessor yields as `None` are skipped; the remaining byte slices are compared
/// with the standard slice ordering (lexicographic by byte).
///
/// Returns `None` when there is nothing left to compare. Otherwise both bounds are returned as
/// non-nullable scalars built according to `dtype`; with exactly one value, `min` and `max`
/// are the same scalar.
///
/// # Panics
///
/// Panics if at least one value is present and `dtype` is neither `DType::Binary` nor
/// `DType::Utf8` (see `make_scalar`).
pub(crate) fn varbin_compute_min_max<T: ArrayAccessor<[u8]>>(
    array: &T,
    dtype: &DType,
) -> Option<MinMaxResult> {
    array.with_iterator(|values| {
        let extremes = values.flatten().minmax();
        let (min, max) = match extremes {
            // Empty input, or every entry was `None`.
            itertools::MinMaxResult::NoElements => return None,
            // A lone value is both bounds: build the scalar once and clone it.
            itertools::MinMaxResult::OneElement(only) => {
                let bound = make_scalar(dtype, only);
                (bound.clone(), bound)
            }
            itertools::MinMaxResult::MinMax(lowest, highest) => {
                (make_scalar(dtype, lowest), make_scalar(dtype, highest))
            }
        };
        Some(MinMaxResult { min, max })
    })
}
/// Wraps raw bytes in a non-nullable [`Scalar`] matching `dtype`.
///
/// `DType::Utf8` yields a UTF-8 scalar and `DType::Binary` yields a binary scalar holding a
/// copy of `value`.
///
/// # Panics
///
/// Panics for any other dtype.
fn make_scalar(dtype: &DType, value: &[u8]) -> Scalar {
    match dtype {
        DType::Utf8(_) => {
            // SAFETY: relies on the bytes of a `Utf8`-typed array being valid UTF-8.
            // NOTE(review): that invariant is not checked here — confirm every caller of
            // `varbin_compute_min_max` upholds it.
            let text = unsafe { str::from_utf8_unchecked(value) };
            Scalar::utf8(text, NonNullable)
        }
        DType::Binary(_) => {
            let bytes = value.to_vec();
            Scalar::binary(bytes, NonNullable)
        }
        _ => vortex_panic!("cannot make Scalar from bytes with dtype {dtype}"),
    }
}
#[cfg(test)]
mod tests {
    use vortex_buffer::BufferString;

    use crate::IntoArray;
    use crate::arrays::VarBinArray;
    use crate::compute::MinMaxResult;
    use crate::compute::min_max;
    use crate::dtype::DType::Utf8;
    use crate::dtype::Nullability::NonNullable;
    use crate::dtype::Nullability::Nullable;
    use crate::expr::stats::Stat;
    use crate::expr::stats::StatsProvider;
    use crate::scalar::Scalar;

    /// Builds the non-nullable UTF-8 scalar that the kernel is expected to produce for `text`.
    fn expected_utf8(text: &str) -> Scalar {
        Scalar::try_new(
            Utf8(NonNullable),
            Some(BufferString::from(text.to_string()).into()),
        )
        .unwrap()
    }

    /// Nulls interleaved with values must be ignored when picking the bounds.
    #[test]
    fn some_nulls() {
        let shorter = "hello world";
        let longer = "hello world this is a long string";
        let array = VarBinArray::from_iter(
            vec![Some(shorter), None, Some(longer), None],
            Utf8(Nullable),
        );

        let bounds = min_max(&array.into_array()).unwrap().unwrap();
        let MinMaxResult { min, max } = bounds;

        // `shorter` is a strict prefix of `longer`, so it sorts first.
        assert_eq!(min, expected_utf8(shorter));
        assert_eq!(max, expected_utf8(longer));
    }

    /// An array holding only nulls exposes neither a min nor a max statistic.
    // NOTE(review): this only inspects what `statistics().get(..)` returns; confirm whether
    // `get` triggers computation or merely reads already-stored stats.
    #[test]
    fn all_nulls() {
        let values: Vec<Option<&str>> = vec![None; 3];
        let array = VarBinArray::from_iter(values, Utf8(Nullable));

        let stats = array.statistics();
        for stat in [Stat::Min, Stat::Max] {
            assert!(stats.get(stat).is_none());
        }
    }
}