use std::fmt::Debug;
use enum_iterator::Sequence;
use enum_iterator::all;
use num_traits::CheckedAdd;
use vortex_error::VortexError;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_err;
use vortex_error::vortex_panic;
use crate::dtype::DType;
use crate::expr::stats::IsConstant;
use crate::expr::stats::IsSorted;
use crate::expr::stats::IsStrictSorted;
use crate::expr::stats::Max;
use crate::expr::stats::Min;
use crate::expr::stats::NaNCount;
use crate::expr::stats::NullCount;
use crate::expr::stats::Precision;
use crate::expr::stats::Stat;
use crate::expr::stats::StatBound;
use crate::expr::stats::StatType;
use crate::expr::stats::StatsProvider;
use crate::expr::stats::StatsProviderExt;
use crate::expr::stats::Sum;
use crate::expr::stats::UncompressedSizeInBytes;
use crate::scalar::Scalar;
use crate::scalar::ScalarValue;
/// A set of statistics, stored as `(Stat, Precision<ScalarValue>)` pairs.
///
/// Values are kept untyped (`ScalarValue`); [`TypedStatsSetRef`] and
/// [`MutTypedStatsSetRef`] pair the set with a `DType` to produce typed scalars.
#[derive(Default, Debug, Clone)]
pub struct StatsSet {
/// Backing storage; lookups in this file are linear scans over this vector.
values: Vec<(Stat, Precision<ScalarValue>)>,
}
impl StatsSet {
    /// Creates a `StatsSet` directly from `values` without checking that each
    /// [`Stat`] appears at most once.
    ///
    /// # Safety
    ///
    /// The body performs no memory-unsafe operation. Callers are expected to pass
    /// at most one entry per `Stat`: duplicates are not detected here, and lookups
    /// such as [`StatsSet::get`] only ever see the first matching entry.
    pub unsafe fn new_unchecked(values: Vec<(Stat, Precision<ScalarValue>)>) -> Self {
        Self { values }
    }

    /// Creates a `StatsSet` holding the single entry `(stat, value)`.
    pub fn of(stat: Stat, value: Precision<ScalarValue>) -> Self {
        // SAFETY: a one-element vector cannot contain a duplicate stat.
        unsafe { Self::new_unchecked(vec![(stat, value)]) }
    }

    /// Ensures the backing vector can hold one entry per `Stat` variant.
    fn reserve_full_capacity(&mut self) {
        if self.values.capacity() < Stat::CARDINALITY {
            // `reserve_exact` takes the number of elements to add *beyond `len`*,
            // so the request is computed from the length rather than the capacity;
            // otherwise a vector with spare capacity would be under-reserved.
            // `len <= capacity < CARDINALITY` here, so the subtraction cannot underflow.
            self.values
                .reserve_exact(Stat::CARDINALITY - self.values.len());
        }
    }

    /// Wraps this set in a mutable view that carries `dtype` alongside it.
    pub fn as_mut_typed_ref<'a, 'b>(&'a mut self, dtype: &'b DType) -> MutTypedStatsSetRef<'a, 'b> {
        MutTypedStatsSetRef {
            values: self,
            dtype,
        }
    }

    /// Wraps this set in a read-only view that carries `dtype` alongside it.
    pub fn as_typed_ref<'a, 'b>(&'a self, dtype: &'b DType) -> TypedStatsSetRef<'a, 'b> {
        TypedStatsSetRef {
            values: self,
            dtype,
        }
    }
}
impl StatsSet {
    /// Records `value` for `stat`, replacing the existing entry for `stat` if
    /// there is one and appending a new entry otherwise.
    pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
        self.reserve_full_capacity();
        match self.values.iter().position(|(s, _)| *s == stat) {
            Some(idx) => self.values[idx] = (stat, value),
            None => self.values.push((stat, value)),
        }
    }

    /// Removes every entry recorded for `stat`.
    pub fn clear(&mut self, stat: Stat) {
        self.values.retain(|entry| entry.0 != stat);
    }

    /// Drops every entry whose stat is not listed in `stats`.
    pub fn retain_only(&mut self, stats: &[Stat]) {
        self.values.retain(|entry| stats.contains(&entry.0));
    }

    /// Iterates over the stored `(stat, value)` entries by reference.
    pub fn iter(&self) -> impl Iterator<Item = &(Stat, Precision<ScalarValue>)> {
        self.values.as_slice().iter()
    }

    /// Returns a clone of the first value stored for `stat`, if any.
    pub fn get(&self, stat: Stat) -> Option<Precision<ScalarValue>> {
        self.values
            .iter()
            .find_map(|(s, v)| (*s == stat).then(|| v.clone()))
    }

    /// Number of stored entries.
    pub fn len(&self) -> usize {
        self.values.len()
    }

    /// Whether no entries are stored.
    pub fn is_empty(&self) -> bool {
        self.values.len() == 0
    }

    /// Looks up `stat`, builds a [`Scalar`] of type `dtype` from the stored
    /// value and converts it to `T`.
    ///
    /// Returns `None` when `stat` is not present.
    ///
    /// # Panics
    ///
    /// Panics if the scalar cannot be constructed for `dtype`, or if the
    /// conversion of that scalar into `T` fails.
    pub fn get_as<T: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
        &self,
        stat: Stat,
        dtype: &DType,
    ) -> Option<Precision<T>> {
        let stored = self.get(stat)?;
        Some(stored.map(|value| {
            let scalar = Scalar::try_new(dtype.clone(), Some(value))
                .vortex_expect("failed to construct a scalar statistic");
            T::try_from(&scalar).unwrap_or_else(|err| {
                vortex_panic!(
                    err,
                    "Failed to get stat {} as {}",
                    stat,
                    std::any::type_name::<T>()
                )
            })
        }))
    }
}
/// Owning iterator over `(Stat, Precision<ScalarValue>)` entries; a thin wrapper
/// around the `IntoIter` of a `Vec` of such entries.
pub struct StatsSetIntoIter(std::vec::IntoIter<(Stat, Precision<ScalarValue>)>);
impl Iterator for StatsSetIntoIter {
    type Item = (Stat, Precision<ScalarValue>);

    /// Yields the next entry from the wrapped vector iterator.
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }

    /// Forwards the wrapped iterator's exact bounds so that consumers such as
    /// `collect` and `extend` can pre-size their storage; the default
    /// implementation would report `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
impl IntoIterator for StatsSet {
    type Item = (Stat, Precision<ScalarValue>);
    type IntoIter = StatsSetIntoIter;

    /// Consumes the set and yields its entries by value, in storage order.
    fn into_iter(self) -> Self::IntoIter {
        let Self { values } = self;
        StatsSetIntoIter(values.into_iter())
    }
}
impl FromIterator<(Stat, Precision<ScalarValue>)> for StatsSet {
    /// Builds a set from `(stat, value)` pairs. Entries are inserted through
    /// `extend`, so a later pair for the same stat replaces an earlier one.
    fn from_iter<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(iter: T) -> Self {
        let entries = iter.into_iter();

        // Reserve room for one entry per stat up front.
        let mut storage = Vec::new();
        storage.reserve_exact(Stat::CARDINALITY);

        let mut set = Self { values: storage };
        set.extend(entries);
        set
    }
}
impl Extend<(Stat, Precision<ScalarValue>)> for StatsSet {
    /// Inserts every `(stat, value)` pair via [`StatsSet::set`], so pairs for a
    /// stat that is already present overwrite the stored value.
    #[inline]
    fn extend<T: IntoIterator<Item = (Stat, Precision<ScalarValue>)>>(&mut self, iter: T) {
        let entries = iter.into_iter();
        self.reserve_full_capacity();
        for (stat, value) in entries {
            self.set(stat, value);
        }
    }
}
impl StatsSet {
    /// Merges `other` into this set using the order-sensitive merge of
    /// [`MutTypedStatsSetRef`], interpreting both sets with `dtype`, and
    /// returns the updated set.
    pub fn merge_ordered(mut self, other: &Self, dtype: &DType) -> Self {
        let rhs = other.as_typed_ref(dtype);
        self.as_mut_typed_ref(dtype).merge_ordered(&rhs);
        self
    }

    /// Merges `other` into this set using the order-insensitive merge of
    /// [`MutTypedStatsSetRef`], interpreting both sets with `dtype`, and
    /// returns the updated set.
    pub fn merge_unordered(mut self, other: &Self, dtype: &DType) -> Self {
        let rhs = other.as_typed_ref(dtype);
        self.as_mut_typed_ref(dtype).merge_unordered(&rhs);
        self
    }

    /// Combines `other` into this set in place via
    /// [`MutTypedStatsSetRef::combine_sets`], interpreting both sets with `dtype`.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by the typed `combine_sets`.
    pub fn combine_sets(&mut self, other: &Self, dtype: &DType) -> VortexResult<()> {
        let rhs = other.as_typed_ref(dtype);
        let mut lhs = self.as_mut_typed_ref(dtype);
        lhs.combine_sets(&rhs)
    }
}
/// Read-only view pairing a borrowed [`StatsSet`] with a borrowed `DType`.
pub struct TypedStatsSetRef<'a, 'b> {
/// The underlying, untyped statistics.
pub values: &'a StatsSet,
/// The dtype passed to `Stat::dtype` when typed scalars are produced from `values`.
pub dtype: &'b DType,
}
impl StatsProvider for TypedStatsSetRef<'_, '_> {
fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
self.values.get(stat).map(|p| {
p.map(|sv| {
Scalar::try_new(
stat.dtype(self.dtype)
.vortex_expect("Must have valid dtype if value is present"),
Some(sv),
)
.vortex_expect("failed to construct a scalar statistic")
})
})
}
fn len(&self) -> usize {
self.values.len()
}
}
/// Mutable view pairing a mutably borrowed [`StatsSet`] with a borrowed `DType`.
pub struct MutTypedStatsSetRef<'a, 'b> {
/// The underlying, untyped statistics that this view reads and updates.
pub values: &'a mut StatsSet,
/// The dtype passed to `Stat::dtype` when typed scalars are produced from `values`.
pub dtype: &'b DType,
}
impl MutTypedStatsSetRef<'_, '_> {
pub fn set(&mut self, stat: Stat, value: Precision<ScalarValue>) {
self.values.set(stat, value);
}
pub fn clear(&mut self, stat: Stat) {
self.values.clear(stat);
}
}
impl StatsProvider for MutTypedStatsSetRef<'_, '_> {
fn get(&self, stat: Stat) -> Option<Precision<Scalar>> {
self.values.get(stat).map(|p| {
p.map(|sv| {
Scalar::try_new(
stat.dtype(self.dtype)
.vortex_expect("Must have valid dtype if value is present"),
Some(sv),
)
.vortex_expect("failed to construct a scalar statistic")
})
})
}
fn len(&self) -> usize {
self.values.len()
}
}
impl MutTypedStatsSetRef<'_, '_> {
pub fn merge_ordered(mut self, other: &TypedStatsSetRef) -> Self {
for s in all::<Stat>() {
match s {
Stat::IsConstant => self.merge_is_constant(other),
Stat::IsSorted => self.merge_is_sorted(other),
Stat::IsStrictSorted => self.merge_is_strict_sorted(other),
Stat::Max => self.merge_max(other),
Stat::Min => self.merge_min(other),
Stat::Sum => self.merge_sum(other),
Stat::NullCount => self.merge_null_count(other),
Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
Stat::NaNCount => self.merge_nan_count(other),
}
}
self
}
pub fn merge_unordered(mut self, other: &TypedStatsSetRef) -> Self {
for s in all::<Stat>() {
if !s.is_commutative() {
self.clear(s);
continue;
}
match s {
Stat::IsConstant => self.merge_is_constant(other),
Stat::Max => self.merge_max(other),
Stat::Min => self.merge_min(other),
Stat::Sum => self.merge_sum(other),
Stat::NullCount => self.merge_null_count(other),
Stat::UncompressedSizeInBytes => self.merge_uncompressed_size_in_bytes(other),
Stat::IsSorted | Stat::IsStrictSorted => {
unreachable!("not commutative")
}
Stat::NaNCount => self.merge_nan_count(other),
}
}
self
}
pub fn combine_sets(&mut self, other: &TypedStatsSetRef) -> VortexResult<()> {
let other_stats: Vec<_> = other.values.iter().map(|(stat, _)| *stat).collect();
for s in other_stats {
match s {
Stat::Max => self.combine_bound::<Max>(other)?,
Stat::Min => self.combine_bound::<Min>(other)?,
Stat::UncompressedSizeInBytes => {
self.combine_bound::<UncompressedSizeInBytes>(other)?
}
Stat::IsConstant => self.combine_bool_stat::<IsConstant>(other)?,
Stat::IsSorted => self.combine_bool_stat::<IsSorted>(other)?,
Stat::IsStrictSorted => self.combine_bool_stat::<IsStrictSorted>(other)?,
Stat::NullCount => self.combine_bound::<NullCount>(other)?,
Stat::Sum => self.combine_bound::<Sum>(other)?,
Stat::NaNCount => self.combine_bound::<NaNCount>(other)?,
}
}
Ok(())
}
fn combine_bound<S: StatType<Scalar>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
where
S::Bound: StatBound<Scalar> + Debug + Eq + PartialEq,
{
match (self.get_scalar_bound::<S>(), other.get_scalar_bound::<S>()) {
(Some(m1), Some(m2)) => {
let meet = m1
.intersection(&m2)
.vortex_expect("can always compare scalar")
.ok_or_else(|| {
vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
})?;
if meet != m1 {
self.set(
S::STAT,
meet.into_value().map(|s| {
s.into_value()
.vortex_expect("stat scalar value cannot be null")
}),
);
}
}
(None, Some(m)) => self.set(
S::STAT,
m.into_value().map(|s| {
s.into_value()
.vortex_expect("stat scalar value cannot be null")
}),
),
(Some(_), _) => (),
(None, None) => self.clear(S::STAT),
}
Ok(())
}
fn combine_bool_stat<S: StatType<bool>>(&mut self, other: &TypedStatsSetRef) -> VortexResult<()>
where
S::Bound: StatBound<bool> + Debug + Eq + PartialEq,
{
match (
self.get_as_bound::<S, bool>(),
other.get_as_bound::<S, bool>(),
) {
(Some(m1), Some(m2)) => {
let intersection = m1
.intersection(&m2)
.vortex_expect("can always compare boolean")
.ok_or_else(|| {
vortex_err!("{:?} bounds ({m1:?}, {m2:?}) do not overlap", S::STAT)
})?;
if intersection != m1 {
self.set(S::STAT, intersection.into_value().map(ScalarValue::from));
}
}
(None, Some(m)) => self.set(S::STAT, m.into_value().map(ScalarValue::from)),
(Some(_), None) => (),
(None, None) => self.clear(S::STAT),
}
Ok(())
}
fn merge_min(&mut self, other: &TypedStatsSetRef) {
match (
self.get_scalar_bound::<Min>(),
other.get_scalar_bound::<Min>(),
) {
(Some(m1), Some(m2)) => {
let meet = m1.union(&m2).vortex_expect("can compare scalar");
if meet != m1 {
self.set(
Stat::Min,
meet.into_value().map(|s| {
s.into_value()
.vortex_expect("stat scalar value cannot be null")
}),
);
}
}
_ => self.clear(Stat::Min),
}
}
fn merge_max(&mut self, other: &TypedStatsSetRef) {
match (
self.get_scalar_bound::<Max>(),
other.get_scalar_bound::<Max>(),
) {
(Some(m1), Some(m2)) => {
let meet = m1.union(&m2).vortex_expect("can compare scalar");
if meet != m1 {
self.set(
Stat::Max,
meet.into_value().map(|s| {
s.into_value()
.vortex_expect("stat scalar value cannot be null")
}),
);
}
}
_ => self.clear(Stat::Max),
}
}
fn merge_sum(&mut self, other: &TypedStatsSetRef) {
match (
self.get_scalar_bound::<Sum>(),
other.get_scalar_bound::<Sum>(),
) {
(Some(m1), Some(m2)) => {
if let Some(scalar_value) = m1.zip(m2).as_exact().and_then(|(s1, s2)| {
s1.as_primitive()
.checked_add(&s2.as_primitive())
.and_then(|pscalar| pscalar.pvalue().map(ScalarValue::Primitive))
}) {
self.set(Stat::Sum, Precision::Exact(scalar_value));
}
}
_ => self.clear(Stat::Sum),
}
}
fn merge_is_constant(&mut self, other: &TypedStatsSetRef) {
let self_const = self.get_as(Stat::IsConstant);
let other_const = other.get_as(Stat::IsConstant);
let self_min = self.get(Stat::Min);
let other_min = other.get(Stat::Min);
if let (
Some(Precision::Exact(self_const)),
Some(Precision::Exact(other_const)),
Some(Precision::Exact(self_min)),
Some(Precision::Exact(other_min)),
) = (self_const, other_const, self_min, other_min)
{
if self_const && other_const && self_min == other_min {
self.set(Stat::IsConstant, Precision::exact(true));
} else {
self.set(Stat::IsConstant, Precision::inexact(false));
}
}
self.set(Stat::IsConstant, Precision::exact(false));
}
fn merge_is_sorted(&mut self, other: &TypedStatsSetRef) {
self.merge_sortedness_stat(other, Stat::IsSorted, PartialOrd::le)
}
fn merge_is_strict_sorted(&mut self, other: &TypedStatsSetRef) {
self.merge_sortedness_stat(other, Stat::IsStrictSorted, PartialOrd::lt)
}
fn merge_sortedness_stat<F: Fn(&Scalar, &Scalar) -> bool>(
&mut self,
other: &TypedStatsSetRef,
stat: Stat,
cmp: F,
) {
if (Some(Precision::Exact(true)), Some(Precision::Exact(true)))
== (self.get_as(stat), other.get_as(stat))
{
if let (Some(self_max), Some(other_min)) = (
self.get_scalar_bound::<Max>(),
other.get_scalar_bound::<Min>(),
) {
return if cmp(&self_max.max_value(), &other_min.min_value()) {
} else {
self.set(stat, Precision::inexact(false));
};
}
}
self.clear(stat);
}
fn merge_null_count(&mut self, other: &TypedStatsSetRef) {
self.merge_sum_stat(Stat::NullCount, other)
}
fn merge_nan_count(&mut self, other: &TypedStatsSetRef) {
self.merge_sum_stat(Stat::NaNCount, other)
}
fn merge_uncompressed_size_in_bytes(&mut self, other: &TypedStatsSetRef) {
self.merge_sum_stat(Stat::UncompressedSizeInBytes, other)
}
fn merge_sum_stat(&mut self, stat: Stat, other: &TypedStatsSetRef) {
match (self.get_as::<usize>(stat), other.get_as::<usize>(stat)) {
(Some(nc1), Some(nc2)) => {
self.set(
stat,
nc1.zip(nc2).map(|(nc1, nc2)| ScalarValue::from(nc1 + nc2)),
);
}
_ => self.clear(stat),
}
}
}
#[cfg(test)]
mod test {
    use enum_iterator::all;
    use itertools::Itertools;

    use crate::arrays::PrimitiveArray;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::dtype::PType;
    use crate::expr::stats::IsConstant;
    use crate::expr::stats::Precision;
    use crate::expr::stats::Stat;
    use crate::expr::stats::StatsProvider;
    use crate::expr::stats::StatsProviderExt;
    use crate::stats::StatsSet;
    use crate::stats::stats_set::Scalar;

    /// The non-nullable `i32` dtype used by most tests in this module.
    ///
    /// Tests bind the result to a local before borrowing it, because a typed
    /// view must not outlive the dtype it borrows.
    fn i32_dtype() -> DType {
        DType::Primitive(PType::I32, Nullability::NonNullable)
    }

    /// `iter` yields entries by reference in insertion order.
    #[test]
    fn test_iter() {
        // SAFETY: the two entries use distinct stats.
        let set = unsafe {
            StatsSet::new_unchecked(vec![
                (Stat::Max, Precision::exact(100)),
                (Stat::Min, Precision::exact(42)),
            ])
        };
        let mut iter = set.iter();
        let first = iter.next().unwrap().clone();
        assert_eq!(first.0, Stat::Max);
        assert_eq!(
            first.1.map(
                |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
            ),
            Precision::exact(100)
        );
        let snd = iter.next().unwrap().clone();
        assert_eq!(snd.0, Stat::Min);
        assert_eq!(
            snd.1.map(
                |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
            ),
            Precision::exact(42)
        );
    }

    /// `into_iter` yields entries by value in insertion order.
    #[test]
    fn into_iter() {
        // SAFETY: the two entries use distinct stats.
        let mut set = unsafe {
            StatsSet::new_unchecked(vec![
                (Stat::Max, Precision::exact(100)),
                (Stat::Min, Precision::exact(42)),
            ])
        }
        .into_iter();
        let (stat, first) = set.next().unwrap();
        assert_eq!(stat, Stat::Max);
        assert_eq!(
            first.map(
                |f| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(f)).unwrap()).unwrap()
            ),
            Precision::exact(100)
        );
        let snd = set.next().unwrap();
        assert_eq!(snd.0, Stat::Min);
        assert_eq!(
            snd.1.map(
                |s| i32::try_from(&Scalar::try_new(PType::I32.into(), Some(s)).unwrap()).unwrap()
            ),
            Precision::exact(42)
        );
    }

    /// With an inexact `Min` on one side, the merged `IsConstant` is exact `false`.
    #[test]
    fn merge_constant() {
        let dtype = i32_dtype();
        let first = StatsSet::from_iter([
            (Stat::Min, Precision::exact(42)),
            (Stat::IsConstant, Precision::exact(true)),
        ])
        .merge_ordered(
            &StatsSet::from_iter([
                (Stat::Min, Precision::inexact(42)),
                (Stat::IsConstant, Precision::exact(true)),
            ]),
            &dtype,
        );
        let first_ref = first.as_typed_ref(&dtype);
        assert_eq!(
            first_ref.get_as::<bool>(Stat::IsConstant),
            Some(Precision::exact(false))
        );
        assert_eq!(
            first_ref.get_as::<i32>(Stat::Min),
            Some(Precision::exact(42))
        );
    }

    #[test]
    fn merge_into_min() {
        let dtype = i32_dtype();
        let first = StatsSet::of(Stat::Min, Precision::exact(42))
            .merge_ordered(&StatsSet::default(), &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert!(first_ref.get(Stat::Min).is_none());
    }

    #[test]
    fn merge_from_min() {
        let dtype = i32_dtype();
        let first = StatsSet::default()
            .merge_ordered(&StatsSet::of(Stat::Min, Precision::exact(42)), &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert!(first_ref.get(Stat::Min).is_none());
    }

    #[test]
    fn merge_mins() {
        let dtype = i32_dtype();
        let first = StatsSet::of(Stat::Min, Precision::exact(37))
            .merge_ordered(&StatsSet::of(Stat::Min, Precision::exact(42)), &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert_eq!(
            first_ref.get_as::<i32>(Stat::Min),
            Some(Precision::exact(37))
        );
    }

    #[test]
    fn merge_into_bound_max() {
        let dtype = i32_dtype();
        let first = StatsSet::of(Stat::Max, Precision::exact(42))
            .merge_ordered(&StatsSet::default(), &dtype);
        assert!(first.get(Stat::Max).is_none());
    }

    #[test]
    fn merge_from_max() {
        let dtype = i32_dtype();
        let first = StatsSet::default()
            .merge_ordered(&StatsSet::of(Stat::Max, Precision::exact(42)), &dtype);
        assert!(first.get(Stat::Max).is_none());
    }

    #[test]
    fn merge_maxes() {
        let dtype = i32_dtype();
        let first = StatsSet::of(Stat::Max, Precision::exact(37))
            .merge_ordered(&StatsSet::of(Stat::Max, Precision::exact(42)), &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert_eq!(
            first_ref.get_as::<i32>(Stat::Max),
            Some(Precision::exact(42))
        );
    }

    #[test]
    fn merge_maxes_bound() {
        let dtype = i32_dtype();
        let first = StatsSet::of(Stat::Max, Precision::exact(42i32))
            .merge_ordered(&StatsSet::of(Stat::Max, Precision::inexact(43i32)), &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert_eq!(
            first_ref.get_as::<i32>(Stat::Max),
            Some(Precision::inexact(43))
        );
    }

    #[test]
    fn merge_into_scalar() {
        let dtype = i32_dtype();
        let first = StatsSet::of(Stat::Sum, Precision::exact(42i64))
            .merge_ordered(&StatsSet::default(), &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert!(first_ref.get(Stat::Sum).is_none());
    }

    #[test]
    fn merge_from_scalar() {
        let dtype = i32_dtype();
        let first = StatsSet::default()
            .merge_ordered(&StatsSet::of(Stat::Sum, Precision::exact(42i64)), &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert!(first_ref.get(Stat::Sum).is_none());
    }

    #[test]
    fn merge_scalars() {
        let dtype = i32_dtype();
        let first = StatsSet::of(Stat::Sum, Precision::exact(37i64))
            .merge_ordered(&StatsSet::of(Stat::Sum, Precision::exact(42i64)), &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert_eq!(
            first_ref.get_as::<i64>(Stat::Sum),
            Some(Precision::exact(79i64))
        );
    }

    #[test]
    fn merge_into_sortedness() {
        let dtype = i32_dtype();
        let first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true))
            .merge_ordered(&StatsSet::default(), &dtype);
        assert!(first.get(Stat::IsStrictSorted).is_none());
    }

    #[test]
    fn merge_from_sortedness() {
        let dtype = i32_dtype();
        let first = StatsSet::default().merge_ordered(
            &StatsSet::of(Stat::IsStrictSorted, Precision::exact(true)),
            &dtype,
        );
        assert!(first.get(Stat::IsStrictSorted).is_none());
    }

    #[test]
    fn merge_sortedness() {
        let dtype = i32_dtype();
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        first.set(Stat::Max, Precision::exact(1));
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        second.set(Stat::Min, Precision::exact(2));
        first = first.merge_ordered(&second, &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert_eq!(
            first_ref.get_as::<bool>(Stat::IsStrictSorted),
            Some(Precision::exact(true))
        );
    }

    #[test]
    fn merge_sortedness_out_of_order() {
        let dtype = i32_dtype();
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        first.set(Stat::Min, Precision::exact(1));
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        second.set(Stat::Max, Precision::exact(2));
        second = second.merge_ordered(&first, &dtype);
        let second_ref = second.as_typed_ref(&dtype);
        assert_eq!(
            second_ref.get_as::<bool>(Stat::IsStrictSorted),
            Some(Precision::inexact(false))
        );
    }

    /// When only one side is exactly sorted, the merged set drops the stat.
    #[test]
    fn merge_sortedness_only_one_sorted() {
        let dtype = i32_dtype();
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        first.set(Stat::Max, Precision::exact(1));
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(false));
        second.set(Stat::Min, Precision::exact(2));
        // Assert on the merge result rather than on the untouched input.
        let merged = first.merge_ordered(&second, &dtype);
        assert!(merged.get(Stat::IsStrictSorted).is_none());
    }

    #[test]
    fn merge_sortedness_missing_min() {
        let dtype = i32_dtype();
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        first.set(Stat::Max, Precision::exact(1));
        let second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        first = first.merge_ordered(&second, &dtype);
        assert!(first.get(Stat::IsStrictSorted).is_none());
    }

    #[test]
    fn merge_sortedness_bound_min() {
        let dtype = i32_dtype();
        let mut first = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        first.set(Stat::Max, Precision::exact(1));
        let mut second = StatsSet::of(Stat::IsStrictSorted, Precision::exact(true));
        second.set(Stat::Min, Precision::inexact(2));
        first = first.merge_ordered(&second, &dtype);
        let first_ref = first.as_typed_ref(&dtype);
        assert_eq!(
            first_ref.get_as::<bool>(Stat::IsStrictSorted),
            Some(Precision::exact(true))
        );
    }

    /// An unordered merge keeps exactly the commutative stats.
    #[test]
    fn merge_unordered() {
        let non_nullable = i32_dtype();
        let nullable = DType::Primitive(PType::I32, Nullability::Nullable);

        let array =
            PrimitiveArray::from_option_iter([Some(1), None, Some(2), Some(42), Some(10000), None]);
        let all_stats = all::<Stat>()
            .filter(|s| !matches!(s, Stat::Sum))
            .filter(|s| !matches!(s, Stat::NaNCount))
            .collect_vec();
        array.statistics().compute_all(&all_stats).unwrap();
        let stats = array.statistics().to_owned();
        for stat in &all_stats {
            assert!(stats.get(*stat).is_some(), "Stat {stat} is missing");
        }
        let merged = stats.clone().merge_unordered(&stats, &non_nullable);
        for stat in &all_stats {
            assert_eq!(
                merged.get(*stat).is_some(),
                stat.is_commutative(),
                "Stat {stat} remains after merge_unordered despite not being commutative, or was removed despite being commutative"
            )
        }
        let merged_ref = merged.as_typed_ref(&nullable);
        let stats_ref = stats.as_typed_ref(&nullable);
        assert_eq!(
            merged_ref.get_as::<i32>(Stat::Min),
            stats_ref.get_as::<i32>(Stat::Min)
        );
        assert_eq!(
            merged_ref.get_as::<i32>(Stat::Max),
            stats_ref.get_as::<i32>(Stat::Max)
        );
        assert_eq!(
            merged_ref.get_as::<u64>(Stat::NullCount).unwrap(),
            stats_ref
                .get_as::<u64>(Stat::NullCount)
                .unwrap()
                .map(|s| s * 2)
        );
    }

    #[test]
    fn merge_min_bound_same() {
        let dtype = i32_dtype();
        let merged = StatsSet::of(Stat::Min, Precision::inexact(5))
            .merge_ordered(&StatsSet::of(Stat::Min, Precision::exact(5)), &dtype);
        let merged_ref = merged.as_typed_ref(&dtype);
        assert_eq!(
            merged_ref.get_as::<i32>(Stat::Min),
            Some(Precision::exact(5))
        );
    }

    #[test]
    fn merge_min_bound_bound_lower() {
        let dtype = i32_dtype();
        let merged = StatsSet::of(Stat::Min, Precision::inexact(4))
            .merge_ordered(&StatsSet::of(Stat::Min, Precision::exact(5)), &dtype);
        let merged_ref = merged.as_typed_ref(&dtype);
        assert_eq!(
            merged_ref.get_as::<i32>(Stat::Min),
            Some(Precision::inexact(4))
        );
    }

    /// Combining boolean bounds keeps `self`'s exact value when `other` is
    /// compatible with it.
    #[test]
    fn test_combine_is_constant() {
        let dtype = i32_dtype();
        {
            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
            let stats2 = StatsSet::of(Stat::IsConstant, Precision::exact(true));
            let mut stats_ref = stats.as_mut_typed_ref(&dtype);
            stats_ref
                .combine_bool_stat::<IsConstant>(&stats2.as_typed_ref(&dtype))
                .unwrap();
            assert_eq!(
                stats_ref.get_as::<bool>(Stat::IsConstant),
                Some(Precision::exact(true))
            );
        }
        {
            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(true));
            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
            let mut stats_ref = stats.as_mut_typed_ref(&dtype);
            stats_ref
                .combine_bool_stat::<IsConstant>(&stats2.as_typed_ref(&dtype))
                .unwrap();
            assert_eq!(
                stats_ref.get_as::<bool>(Stat::IsConstant),
                Some(Precision::exact(true))
            );
        }
        {
            let mut stats = StatsSet::of(Stat::IsConstant, Precision::exact(false));
            let stats2 = StatsSet::of(Stat::IsConstant, Precision::inexact(false));
            let mut stats_ref = stats.as_mut_typed_ref(&dtype);
            stats_ref
                .combine_bool_stat::<IsConstant>(&stats2.as_typed_ref(&dtype))
                .unwrap();
            assert_eq!(
                stats_ref.get_as::<bool>(Stat::IsConstant),
                Some(Precision::exact(false))
            );
        }
    }

    /// Conflicting exact boolean stats make `combine_sets` fail.
    #[test]
    fn test_combine_sets_boolean_conflict() {
        let dtype = i32_dtype();
        let mut stats1 = StatsSet::from_iter([
            (Stat::IsConstant, Precision::exact(true)),
            (Stat::IsSorted, Precision::exact(true)),
        ]);
        let stats2 = StatsSet::from_iter([
            (Stat::IsConstant, Precision::exact(false)),
            (Stat::IsSorted, Precision::exact(true)),
        ]);
        let result = stats1.combine_sets(&stats2, &dtype);
        assert!(result.is_err());
    }

    /// Stats present on only one side survive `combine_sets`.
    #[test]
    fn test_combine_sets_with_missing_stats() {
        let dtype = i32_dtype();
        let mut stats1 = StatsSet::from_iter([
            (Stat::Min, Precision::exact(42)),
            (Stat::UncompressedSizeInBytes, Precision::exact(1000)),
        ]);
        let stats2 = StatsSet::from_iter([
            (Stat::Max, Precision::exact(100)),
            (Stat::IsStrictSorted, Precision::exact(true)),
        ]);
        stats1.combine_sets(&stats2, &dtype).unwrap();
        let stats_ref = stats1.as_typed_ref(&dtype);
        assert_eq!(
            stats_ref.get_as::<i32>(Stat::Min),
            Some(Precision::exact(42))
        );
        assert_eq!(
            stats_ref.get_as::<i32>(Stat::Max),
            Some(Precision::exact(100))
        );
        assert_eq!(
            stats_ref.get_as::<bool>(Stat::IsStrictSorted),
            Some(Precision::exact(true))
        );
    }

    /// Exact values win over compatible inexact bounds in `combine_sets`.
    #[test]
    fn test_combine_sets_with_inexact() {
        let dtype = i32_dtype();
        let mut stats1 = StatsSet::from_iter([
            (Stat::Min, Precision::exact(42)),
            (Stat::Max, Precision::inexact(100)),
            (Stat::IsConstant, Precision::exact(false)),
        ]);
        let stats2 = StatsSet::from_iter([
            (Stat::Min, Precision::inexact(40)),
            (Stat::Max, Precision::exact(90)),
            (Stat::IsSorted, Precision::exact(true)),
        ]);
        stats1.combine_sets(&stats2, &dtype).unwrap();
        let stats_ref = stats1.as_typed_ref(&dtype);
        assert_eq!(
            stats_ref.get_as::<i32>(Stat::Min),
            Some(Precision::exact(42))
        );
        assert_eq!(
            stats_ref.get_as::<i32>(Stat::Max),
            Some(Precision::exact(90))
        );
        assert_eq!(
            stats_ref.get_as::<bool>(Stat::IsSorted),
            Some(Precision::exact(true))
        );
    }
}