use std::{cmp::Ordering, collections::HashSet};
use polars::prelude::Expr;
use crate::{
core::Metric,
domains::find_min_covering,
error::Fallible,
traits::{InfMul, ProductOrd, option_min},
transformations::traits::UnboundedMetric,
};
use super::{ChangeOneDistance, IntDistance, MicrodataMetric, SymmetricDistance};
#[cfg(feature = "ffi")]
mod ffi;
/// Metric parameterized by an identifier expression.
///
/// Distances under this metric are `IntDistance` values, and its
/// `MicrodataMetric` impl reports it as neither sized nor ordered.
///
/// NOTE(review): presumably the symmetric distance counted over distinct
/// identifiers rather than over rows — confirm against the crate documentation.
#[derive(Clone, PartialEq, Debug)]
pub struct SymmetricIdDistance {
/// Expression returned (cloned) by `MicrodataMetric::identifier`.
pub identifier: Expr,
}
/// Registers `SymmetricIdDistance` as a metric.
impl Metric for SymmetricIdDistance {
// Distances are plain integer counts.
type Distance = IntDistance;
}
/// Metric parameterized by an identifier expression.
///
/// Distances under this metric are `IntDistance` values, and its
/// `MicrodataMetric` impl reports it as sized but not ordered.
///
/// NOTE(review): presumably the change-one distance counted over distinct
/// identifiers rather than over rows — confirm against the crate documentation.
#[derive(Clone, PartialEq, Debug)]
pub struct ChangeOneIdDistance {
/// Expression returned (cloned) by `MicrodataMetric::identifier`.
pub identifier: Expr,
}
/// Registers `ChangeOneIdDistance` as a metric.
impl Metric for ChangeOneIdDistance {
// Distances are plain integer counts.
type Distance = IntDistance;
}
/// Wrapper around an unbounded metric `M` whose distances are expressed as
/// [`Bounds`] instead of `M`'s own distance type.
///
/// NOTE(review): presumably used for data-frame level stability reasoning,
/// with one bound per grouping — confirm against callers.
#[derive(Clone, PartialEq, Debug)]
pub struct FrameDistance<M: UnboundedMetric>(pub M);
/// Microdata properties of [`SymmetricIdDistance`]: unsized, unordered,
/// identifier-aware, with `SymmetricDistance` as the event-level metric.
impl MicrodataMetric for SymmetricIdDistance {
    type EventMetric = SymmetricDistance;

    const ORDERED: bool = false;
    const SIZED: bool = false;

    /// Returns a copy of the identifier expression; always `Some` for this metric.
    fn identifier(&self) -> Option<Expr> {
        let id_expr = self.identifier.clone();
        Some(id_expr)
    }
}
/// Microdata properties of [`ChangeOneIdDistance`]: sized, unordered,
/// identifier-aware, with `ChangeOneDistance` as the event-level metric.
impl MicrodataMetric for ChangeOneIdDistance {
    type EventMetric = ChangeOneDistance;

    const ORDERED: bool = false;
    const SIZED: bool = true;

    /// Returns a copy of the identifier expression; always `Some` for this metric.
    fn identifier(&self) -> Option<Expr> {
        let id_expr = self.identifier.clone();
        Some(id_expr)
    }
}
/// Registers `FrameDistance<M>` as a metric for any unbounded metric `M`.
impl<M: UnboundedMetric> Metric for FrameDistance<M> {
// Distances are collections of per-grouping bounds rather than a single number.
type Distance = Bounds;
}
/// A collection of [`Bound`]s, each keyed by its own set of grouping expressions.
///
/// Used as the distance type of `FrameDistance`. A bare `u32` converts into a
/// single bound with an empty grouping and `per_group` set to that value.
#[derive(Clone, PartialEq, Debug)]
pub struct Bounds(pub Vec<Bound>);
/// Lifts a scalar distance into [`Bounds`].
impl From<u32> for Bounds {
    /// Builds a single-element collection: one bound with an empty grouping
    /// whose `per_group` is `v` and whose `num_groups` is left unset.
    fn from(v: u32) -> Self {
        // The turbofish is required: an empty array literal carries no element type.
        let ungrouped = Bound::by::<[Expr; 0], Expr>([]);
        let scalar_bound = ungrouped.with_per_group(v);
        Self(vec![scalar_bound])
    }
}
impl Bounds {
    /// Derives the bound implied by this collection for the grouping `by`.
    ///
    /// The result always has `by` equal to the queried set. Its limits are
    /// recomputed from the whole collection:
    ///
    /// * `per_group` is the smallest `per_group` among all stored bounds whose
    ///   grouping is a subset of `by` (`None` if no such bound sets one).
    /// * `num_groups` is the product of the `num_groups` values in the cover
    ///   returned by `find_min_covering`. It is `None` when no cover exists
    ///   or when `inf_mul` reports an error for the product.
    /// * When `by` is empty, `num_groups` is forced to `Some(1)`.
    pub fn get_bound(&self, by: &HashSet<Expr>) -> Bound {
        // Seed the result from an exact match if one exists. Both numeric
        // fields are recomputed below, so this only fixes the `by` key.
        let mut bound = (self.0.iter())
            .find(|b| &b.by == by)
            .cloned()
            .unwrap_or_else(|| Bound {
                by: by.clone(),
                ..Default::default()
            });

        // A per-group limit stated for a subset of the grouping columns is
        // also applied to `by`, so take the tightest of them.
        bound.per_group = (self.0.iter())
            .filter(|m| m.by.is_subset(by))
            .filter_map(|m| m.per_group)
            .min();

        // Only bounds that actually state a number of groups can take part in a cover.
        let all_mips = (self.0.iter())
            .filter_map(|b| Some((&b.by, b.num_groups?)))
            .collect();

        // `and_then` collapses "no cover" and "product failed" into a single `None`.
        bound.num_groups = find_min_covering(by.clone(), all_mips).and_then(|cover| {
            cover
                .iter()
                .try_fold(1u32, |acc, (_, v)| acc.inf_mul(v).ok())
        });

        // With no grouping columns, the group count is fixed at one.
        if by.is_empty() {
            bound.num_groups = Some(1);
        }
        bound
    }

    /// Merges `bound` into the collection and returns the updated collection.
    ///
    /// If a bound with an identical `by` set is already stored, each of its
    /// limits is replaced by `option_min` of the stored and the new value.
    /// Otherwise `bound` is appended unchanged.
    ///
    /// NOTE(review): `option_min` presumably yields the smaller of two present
    /// values and otherwise whichever one is present — confirm in `crate::traits`.
    pub fn with_bound(mut self, bound: Bound) -> Self {
        if let Some(b) = self.0.iter_mut().find(|m| m.by == bound.by) {
            b.num_groups = option_min(b.num_groups, bound.num_groups);
            b.per_group = option_min(b.per_group, bound.per_group);
        } else {
            self.0.push(bound);
        }
        self
    }
}
/// A pair of optional limits attached to one grouping (a set of expressions).
///
/// The default value has an empty grouping and both limits unset.
#[derive(Clone, PartialEq, Default, Debug)]
pub struct Bound {
/// The grouping expressions this bound applies to.
pub by: HashSet<Expr>,
/// Optional per-group limit.
/// NOTE(review): presumably the greatest number of rows one identifier may
/// contribute to any single group — confirm.
pub per_group: Option<u32>,
/// Optional limit on the number of groups.
/// NOTE(review): presumably the greatest number of groups one identifier may
/// contribute to — confirm.
pub num_groups: Option<u32>,
}
impl Bound {
pub fn by<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(by: E) -> Self {
Self {
by: by.as_ref().iter().cloned().map(Into::into).collect(),
per_group: None,
num_groups: None,
}
}
pub fn with_per_group(mut self, value: u32) -> Self {
self.per_group = Some(value);
self
}
pub fn with_num_groups(mut self, value: u32) -> Self {
self.num_groups = Some(value);
self
}
}
/// Ordering over [`Bounds`]: only identical collections are comparable.
impl ProductOrd for Bounds {
    /// Returns `Ordering::Equal` when `self == other`.
    ///
    /// # Errors
    /// Returns a `MakeTransformation` error whenever the two collections are
    /// not equal; no attempt is made to order unequal bounds.
    fn total_cmp(&self, other: &Self) -> Fallible<Ordering> {
        if self == other {
            Ok(Ordering::Equal)
        } else {
            fallible!(
                MakeTransformation,
                "cannot compare bounds with different by columns"
            )
        }
    }
}