#[cfg(feature = "ffi")]
pub(crate) mod ffi;
#[cfg(all(feature = "polars", feature = "contrib"))]
pub mod polars;
#[cfg(all(feature = "polars", feature = "contrib"))]
pub use polars::*;
use std::hash::Hash;
use std::marker::PhantomData;
use crate::{
core::{Domain, Metric, MetricSpace},
domains::{AtomDomain, BitVectorDomain, MapDomain, VectorDomain, type_name},
error::Fallible,
traits::CheckAtom,
};
#[cfg(feature = "contrib")]
use crate::{traits::Hashable, transformations::DataFrameDomain};
use std::fmt::{Debug, Formatter};
/// Integer type used as the `Distance` of the integer-valued metrics in this
/// module (for example `SymmetricDistance`, `HammingDistance` and
/// `DiscreteDistance`).
pub type IntDistance = u32;
/// Marker type for the symmetric-distance metric.
///
/// The type is a stateless unit struct, so all instances are interchangeable:
/// `Default` yields the only value and `PartialEq` always reports equality.
/// Both traits are derived rather than hand-written, matching how
/// `DiscreteDistance` is declared in this file.
#[derive(Clone, Default, PartialEq)]
pub struct SymmetricDistance;

impl Debug for SymmetricDistance {
    /// Renders the metric as the fixed text `SymmetricDistance()`.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
        f.write_str("SymmetricDistance()")
    }
}
/// Distances under `SymmetricDistance` are expressed as `IntDistance`.
impl Metric for SymmetricDistance {
    type Distance = IntDistance;
}

/// `SymmetricDistance` pairs with any `AtomDomain<String>`.
impl MetricSpace for (AtomDomain<String>, SymmetricDistance) {
    /// Always succeeds: no property of the domain is inspected.
    fn check_space(&self) -> Fallible<()> {
        Ok(())
    }
}

/// `SymmetricDistance` pairs with any `VectorDomain`, whatever its element domain.
impl<D: Domain> MetricSpace for (VectorDomain<D>, SymmetricDistance) {
    /// Always succeeds: no property of the domain is inspected.
    fn check_space(&self) -> Fallible<()> {
        Ok(())
    }
}

/// `SymmetricDistance` pairs with any `DataFrameDomain`
/// (only compiled when the `contrib` feature is enabled).
#[cfg(feature = "contrib")]
impl<K: Hashable> MetricSpace for (DataFrameDomain<K>, SymmetricDistance) {
    /// Always succeeds: no property of the domain is inspected.
    fn check_space(&self) -> Fallible<()> {
        Ok(())
    }
}
/// Marker type for the insert/delete-distance metric.
///
/// A stateless unit struct: `Default` produces the only value and every
/// instance compares equal to every other. `Default` and `PartialEq` are
/// derived rather than hand-written, matching how `DiscreteDistance` is
/// declared in this file.
#[derive(Clone, Default, PartialEq)]
pub struct InsertDeleteDistance;

impl Debug for InsertDeleteDistance {
    /// Renders the metric as the fixed text `InsertDeleteDistance()`.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
        f.write_str("InsertDeleteDistance()")
    }
}
/// Distances under `InsertDeleteDistance` are expressed as `IntDistance`.
impl Metric for InsertDeleteDistance {
    type Distance = IntDistance;
}

/// `InsertDeleteDistance` pairs with any `VectorDomain`, whatever its element domain.
impl<D: Domain> MetricSpace for (VectorDomain<D>, InsertDeleteDistance) {
    /// Always succeeds: no property of the domain is inspected.
    fn check_space(&self) -> Fallible<()> {
        Ok(())
    }
}

/// `InsertDeleteDistance` pairs with any `DataFrameDomain`
/// (only compiled when the `contrib` feature is enabled).
#[cfg(feature = "contrib")]
impl<K: Hashable> MetricSpace for (DataFrameDomain<K>, InsertDeleteDistance) {
    /// Always succeeds: no property of the domain is inspected.
    fn check_space(&self) -> Fallible<()> {
        Ok(())
    }
}
/// Marker type for the change-one-distance metric.
///
/// A stateless unit struct: `Default` produces the only value and every
/// instance compares equal to every other. `Default` and `PartialEq` are
/// derived rather than hand-written, matching how `DiscreteDistance` is
/// declared in this file.
#[derive(Clone, Default, PartialEq)]
pub struct ChangeOneDistance;

impl Debug for ChangeOneDistance {
    /// Renders the metric as the fixed text `ChangeOneDistance()`.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
        f.write_str("ChangeOneDistance()")
    }
}
/// Distances under `ChangeOneDistance` are expressed as `IntDistance`.
impl Metric for ChangeOneDistance {
    type Distance = IntDistance;
}

/// `ChangeOneDistance` pairs with a `VectorDomain` only when that domain
/// carries a size.
impl<D: Domain> MetricSpace for (VectorDomain<D>, ChangeOneDistance) {
    /// Succeeds when the vector domain's `size` is `Some`.
    ///
    /// # Errors
    /// Returns a `MetricSpace` error when `size` is `None`.
    fn check_space(&self) -> Fallible<()> {
        match self.0.size {
            Some(_) => Ok(()),
            None => Err(err!(
                MetricSpace,
                "change-one distance requires a known dataset size"
            )),
        }
    }
}
/// Marker type for the Hamming-distance metric.
///
/// A stateless unit struct: `Default` produces the only value and every
/// instance compares equal to every other. `Default` and `PartialEq` are
/// derived rather than hand-written, matching how `DiscreteDistance` is
/// declared in this file.
#[derive(Clone, Default, PartialEq)]
pub struct HammingDistance;

impl Debug for HammingDistance {
    /// Renders the metric as the fixed text `HammingDistance()`.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
        f.write_str("HammingDistance()")
    }
}
/// Distances under `HammingDistance` are expressed as `IntDistance`.
impl Metric for HammingDistance {
    type Distance = IntDistance;
}

/// `HammingDistance` pairs with a `VectorDomain` only when that domain
/// carries a size.
impl<D: Domain> MetricSpace for (VectorDomain<D>, HammingDistance) {
    /// Succeeds when the vector domain's `size` is `Some`.
    ///
    /// # Errors
    /// Returns a `MetricSpace` error when `size` is `None`.
    fn check_space(&self) -> Fallible<()> {
        match self.0.size {
            Some(_) => Ok(()),
            None => Err(err!(
                MetricSpace,
                "Hamming distance requires a known dataset size"
            )),
        }
    }
}
/// Metric type parameterised by a const `P` and a distance type `Q`.
///
/// No value of `Q` is stored; `PhantomData<fn() -> Q>` only records the type
/// parameter. `Default`, `Clone` and `PartialEq` are written by hand because
/// deriving them would add `Q: Default` / `Q: Clone` / `Q: PartialEq` bounds
/// that this zero-sized marker does not need.
pub struct LpDistance<const P: usize, Q>(PhantomData<fn() -> Q>);

impl<const P: usize, Q> Default for LpDistance<P, Q> {
    /// Builds the single value of this type.
    fn default() -> Self {
        Self(PhantomData)
    }
}

impl<const P: usize, Q> Clone for LpDistance<P, Q> {
    /// The type holds no data, so a clone is just a fresh marker value.
    fn clone(&self) -> Self {
        LpDistance(PhantomData)
    }
}

impl<const P: usize, Q> PartialEq for LpDistance<P, Q> {
    /// Two values of the same `LpDistance<P, Q>` type are always equal.
    fn eq(&self, _rhs: &Self) -> bool {
        true
    }
}
impl<const P: usize, Q> Debug for LpDistance<P, Q> {
    /// Renders as `L{P}Distance({Q})`, where the `Q` text is produced by the
    /// `type_name!` macro.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let distance_type = type_name!(Q);
        write!(f, "L{P}Distance({distance_type})")
    }
}

/// Distances under `LpDistance<P, Q>` are values of type `Q`.
impl<const P: usize, Q> Metric for LpDistance<P, Q> {
    type Distance = Q;
}
/// `LpDistance` over vectors of atoms.
impl<T: CheckAtom, const P: usize, Q> MetricSpace
    for (VectorDomain<AtomDomain<T>>, LpDistance<P, Q>)
{
    /// Accepts the pairing unless the element domain reports `nan()`.
    ///
    /// # Errors
    /// Returns a `MetricSpace` error when `element_domain.nan()` is true.
    fn check_space(&self) -> Fallible<()> {
        let (domain, _metric) = self;
        if domain.element_domain.nan() {
            return fallible!(MetricSpace, "LpDistance requires non-nan elements");
        }
        Ok(())
    }
}
/// `LpDistance` over maps from atom keys to atom values.
impl<K: CheckAtom + Eq + Hash, V: CheckAtom, const P: usize, Q> MetricSpace
    for (MapDomain<AtomDomain<K>, AtomDomain<V>>, LpDistance<P, Q>)
{
    /// Accepts the pairing unless the value domain reports `nan()`.
    /// The key domain is not inspected.
    ///
    /// # Errors
    /// Returns a `MetricSpace` error when `value_domain.nan()` is true.
    fn check_space(&self) -> Fallible<()> {
        let (domain, _metric) = self;
        if domain.value_domain.nan() {
            fallible!(MetricSpace, "LpDistance requires non-nan elements")
        } else {
            Ok(())
        }
    }
}
/// `L0PInfDistance<P, AbsoluteDistance<Q>>` over maps from atom keys to atom values.
impl<K: CheckAtom, V: CheckAtom, const P: usize, Q> MetricSpace
    for (
        MapDomain<AtomDomain<K>, AtomDomain<V>>,
        L0PInfDistance<P, AbsoluteDistance<Q>>,
    )
where
    K: Eq + Hash,
{
    /// Accepts the pairing unless the value domain reports `nan()`.
    /// The key domain is not inspected.
    ///
    /// # Errors
    /// Returns a `MetricSpace` error when `value_domain.nan()` is true.
    fn check_space(&self) -> Fallible<()> {
        if self.0.value_domain.nan() {
            // The message names the metric this impl is actually for and the
            // property actually checked (`nan()`), in line with the sibling
            // `LpDistance` / `AbsoluteDistance` messages.
            fallible!(
                MetricSpace,
                "L0PInfDistance<AbsoluteDistance<Q>> requires non-nan elements"
            )
        } else {
            Ok(())
        }
    }
}
/// `LpDistance` with `P = 1` and distance type `Q`.
pub type L1Distance<Q> = LpDistance<1, Q>;
/// `LpDistance` with `P = 2` and distance type `Q`.
pub type L2Distance<Q> = LpDistance<2, Q>;
/// Metric type parameterised by its distance type `Q`.
///
/// No value of `Q` is stored; `PhantomData<fn() -> Q>` only records the type
/// parameter. `Default`, `Clone` and `PartialEq` are written by hand because
/// deriving them would add bounds on `Q` that this zero-sized marker does not
/// need.
pub struct AbsoluteDistance<Q>(PhantomData<fn() -> Q>);

impl<Q> Default for AbsoluteDistance<Q> {
    /// Builds the single value of this type.
    fn default() -> Self {
        Self(PhantomData)
    }
}

impl<Q> Clone for AbsoluteDistance<Q> {
    /// The type holds no data, so a clone is just a fresh marker value.
    fn clone(&self) -> Self {
        AbsoluteDistance(PhantomData)
    }
}

impl<Q> PartialEq for AbsoluteDistance<Q> {
    /// Two values of the same `AbsoluteDistance<Q>` type are always equal.
    fn eq(&self, _rhs: &Self) -> bool {
        true
    }
}
impl<Q> Debug for AbsoluteDistance<Q> {
    /// Renders as `AbsoluteDistance({Q})`, where the `Q` text is produced by
    /// the `type_name!` macro.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let distance_type = type_name!(Q);
        write!(f, "AbsoluteDistance({distance_type})")
    }
}

/// Distances under `AbsoluteDistance<Q>` are values of type `Q`.
impl<Q> Metric for AbsoluteDistance<Q> {
    type Distance = Q;
}

/// `AbsoluteDistance` over a single atom.
impl<T: CheckAtom, Q> MetricSpace for (AtomDomain<T>, AbsoluteDistance<Q>) {
    /// Accepts the pairing unless the atom domain reports `nan()`.
    ///
    /// # Errors
    /// Returns a `MetricSpace` error when `nan()` is true.
    fn check_space(&self) -> Fallible<()> {
        let (domain, _metric) = self;
        if domain.nan() {
            return fallible!(MetricSpace, "AbsoluteDistance requires non-nan elements");
        }
        Ok(())
    }
}
/// Metric that wraps an inner metric `M`, held in the public tuple field.
///
/// Its distance is the pair `(IntDistance, M::Distance)`.
/// NOTE(review): going by the name, presumably an L0 bound followed by an
/// L-infinity bound — confirm against callers.
#[derive(Default, Clone, PartialEq)]
pub struct L0InfDistance<M: Metric>(pub M);

impl<M: Metric> Debug for L0InfDistance<M> {
    /// Renders as `L0InfDistance(<inner metric's Debug output>)`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let inner = &self.0;
        write!(f, "L0InfDistance({inner:?})")
    }
}

impl<M: Metric> Metric for L0InfDistance<M> {
    type Distance = (IntDistance, M::Distance);
}
/// Metric that wraps an inner metric `M` (public tuple field) and is tagged
/// with a const `P`.
///
/// Its distance is the triple `(IntDistance, M::Distance, M::Distance)`.
/// NOTE(review): going by the name, presumably (L0, Lp, L-infinity) bounds —
/// confirm against callers.
#[derive(Default, Clone, PartialEq)]
pub struct L0PInfDistance<const P: usize, M: Metric>(pub M);

/// `L0PInfDistance` with `P = 1`.
pub type L01InfDistance<M> = L0PInfDistance<1, M>;
/// `L0PInfDistance` with `P = 2`.
pub type L02InfDistance<M> = L0PInfDistance<2, M>;

impl<M: Metric, const P: usize> Debug for L0PInfDistance<P, M> {
    /// Renders as `L0{P}InfDistance(<inner metric's Debug output>)`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let inner = &self.0;
        write!(f, "L0{P}InfDistance({inner:?})")
    }
}

impl<const P: usize, M: Metric> Metric for L0PInfDistance<P, M> {
    type Distance = (IntDistance, M::Distance, M::Distance);
}
/// `L0PInfDistance<P, AbsoluteDistance<T>>` over vectors of atoms of type `T`.
impl<T: CheckAtom, const P: usize> MetricSpace
    for (
        VectorDomain<AtomDomain<T>>,
        L0PInfDistance<P, AbsoluteDistance<T>>,
    )
{
    /// Accepts the pairing unless the element domain reports `nan()`.
    ///
    /// # Errors
    /// Returns a `MetricSpace` error when `element_domain.nan()` is true.
    fn check_space(&self) -> Fallible<()> {
        if self.0.element_domain.nan() {
            // The message names the metric this impl is actually for.
            fallible!(MetricSpace, "L0PInfDistance requires non-nan elements")
        } else {
            Ok(())
        }
    }
}
/// Marker type for the discrete-distance metric.
///
/// A stateless unit struct; cloning, defaulting and equality are all derived.
#[derive(Default, Clone, PartialEq)]
pub struct DiscreteDistance;

impl Debug for DiscreteDistance {
    /// Renders the metric as the fixed text `DiscreteDistance()`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str("DiscreteDistance()")
    }
}
/// Distances under `DiscreteDistance` are expressed as `IntDistance`.
impl Metric for DiscreteDistance {
    type Distance = IntDistance;
}

/// `DiscreteDistance` pairs with any `AtomDomain`.
impl<T: CheckAtom> MetricSpace for (AtomDomain<T>, DiscreteDistance) {
    /// Always succeeds: no property of the domain is inspected.
    fn check_space(&self) -> Fallible<()> {
        Ok(())
    }
}

/// `DiscreteDistance` pairs with any `BitVectorDomain`.
impl MetricSpace for (BitVectorDomain, DiscreteDistance) {
    /// Always succeeds: no property of the domain is inspected.
    fn check_space(&self) -> Fallible<()> {
        Ok(())
    }
}
/// Metric type parameterised by its distance type `Q`, carrying one runtime
/// flag.
///
/// No value of `Q` is stored; `_marker` only records the type parameter.
/// `Default`, `Clone` and `PartialEq` are written by hand so that they place
/// no bounds on `Q`.
pub struct LInfDistance<Q> {
    /// Flag supplied at construction; it takes part in equality and in the
    /// `Debug` output.
    pub monotonic: bool,
    _marker: PhantomData<fn() -> Q>,
}

impl<Q> LInfDistance<Q> {
    /// Creates the metric with the given `monotonic` flag.
    pub fn new(monotonic: bool) -> Self {
        Self {
            monotonic,
            _marker: PhantomData,
        }
    }
}

impl<Q> Default for LInfDistance<Q> {
    /// The default metric has `monotonic` set to `false`.
    fn default() -> Self {
        Self::new(false)
    }
}

impl<Q> Clone for LInfDistance<Q> {
    /// Copies the `monotonic` flag into a fresh value.
    fn clone(&self) -> Self {
        Self::new(self.monotonic)
    }
}

impl<Q> PartialEq for LInfDistance<Q> {
    /// Two metrics are equal exactly when their `monotonic` flags agree.
    fn eq(&self, rhs: &Self) -> bool {
        rhs.monotonic == self.monotonic
    }
}
impl<Q> Debug for LInfDistance<Q> {
    /// Renders as `LInfDistance(T={Q})`, or `LInfDistance(monotonic, T={Q})`
    /// when the `monotonic` flag is set. The `Q` text is produced by the
    /// `type_name!` macro.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let prefix = if self.monotonic { "monotonic, " } else { "" };
        let distance_type = type_name!(Q);
        write!(f, "LInfDistance({prefix}T={distance_type})")
    }
}
/// Distances under `LInfDistance<Q>` are values of type `Q`.
impl<Q> Metric for LInfDistance<Q> {
    type Distance = Q;
}

/// `LInfDistance<T>` over vectors of atoms of the same type `T`.
impl<T: CheckAtom> MetricSpace for (VectorDomain<AtomDomain<T>>, LInfDistance<T>) {
    /// Accepts the pairing unless the element domain reports `nan()`.
    ///
    /// # Errors
    /// Returns a `MetricSpace` error when `element_domain.nan()` is true.
    fn check_space(&self) -> Fallible<()> {
        let (domain, _metric) = self;
        if domain.element_domain.nan() {
            return fallible!(MetricSpace, "LInfDistance requires non-nan elements");
        }
        Ok(())
    }
}
/// Metrics whose distance type is `IntDistance`.
///
/// Implemented in this file by `SymmetricDistance`, `InsertDeleteDistance`,
/// `ChangeOneDistance` and `HammingDistance`.
pub trait MicrodataMetric: 'static + Metric<Distance = IntDistance> {
    /// Associated event-level metric. Every implementation in this file names
    /// itself here.
    type EventMetric: EventLevelMetric;

    /// `true` for `ChangeOneDistance` and `HammingDistance`, the two metrics in
    /// this file whose `VectorDomain` metric space requires a known dataset
    /// size; `false` for the other two.
    const SIZED: bool;

    /// `true` for `InsertDeleteDistance` and `HammingDistance`.
    /// NOTE(review): presumably marks metrics that are sensitive to row
    /// order — confirm against callers.
    const ORDERED: bool;

    /// Optional polars expression attached to the metric; every
    /// implementation in this file returns `None`.
    /// NOTE(review): presumably selects an identifier column — confirm.
    #[cfg(feature = "polars")]
    fn identifier(&self) -> Option<polars_plan::dsl::Expr>;
}
/// `SymmetricDistance`: neither sized nor ordered; its own event-level metric.
impl MicrodataMetric for SymmetricDistance {
    type EventMetric = Self;

    const SIZED: bool = false;
    const ORDERED: bool = false;

    /// No identifier expression is attached to this metric.
    #[cfg(feature = "polars")]
    fn identifier(&self) -> Option<polars_plan::dsl::Expr> {
        None
    }
}

/// `InsertDeleteDistance`: ordered but not sized; its own event-level metric.
impl MicrodataMetric for InsertDeleteDistance {
    type EventMetric = Self;

    const SIZED: bool = false;
    const ORDERED: bool = true;

    /// No identifier expression is attached to this metric.
    #[cfg(feature = "polars")]
    fn identifier(&self) -> Option<polars_plan::dsl::Expr> {
        None
    }
}

/// `ChangeOneDistance`: sized but not ordered; its own event-level metric.
impl MicrodataMetric for ChangeOneDistance {
    type EventMetric = Self;

    const SIZED: bool = true;
    const ORDERED: bool = false;

    /// No identifier expression is attached to this metric.
    #[cfg(feature = "polars")]
    fn identifier(&self) -> Option<polars_plan::dsl::Expr> {
        None
    }
}

/// `HammingDistance`: both sized and ordered; its own event-level metric.
impl MicrodataMetric for HammingDistance {
    type EventMetric = Self;

    const SIZED: bool = true;
    const ORDERED: bool = true;

    /// No identifier expression is attached to this metric.
    #[cfg(feature = "polars")]
    fn identifier(&self) -> Option<polars_plan::dsl::Expr> {
        None
    }
}
/// Marker trait for `MicrodataMetric`s that are also `Default`-constructible.
///
/// It declares no items of its own; it is the bound required of
/// `MicrodataMetric::EventMetric`.
pub trait EventLevelMetric: MicrodataMetric + Default {}
// Each of the four `MicrodataMetric` implementors in this file opts in.
impl EventLevelMetric for SymmetricDistance {}
impl EventLevelMetric for InsertDeleteDistance {}
impl EventLevelMetric for ChangeOneDistance {}
impl EventLevelMetric for HammingDistance {}