use core::fmt::Debug;
#[cfg(feature = "std")]
use std::{string::String, vec::Vec};
#[cfg(not(feature = "std"))]
extern crate alloc;
#[cfg(not(feature = "std"))]
use alloc::{string::String, vec::Vec};
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MergeError {
IncompatibleConfig {
expected: String,
found: String,
},
VersionMismatch {
expected: u32,
found: u32,
},
}
impl core::fmt::Display for MergeError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
MergeError::IncompatibleConfig { expected, found } => {
write!(f, "incompatible config: expected {}, found {}", expected, found)
}
MergeError::VersionMismatch { expected, found } => {
write!(f, "version mismatch: expected {}, found {}", expected, found)
}
}
}
}
#[cfg(feature = "std")]
impl std::error::Error for MergeError {}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeError {
BufferTooShort { expected: usize, found: usize },
InvalidHeader,
UnsupportedVersion(u32),
Corrupted(String),
}
impl core::fmt::Display for DecodeError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
DecodeError::BufferTooShort { expected, found } => {
write!(f, "buffer too short: expected {}, found {}", expected, found)
}
DecodeError::InvalidHeader => write!(f, "invalid header"),
DecodeError::UnsupportedVersion(v) => write!(f, "unsupported version: {}", v),
DecodeError::Corrupted(msg) => write!(f, "corrupted data: {}", msg),
}
}
}
#[cfg(feature = "std")]
impl std::error::Error for DecodeError {}
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ErrorBounds {
pub lower: f64,
pub estimate: f64,
pub upper: f64,
pub confidence: f64,
}
impl ErrorBounds {
pub fn new(lower: f64, estimate: f64, upper: f64, confidence: f64) -> Self {
Self {
lower,
estimate,
upper,
confidence,
}
}
pub fn contains(&self, value: f64) -> bool {
value >= self.lower && value <= self.upper
}
pub fn width(&self) -> f64 {
self.upper - self.lower
}
pub fn relative_width(&self) -> f64 {
if self.estimate == 0.0 {
0.0
} else {
self.width() / self.estimate
}
}
}
pub trait Sketch: Clone + Debug {
type Item: ?Sized;
fn update(&mut self, item: &Self::Item);
fn merge(&mut self, other: &Self) -> Result<(), MergeError>;
fn clear(&mut self);
fn size_bytes(&self) -> usize;
fn count(&self) -> u64;
fn is_empty(&self) -> bool {
self.count() == 0
}
}
pub trait CardinalitySketch: Sketch {
fn estimate(&self) -> f64;
fn error_bounds(&self, confidence: f64) -> ErrorBounds;
fn relative_error(&self) -> f64;
fn estimate_with_bounds(&self) -> ErrorBounds {
self.error_bounds(0.95)
}
}
pub trait FrequencySketch: Sketch {
fn estimate_frequency(&self, item: &Self::Item) -> u64;
fn exceeds_threshold(&self, item: &Self::Item, threshold: u64) -> bool {
self.estimate_frequency(item) >= threshold
}
}
pub trait HeavyHitters: FrequencySketch
where
Self::Item: Sized + Clone,
{
fn heavy_hitters(&self, threshold: f64) -> Vec<(Self::Item, u64)>;
fn top_k(&self, k: usize) -> Vec<(Self::Item, u64)>;
}
pub trait QuantileSketch: Sketch {
type Value: PartialOrd + Clone;
fn add(&mut self, value: Self::Value);
fn quantile(&self, rank: f64) -> Option<Self::Value>;
fn rank(&self, value: &Self::Value) -> f64;
fn cdf(&self, value: &Self::Value) -> f64 {
self.rank(value)
}
fn min(&self) -> Option<Self::Value>;
fn max(&self) -> Option<Self::Value>;
fn median(&self) -> Option<Self::Value> {
self.quantile(0.5)
}
fn quantiles(&self, ranks: &[f64]) -> Vec<Option<Self::Value>> {
ranks.iter().map(|&r| self.quantile(r)).collect()
}
}
pub trait MembershipSketch: Sketch {
fn contains(&self, item: &Self::Item) -> bool;
fn false_positive_rate(&self) -> f64;
fn len(&self) -> usize;
fn is_filter_empty(&self) -> bool {
self.len() == 0
}
}
pub trait SetSketch: Sketch {
fn union(&self, other: &Self) -> Self;
fn intersection(&self, other: &Self) -> Self;
fn difference(&self, other: &Self) -> Self;
fn jaccard_similarity(&self, other: &Self) -> f64;
}
pub trait SamplingSketch: Sketch
where
Self::Item: Sized + Clone,
{
fn sample(&self) -> &[Self::Item];
fn capacity(&self) -> usize;
fn sample_size(&self) -> usize {
self.sample().len()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_error_bounds() {
let bounds = ErrorBounds::new(90.0, 100.0, 110.0, 0.95);
assert!(bounds.contains(100.0));
assert!(bounds.contains(90.0));
assert!(bounds.contains(110.0));
assert!(!bounds.contains(89.0));
assert!(!bounds.contains(111.0));
assert_eq!(bounds.width(), 20.0);
assert!((bounds.relative_width() - 0.2).abs() < 0.001);
}
}