use std::collections::hash_map::{Entry, HashMap};
use std::hash::Hash;
#[derive(Clone, Debug)]
struct KnownEntry {
f: usize,
delta: usize,
}
#[derive(Clone, Debug)]
pub struct LossyCounter<T>
where
T: Clone + Eq + Hash,
{
epsilon: f64,
known: HashMap<T, KnownEntry>,
n: usize,
width: usize,
}
impl<T> LossyCounter<T>
where
T: Clone + Eq + Hash,
{
pub fn with_epsilon(epsilon: f64) -> Self {
assert!(
(epsilon > 0.) & (epsilon < 1.),
"epsilon ({}) must be greater than 0 and smaller than 1",
epsilon
);
Self {
epsilon,
known: HashMap::new(),
n: 0,
width: (1. / epsilon).ceil() as usize,
}
}
pub fn with_width(width: usize) -> Self {
assert!(width > 0, "width must be greater than 0");
Self {
epsilon: (1. / (width as f64)),
known: HashMap::new(),
n: 0,
width,
}
}
pub fn epsilon(&self) -> f64 {
self.epsilon
}
pub fn width(&self) -> usize {
self.width
}
pub fn n(&self) -> usize {
self.n
}
pub fn add(&mut self, t: T) -> bool {
self.n += 1;
let at_window_end = self.n % self.width == 0;
let b_current = self.n / self.width + if at_window_end { 0 } else { 1 };
let was_new = match self.known.entry(t) {
Entry::Occupied(mut o) => {
let mut value = o.get_mut();
value.f += 1;
false
}
Entry::Vacant(v) => {
v.insert(KnownEntry {
f: 1,
delta: b_current - 1,
});
true
}
};
if at_window_end {
self.known = self
.known
.drain()
.filter(|(_k, v)| v.f + v.delta > b_current)
.collect();
}
was_new
}
pub fn query(&self, threshold: f64) -> impl '_ + Iterator<Item = T> {
let bound = (((threshold - self.epsilon) * (self.n as f64)).ceil()).max(0.) as usize;
self.known
.iter()
.filter(move |(_k, v)| v.f >= bound)
.map(|(k, _v)| k)
.cloned()
}
pub fn clear(&mut self) {
self.known = HashMap::new();
self.n = 0;
}
}
#[cfg(test)]
mod tests {
use super::LossyCounter;
#[test]
fn new_with_epsilon() {
let counter = LossyCounter::<u64>::with_epsilon(0.5);
assert_eq!(counter.epsilon(), 0.5);
assert_eq!(counter.width(), 2);
assert_eq!(counter.n(), 0);
}
#[test]
fn new_with_width() {
let counter = LossyCounter::<u64>::with_width(2);
assert_eq!(counter.width(), 2);
assert_eq!(counter.epsilon(), 0.5);
assert_eq!(counter.n(), 0);
}
#[test]
#[should_panic(expected = "epsilon (0) must be greater than 0 and smaller than 1")]
fn new_panics_epsilon_0() {
LossyCounter::<u64>::with_epsilon(0.);
}
#[test]
#[should_panic(expected = "epsilon (1) must be greater than 0 and smaller than 1")]
fn new_panics_epsilon_1() {
LossyCounter::<u64>::with_epsilon(1.);
}
#[test]
#[should_panic(expected = "width must be greater than 0")]
fn new_panics_width_0() {
LossyCounter::<u64>::with_width(0);
}
#[test]
fn add() {
let mut counter = LossyCounter::<u64>::with_epsilon(0.5);
assert!(counter.add(13));
assert_eq!(counter.n(), 1);
}
#[test]
fn double_add() {
let mut counter = LossyCounter::<u64>::with_epsilon(0.5);
assert!(counter.add(13));
assert!(!counter.add(13));
assert_eq!(counter.n(), 2);
}
#[test]
fn query_all() {
let mut counter = LossyCounter::<u64>::with_epsilon(0.2);
assert!(counter.add(13));
assert!(counter.add(42));
let mut data: Vec<u64> = counter.query(0.).collect();
data.sort_unstable();
assert_eq!(data, vec![13, 42]);
}
#[test]
fn query_part() {
let mut counter = LossyCounter::<u64>::with_epsilon(0.001);
assert!(counter.add(13));
assert!(!counter.add(13));
assert!(counter.add(42));
let mut data: Vec<u64> = counter.query(0.6).collect();
data.sort_unstable();
assert_eq!(data, vec![13]);
}
#[test]
fn query_large() {
let mut counter = LossyCounter::<u64>::with_epsilon(0.01);
for i in 0..1000 {
let j = i % 10;
if j <= 6 {
counter.add(i);
} else {
counter.add(j);
}
}
let mut data: Vec<u64> = counter.query(0.02).collect();
data.sort_unstable();
assert_eq!(data, vec![7, 8, 9]);
}
#[test]
fn clone() {
let mut counter1 = LossyCounter::<u64>::with_epsilon(0.2);
assert!(counter1.add(13));
let mut counter2 = counter1.clone();
assert!(counter2.add(42));
assert_eq!(counter1.n(), 1);
assert_eq!(counter2.n(), 2);
let mut data1: Vec<u64> = counter1.query(0.).collect();
data1.sort_unstable();
assert_eq!(data1, vec![13]);
let mut data2: Vec<u64> = counter2.query(0.).collect();
data2.sort_unstable();
assert_eq!(data2, vec![13, 42]);
}
#[test]
fn clear() {
let mut counter = LossyCounter::<u64>::with_epsilon(0.2);
assert!(counter.add(13));
assert_eq!(counter.n(), 1);
counter.clear();
assert_eq!(counter.n(), 0);
assert!(counter.query(0.).next().is_none());
assert!(counter.add(13));
assert_eq!(counter.n(), 1);
let data: Vec<u64> = counter.query(0.).collect();
assert_eq!(data, vec![13]);
}
#[test]
fn pruning() {
let mut counter = LossyCounter::<u64>::with_epsilon(0.5);
assert!(counter.add(13));
assert_eq!(counter.n(), 1);
let data: Vec<u64> = counter.query(0.).collect();
assert_eq!(data, vec![13]);
assert!(counter.add(42));
assert_eq!(counter.n(), 2);
assert!(counter.query(0.).next().is_none());
}
}