use std::hash::Hash;
use crate::{SketchError, seeded_hash64, splitmix64};
#[derive(Debug, Clone)]
pub struct MinMaxSketch {
width: usize,
depth: usize,
counters: Vec<u64>,
seeds: Vec<u64>,
total_count: u64,
}
impl MinMaxSketch {
pub fn new(epsilon: f64, delta: f64) -> Result<Self, SketchError> {
if !epsilon.is_finite() || epsilon <= 0.0 || epsilon >= 1.0 {
return Err(SketchError::InvalidParameter(
"epsilon must be finite and strictly between 0 and 1",
));
}
if !delta.is_finite() || delta <= 0.0 || delta >= 1.0 {
return Err(SketchError::InvalidParameter(
"delta must be finite and strictly between 0 and 1",
));
}
let width = (std::f64::consts::E / epsilon).ceil() as usize;
let depth = (1.0 / delta).ln().ceil() as usize;
Self::with_dimensions(width.max(1), depth.max(1))
}
pub fn with_dimensions(width: usize, depth: usize) -> Result<Self, SketchError> {
if width == 0 {
return Err(SketchError::InvalidParameter(
"width must be greater than zero",
));
}
if depth == 0 {
return Err(SketchError::InvalidParameter(
"depth must be greater than zero",
));
}
let table_len = width
.checked_mul(depth)
.ok_or(SketchError::InvalidParameter(
"width * depth overflows usize",
))?;
let seeds = (0..depth)
.map(|idx| splitmix64((idx as u64).wrapping_add(0xA076_1D64_78BD_642F)))
.collect();
Ok(Self {
width,
depth,
counters: vec![0; table_len],
seeds,
total_count: 0,
})
}
pub fn width(&self) -> usize {
self.width
}
pub fn depth(&self) -> usize {
self.depth
}
pub fn total_count(&self) -> u64 {
self.total_count
}
pub fn is_empty(&self) -> bool {
self.total_count == 0
}
pub fn add<T: Hash>(&mut self, item: &T, count: u64) {
if count == 0 {
return;
}
let mut min_counter = u64::MAX;
for row in 0..self.depth {
let idx = self.counter_index(row, item);
min_counter = min_counter.min(self.counters[idx]);
}
let target = min_counter.saturating_add(count);
for row in 0..self.depth {
let idx = self.counter_index(row, item);
if self.counters[idx] < target {
self.counters[idx] = target;
}
}
self.total_count = self.total_count.saturating_add(count);
}
pub fn increment<T: Hash>(&mut self, item: &T) {
self.add(item, 1);
}
pub fn estimate<T: Hash>(&self, item: &T) -> u64 {
let mut min_counter = u64::MAX;
for row in 0..self.depth {
let idx = self.counter_index(row, item);
min_counter = min_counter.min(self.counters[idx]);
}
min_counter
}
pub fn max_estimate<T: Hash>(&self, item: &T) -> u64 {
let mut max_counter = 0_u64;
for row in 0..self.depth {
let idx = self.counter_index(row, item);
max_counter = max_counter.max(self.counters[idx]);
}
max_counter
}
pub fn estimate_interval<T: Hash>(&self, item: &T) -> (u64, u64) {
(self.estimate(item), self.max_estimate(item))
}
pub fn clear(&mut self) {
self.counters.fill(0);
self.total_count = 0;
}
pub fn merge(&mut self, other: &Self) -> Result<(), SketchError> {
if self.width != other.width || self.depth != other.depth {
return Err(SketchError::IncompatibleSketches(
"width/depth must match for merge",
));
}
if self.seeds != other.seeds {
return Err(SketchError::IncompatibleSketches(
"hash seeds must match for merge",
));
}
for (left, right) in self.counters.iter_mut().zip(other.counters.iter()) {
*left = left.saturating_add(*right);
}
self.total_count = self.total_count.saturating_add(other.total_count);
Ok(())
}
fn counter_index<T: Hash>(&self, row: usize, item: &T) -> usize {
let column = (seeded_hash64(item, self.seeds[row]) as usize) % self.width;
row * self.width + column
}
}
#[cfg(test)]
mod tests {
use super::MinMaxSketch;
#[test]
fn constructor_from_error_bounds_creates_non_zero_dimensions() {
let sketch = MinMaxSketch::new(0.01, 0.01).expect("valid bounds");
assert!(sketch.width() > 0);
assert!(sketch.depth() > 0);
}
#[test]
fn constructor_rejects_invalid_error_bounds() {
assert!(MinMaxSketch::new(0.0, 0.1).is_err());
assert!(MinMaxSketch::new(0.1, 0.0).is_err());
assert!(MinMaxSketch::new(1.0, 0.1).is_err());
assert!(MinMaxSketch::new(0.1, 1.0).is_err());
assert!(MinMaxSketch::new(f64::NAN, 0.1).is_err());
}
#[test]
fn with_dimensions_rejects_zero_sizes() {
assert!(MinMaxSketch::with_dimensions(0, 1).is_err());
assert!(MinMaxSketch::with_dimensions(1, 0).is_err());
}
#[test]
fn estimate_is_monotonic_after_updates() {
let mut sketch = MinMaxSketch::with_dimensions(128, 5).unwrap();
let mut previous = 0_u64;
for _ in 0..50 {
sketch.increment(&"user:42");
let current = sketch.estimate(&"user:42");
assert!(current >= previous);
previous = current;
}
}
#[test]
fn estimate_interval_is_ordered() {
let mut sketch = MinMaxSketch::with_dimensions(128, 4).unwrap();
sketch.add(&"alpha", 7);
let (min_estimate, max_estimate) = sketch.estimate_interval(&"alpha");
assert!(min_estimate <= max_estimate);
}
#[test]
fn estimate_is_never_below_exact_count_in_small_stream() {
let mut sketch = MinMaxSketch::with_dimensions(512, 6).unwrap();
for _ in 0..30 {
sketch.increment(&"a");
}
for _ in 0..7 {
sketch.increment(&"b");
}
for _ in 0..13 {
sketch.increment(&"c");
}
assert!(sketch.estimate(&"a") >= 30);
assert!(sketch.estimate(&"b") >= 7);
assert!(sketch.estimate(&"c") >= 13);
}
#[test]
fn clear_resets_all_state() {
let mut sketch = MinMaxSketch::with_dimensions(64, 4).unwrap();
sketch.add(&"key", 10);
assert!(!sketch.is_empty());
sketch.clear();
assert_eq!(sketch.total_count(), 0);
assert_eq!(sketch.estimate(&"key"), 0);
assert!(sketch.is_empty());
}
#[test]
fn merge_combines_sketches() {
let mut left = MinMaxSketch::with_dimensions(256, 5).unwrap();
let mut right = MinMaxSketch::with_dimensions(256, 5).unwrap();
left.add(&"hot", 10);
right.add(&"hot", 15);
right.add(&"cold", 4);
left.merge(&right).expect("compatible sketches");
assert!(left.estimate(&"hot") >= 25);
assert!(left.estimate(&"cold") >= 4);
assert!(left.total_count() >= 29);
}
#[test]
fn merge_rejects_mismatched_dimensions() {
let mut left = MinMaxSketch::with_dimensions(64, 4).unwrap();
let right = MinMaxSketch::with_dimensions(65, 4).unwrap();
assert!(left.merge(&right).is_err());
}
#[test]
fn saturating_addition_avoids_overflow() {
let mut sketch = MinMaxSketch::with_dimensions(32, 4).unwrap();
sketch.add(&"overflow", u64::MAX);
sketch.increment(&"overflow");
assert_eq!(sketch.estimate(&"overflow"), u64::MAX);
assert_eq!(sketch.total_count(), u64::MAX);
}
}