use std::collections::HashMap;
use std::hash::Hash;
use crate::SketchError;
#[derive(Debug, Clone, Copy)]
struct CounterEntry {
count: u64,
error: u64,
}
#[derive(Debug, Clone)]
pub struct SpaceSaving<T>
where
T: Eq + Hash + Clone,
{
capacity: usize,
counters: HashMap<T, CounterEntry>,
total_count: u64,
}
impl<T> SpaceSaving<T>
where
T: Eq + Hash + Clone,
{
pub fn new(capacity: usize) -> Result<Self, SketchError> {
if capacity == 0 {
return Err(SketchError::InvalidParameter(
"capacity must be greater than zero",
));
}
Ok(Self {
capacity,
counters: HashMap::with_capacity(capacity),
total_count: 0,
})
}
pub fn capacity(&self) -> usize {
self.capacity
}
pub fn tracked_items(&self) -> usize {
self.counters.len()
}
pub fn total_count(&self) -> u64 {
self.total_count
}
pub fn is_empty(&self) -> bool {
self.total_count == 0
}
pub fn insert(&mut self, item: T) {
self.add(item, 1);
}
pub fn add(&mut self, item: T, count: u64) {
if count == 0 {
return;
}
if let Some(entry) = self.counters.get_mut(&item) {
entry.count = entry.count.saturating_add(count);
self.total_count = self.total_count.saturating_add(count);
return;
}
if self.counters.len() < self.capacity {
self.counters.insert(item, CounterEntry { count, error: 0 });
self.total_count = self.total_count.saturating_add(count);
return;
}
let (min_item, min_entry) = self
.counters
.iter()
.min_by_key(|(_, entry)| entry.count)
.map(|(tracked_item, entry)| (tracked_item.clone(), *entry))
.expect("non-empty map when capacity is full");
self.counters.remove(&min_item);
self.counters.insert(
item,
CounterEntry {
count: min_entry.count.saturating_add(count),
error: min_entry.count,
},
);
self.total_count = self.total_count.saturating_add(count);
}
pub fn estimate(&self, item: &T) -> Option<u64> {
self.counters.get(item).map(|entry| entry.count)
}
pub fn estimate_with_error(&self, item: &T) -> Option<(u64, u64)> {
self.counters.get(item).map(|entry| (entry.count, entry.error))
}
pub fn lower_bound(&self, item: &T) -> Option<u64> {
self.counters
.get(item)
.map(|entry| entry.count.saturating_sub(entry.error))
}
pub fn top_k(&self, k: usize) -> Vec<(T, u64, u64)> {
if k == 0 {
return Vec::new();
}
let mut entries: Vec<_> = self
.counters
.iter()
.map(|(item, entry)| (item.clone(), entry.count, entry.error))
.collect();
entries.sort_unstable_by(|left, right| right.1.cmp(&left.1));
entries.truncate(k.min(entries.len()));
entries
}
pub fn clear(&mut self) {
self.counters.clear();
self.total_count = 0;
}
pub fn merge(&mut self, other: &Self) -> Result<(), SketchError> {
if self.capacity != other.capacity {
return Err(SketchError::IncompatibleSketches(
"capacity must match for merge",
));
}
for (item, entry) in &other.counters {
self.add(item.clone(), entry.count);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::SpaceSaving;
#[test]
fn constructor_validates_capacity() {
assert!(SpaceSaving::<String>::new(0).is_err());
assert!(SpaceSaving::<String>::new(4).is_ok());
}
#[test]
fn heavy_hitters_are_retained() {
let mut sketch = SpaceSaving::new(5).unwrap();
sketch.add("apple".to_string(), 5_000);
sketch.add("banana".to_string(), 3_000);
sketch.add("carrot".to_string(), 1_000);
for value in 0..200_u64 {
sketch.insert(format!("noise-{value}"));
}
let top = sketch.top_k(3);
let names: Vec<_> = top.iter().map(|(item, _, _)| item.as_str()).collect();
assert!(names.contains(&"apple"));
assert!(names.contains(&"banana"));
}
#[test]
fn estimates_expose_error_bounds() {
let mut sketch = SpaceSaving::new(2).unwrap();
sketch.add("a".to_string(), 10);
sketch.add("b".to_string(), 5);
sketch.add("c".to_string(), 2);
let estimate = sketch.estimate_with_error(&"c".to_string());
if let Some((count, error)) = estimate {
assert!(count >= error);
}
}
#[test]
fn merge_combines_observations() {
let mut left = SpaceSaving::new(4).unwrap();
let mut right = SpaceSaving::new(4).unwrap();
left.add("alpha".to_string(), 50);
right.add("alpha".to_string(), 30);
right.add("beta".to_string(), 20);
left.merge(&right).unwrap();
let alpha_estimate = left.estimate(&"alpha".to_string()).unwrap();
assert!(alpha_estimate >= 70);
}
#[test]
fn merge_rejects_mismatched_capacity() {
let mut left: SpaceSaving<String> = SpaceSaving::new(4).unwrap();
let right: SpaceSaving<String> = SpaceSaving::new(5).unwrap();
assert!(left.merge(&right).is_err());
}
#[test]
fn clear_resets_state() {
let mut sketch = SpaceSaving::new(3).unwrap();
sketch.add("x".to_string(), 10);
assert!(!sketch.is_empty());
sketch.clear();
assert!(sketch.is_empty());
assert_eq!(sketch.tracked_items(), 0);
}
}