use std::borrow::Borrow;
use std::hash::Hash;
use rustc_hash::FxHashMap;
use crate::ds::slot_arena::{SlotArena, SlotId};
#[derive(Debug, Clone)]
#[repr(C)]
struct Entry<K> {
prev: Option<SlotId>,
next: Option<SlotId>,
freq: u64,
last_epoch: u64,
key: K,
}
#[derive(Debug, Default, Clone)]
struct Bucket {
head: Option<SlotId>,
tail: Option<SlotId>,
prev: Option<u64>,
next: Option<u64>,
}
#[derive(Debug, Clone)]
pub struct FrequencyBuckets<K> {
entries: SlotArena<Entry<K>>,
index: FxHashMap<K, SlotId>,
buckets: FxHashMap<u64, Bucket>,
min_freq: u64,
epoch: u64,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub struct FrequencyBucketEntryMeta<'a, K> {
pub key: &'a K,
pub freq: u64,
pub last_epoch: u64,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub struct ShardedFrequencyBucketEntryMeta<K> {
pub key: K,
pub freq: u64,
pub last_epoch: u64,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub struct FrequencyBucketEntryDebug<K> {
pub id: SlotId,
pub key: K,
pub freq: u64,
pub last_epoch: u64,
}
pub const DEFAULT_BUCKET_PREALLOC: usize = 32;
impl<K> FrequencyBuckets<K>
where
K: Eq + Hash + Clone,
{
pub fn with_capacity(capacity: usize) -> Self {
Self::with_capacity_and_bucket_hint(capacity, DEFAULT_BUCKET_PREALLOC)
}
pub fn with_capacity_and_bucket_hint(capacity: usize, bucket_hint: usize) -> Self {
Self {
entries: SlotArena::with_capacity(capacity),
index: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
buckets: FxHashMap::with_capacity_and_hasher(bucket_hint, Default::default()),
min_freq: 0,
epoch: 0,
}
}
pub fn new() -> Self {
Self {
entries: SlotArena::new(),
index: FxHashMap::default(),
buckets: FxHashMap::default(),
min_freq: 0,
epoch: 0,
}
}
pub fn len(&self) -> usize {
self.entries.len()
}
pub fn is_empty(&self) -> bool {
self.entries.is_empty()
}
#[inline]
pub fn contains<Q>(&self, key: &Q) -> bool
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
self.index.contains_key(key)
}
pub fn current_epoch(&self) -> u64 {
self.epoch
}
pub fn advance_epoch(&mut self) -> u64 {
self.epoch = self.epoch.wrapping_add(1);
self.epoch
}
pub fn set_epoch(&mut self, epoch: u64) {
self.epoch = epoch;
}
#[inline]
pub fn frequency<Q>(&self, key: &Q) -> Option<u64>
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let id = *self.index.get(key)?;
self.entries.get(id).map(|entry| entry.freq)
}
pub fn entry_epoch<Q>(&self, key: &Q) -> Option<u64>
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let id = *self.index.get(key)?;
self.entries.get(id).map(|entry| entry.last_epoch)
}
pub fn set_entry_epoch<Q>(&mut self, key: &Q, epoch: u64) -> bool
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let id = match self.index.get(key) {
Some(id) => *id,
None => return false,
};
if let Some(entry) = self.entries.get_mut(id) {
entry.last_epoch = epoch;
return true;
}
false
}
pub fn min_freq(&self) -> Option<u64> {
if self.min_freq == 0 {
None
} else {
Some(self.min_freq)
}
}
pub fn peek_min(&self) -> Option<(&K, u64)> {
if self.min_freq == 0 {
return None;
}
let min_freq = self.min_freq;
let bucket = self.buckets.get(&min_freq)?;
let id = bucket.tail?;
let entry = self.entries.get(id)?;
Some((&entry.key, entry.freq))
}
pub fn peek_min_id(&self) -> Option<SlotId> {
if self.min_freq == 0 {
return None;
}
let bucket = self.buckets.get(&self.min_freq)?;
bucket.tail
}
pub fn peek_min_key(&self) -> Option<&K> {
let id = self.peek_min_id()?;
self.entries.get(id).map(|entry| &entry.key)
}
pub fn iter_bucket_ids(&self, freq: u64) -> BucketIds<'_, K> {
let head = self.buckets.get(&freq).and_then(|bucket| bucket.head);
BucketIds {
buckets: self,
current: head,
}
}
pub fn iter_bucket_entries(&self, freq: u64) -> BucketEntries<'_, K> {
let head = self.buckets.get(&freq).and_then(|bucket| bucket.head);
BucketEntries {
buckets: self,
current: head,
}
}
pub fn iter(&self) -> Iter<'_, K> {
Iter {
inner: self.entries.iter(),
}
}
#[inline]
pub fn insert(&mut self, key: K) -> bool {
if self.index.contains_key(&key) {
return false;
}
let id = self.entries.insert(Entry {
key: key.clone(),
freq: 1,
last_epoch: self.epoch,
prev: None,
next: None,
});
self.index.insert(key, id);
if !self.buckets.contains_key(&1) {
let next = if self.min_freq == 0 {
None
} else {
Some(self.min_freq)
};
self.insert_bucket(1, None, next);
}
self.list_push_front(1, id);
if self.min_freq == 0 || self.min_freq > 1 {
self.min_freq = 1;
}
true
}
pub fn insert_batch<I>(&mut self, keys: I) -> usize
where
I: IntoIterator<Item = K>,
{
let mut inserted = 0;
for key in keys {
if self.insert(key) {
inserted += 1;
}
}
inserted
}
#[inline]
pub fn touch<Q>(&mut self, key: &Q) -> Option<u64>
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let id = *self.index.get(key)?;
let current_freq = self.entries.get(id)?.freq;
if current_freq == u64::MAX {
self.list_remove(current_freq, id)?;
self.list_push_front(current_freq, id);
if let Some(entry) = self.entries.get_mut(id) {
entry.last_epoch = self.epoch;
}
return Some(current_freq);
}
let next_freq = current_freq + 1;
let (prev_freq, next_existing) = {
let bucket = self.buckets.get(¤t_freq)?;
(bucket.prev, bucket.next)
};
self.list_remove(current_freq, id)?;
let bucket_empty = self.bucket_is_empty(current_freq);
if bucket_empty {
self.remove_bucket(current_freq, prev_freq, next_existing);
if self.min_freq == current_freq {
self.min_freq = next_existing.unwrap_or(0);
}
}
if !self.buckets.contains_key(&next_freq) {
let prev = if bucket_empty {
prev_freq
} else {
Some(current_freq)
};
let next = next_existing;
self.insert_bucket(next_freq, prev, next);
}
if let Some(entry) = self.entries.get_mut(id) {
entry.freq = next_freq;
entry.last_epoch = self.epoch;
}
self.list_push_front(next_freq, id);
if self.min_freq == 0 || next_freq < self.min_freq {
self.min_freq = next_freq;
}
Some(next_freq)
}
pub fn touch_batch<I>(&mut self, keys: I) -> usize
where
I: IntoIterator<Item = K>,
{
let mut touched = 0;
for key in keys {
if self.touch(&key).is_some() {
touched += 1;
}
}
touched
}
pub fn touch_capped<Q>(&mut self, key: &Q, max_freq: u64) -> Option<u64>
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let max_freq = max_freq.max(1);
let id = *self.index.get(key)?;
let current_freq = self.entries.get(id)?.freq;
if current_freq >= max_freq {
self.list_remove(current_freq, id)?;
self.list_push_front(current_freq, id);
if let Some(entry) = self.entries.get_mut(id) {
entry.last_epoch = self.epoch;
}
return Some(current_freq);
}
let next_freq = current_freq + 1;
let (prev_freq, next_existing) = {
let bucket = self.buckets.get(¤t_freq)?;
(bucket.prev, bucket.next)
};
self.list_remove(current_freq, id)?;
let bucket_empty = self.bucket_is_empty(current_freq);
if bucket_empty {
self.remove_bucket(current_freq, prev_freq, next_existing);
if self.min_freq == current_freq {
self.min_freq = next_existing.unwrap_or(0);
}
}
if !self.buckets.contains_key(&next_freq) {
let prev = if bucket_empty {
prev_freq
} else {
Some(current_freq)
};
let next = next_existing;
self.insert_bucket(next_freq, prev, next);
}
if let Some(entry) = self.entries.get_mut(id) {
entry.freq = next_freq;
entry.last_epoch = self.epoch;
}
self.list_push_front(next_freq, id);
if self.min_freq == 0 || next_freq < self.min_freq {
self.min_freq = next_freq;
}
Some(next_freq)
}
pub fn decay_halve(&mut self) {
if self.is_empty() {
return;
}
self.rebuild_with(|freq| (freq / 2).max(1));
}
pub fn rebase_min_freq(&mut self) {
if self.min_freq <= 1 {
return;
}
let delta = self.min_freq - 1;
self.rebuild_with(|freq| freq.saturating_sub(delta).max(1));
}
#[inline]
pub fn remove<Q>(&mut self, key: &Q) -> Option<u64>
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let id = self.index.remove(key)?;
let freq = self.entries.get(id)?.freq;
self.list_remove(freq, id)?;
let bucket_empty = self.bucket_is_empty(freq);
let (prev, next) = {
let bucket = self.buckets.get(&freq)?;
(bucket.prev, bucket.next)
};
if bucket_empty {
self.remove_bucket(freq, prev, next);
if self.min_freq == freq {
self.min_freq = next.unwrap_or(0);
}
}
self.entries.remove(id).map(|entry| entry.freq)
}
pub fn remove_batch<I>(&mut self, keys: I) -> usize
where
I: IntoIterator<Item = K>,
{
let mut removed = 0;
for key in keys {
if self.remove(&key).is_some() {
removed += 1;
}
}
removed
}
#[inline]
pub fn pop_min(&mut self) -> Option<(K, u64)> {
let freq = self.min_freq;
if freq == 0 {
return None;
}
let id = self.buckets.get(&freq)?.tail?;
self.list_remove(freq, id)?;
let bucket_empty = self.bucket_is_empty(freq);
let (prev, next) = {
let bucket = self.buckets.get(&freq)?;
(bucket.prev, bucket.next)
};
if bucket_empty {
self.remove_bucket(freq, prev, next);
if self.min_freq == freq {
self.min_freq = next.unwrap_or(0);
}
}
let entry = self.entries.remove(id)?;
self.index.remove(&entry.key);
Some((entry.key, entry.freq))
}
pub fn clear(&mut self) {
self.entries.clear();
self.index.clear();
self.buckets.clear();
self.min_freq = 0;
self.epoch = 0;
}
pub fn clear_shrink(&mut self) {
self.clear();
self.entries.shrink_to_fit();
self.index.shrink_to_fit();
self.buckets.shrink_to_fit();
}
pub fn approx_bytes(&self) -> usize {
std::mem::size_of::<Self>()
+ self.entries.approx_bytes()
+ self.index.capacity() * std::mem::size_of::<(K, SlotId)>()
+ self.buckets.capacity() * std::mem::size_of::<(u64, Bucket)>()
}
fn ensure_bucket(&mut self, freq: u64) {
if self.buckets.contains_key(&freq) {
return;
}
let mut prev = None;
let mut next = None;
for &f in self.buckets.keys() {
if f < freq && prev.is_none_or(|p| f > p) {
prev = Some(f);
}
if f > freq && next.is_none_or(|n| f < n) {
next = Some(f);
}
}
self.insert_bucket(freq, prev, next);
}
fn insert_with_freq(&mut self, key: K, freq: u64) {
let freq = freq.max(1);
let id = self.entries.insert(Entry {
key: key.clone(),
freq,
last_epoch: self.epoch,
prev: None,
next: None,
});
self.index.insert(key, id);
self.ensure_bucket(freq);
self.list_push_front(freq, id);
if self.min_freq == 0 || freq < self.min_freq {
self.min_freq = freq;
}
}
fn rebuild_with<F>(&mut self, mut f: F)
where
F: FnMut(u64) -> u64,
{
let entries: Vec<(K, u64)> = self
.entries
.iter()
.map(|(_, entry)| (entry.key.clone(), f(entry.freq)))
.collect();
self.entries.clear();
self.index.clear();
self.buckets.clear();
self.min_freq = 0;
for (key, freq) in entries {
self.insert_with_freq(key, freq);
}
}
#[cfg(any(test, debug_assertions))]
pub fn debug_snapshot(&self) -> FrequencyBucketsSnapshot<K> {
let mut buckets: Vec<(u64, Vec<SlotId>)> = self
.buckets
.keys()
.copied()
.map(|freq| (freq, self.iter_bucket_ids(freq).collect()))
.collect();
for (_, ids) in &mut buckets {
ids.sort_by_key(|id| id.index());
}
buckets.sort_by_key(|(freq, _)| *freq);
let mut bucket_entries: Vec<(u64, Vec<FrequencyBucketEntryDebug<K>>)> = self
.buckets
.keys()
.copied()
.map(|freq| {
let mut entries: Vec<_> = self
.iter_bucket_entries(freq)
.map(|(id, meta)| FrequencyBucketEntryDebug {
id,
key: meta.key.clone(),
freq: meta.freq,
last_epoch: meta.last_epoch,
})
.collect();
entries.sort_by_key(|entry| entry.id.index());
(freq, entries)
})
.collect();
bucket_entries.sort_by_key(|(freq, _)| *freq);
let mut entry_epochs: Vec<(SlotId, u64)> = self
.entries
.iter()
.map(|(id, entry)| (id, entry.last_epoch))
.collect();
entry_epochs.sort_by_key(|(id, _)| id.index());
FrequencyBucketsSnapshot {
min_freq: self.min_freq(),
entries_len: self.entries.len(),
index_len: self.index.len(),
buckets,
epoch: self.epoch,
entry_epochs,
bucket_entries,
}
}
#[cfg(any(test, debug_assertions))]
pub fn debug_validate_invariants(&self) {
assert_eq!(self.len(), self.index.len());
if self.is_empty() {
assert!(self.buckets.is_empty());
assert_eq!(self.min_freq, 0);
return;
}
assert!(self.min_freq > 0);
assert!(self.buckets.contains_key(&self.min_freq));
for (&freq, bucket) in &self.buckets {
assert!(bucket.head.is_some());
assert!(bucket.tail.is_some());
if let Some(prev) = bucket.prev {
assert!(self.buckets.contains_key(&prev));
assert_eq!(self.buckets[&prev].next, Some(freq));
} else {
assert_eq!(self.min_freq, freq);
}
if let Some(next) = bucket.next {
assert!(self.buckets.contains_key(&next));
assert_eq!(self.buckets[&next].prev, Some(freq));
}
let mut current = bucket.head;
let mut last = None;
let mut count = 0usize;
while let Some(id) = current {
let entry = self.entries.get(id).expect("bucket entry missing");
assert_eq!(entry.freq, freq);
assert_eq!(entry.prev, last);
assert_eq!(self.index.get(&entry.key), Some(&id));
last = Some(id);
current = entry.next;
count += 1;
}
assert_eq!(bucket.tail, last);
assert!(count > 0);
}
}
fn bucket_is_empty(&self, freq: u64) -> bool {
self.buckets
.get(&freq)
.map(|bucket| bucket.head.is_none())
.unwrap_or(true)
}
fn insert_bucket(&mut self, freq: u64, prev: Option<u64>, next: Option<u64>) {
let bucket = Bucket {
head: None,
tail: None,
prev,
next,
};
self.buckets.insert(freq, bucket);
if let Some(prev) = prev {
if let Some(prev_bucket) = self.buckets.get_mut(&prev) {
prev_bucket.next = Some(freq);
}
}
if let Some(next) = next {
if let Some(next_bucket) = self.buckets.get_mut(&next) {
next_bucket.prev = Some(freq);
}
}
}
fn remove_bucket(&mut self, freq: u64, prev: Option<u64>, next: Option<u64>) {
if let Some(prev) = prev {
if let Some(prev_bucket) = self.buckets.get_mut(&prev) {
prev_bucket.next = next;
}
}
if let Some(next) = next {
if let Some(next_bucket) = self.buckets.get_mut(&next) {
next_bucket.prev = prev;
}
}
self.buckets.remove(&freq);
}
fn list_push_front(&mut self, freq: u64, id: SlotId) {
let bucket = self.buckets.get_mut(&freq).expect("bucket missing");
let old_head = bucket.head;
if let Some(entry) = self.entries.get_mut(id) {
entry.prev = None;
entry.next = old_head;
}
if let Some(old_head) = old_head {
if let Some(entry) = self.entries.get_mut(old_head) {
entry.prev = Some(id);
}
} else {
bucket.tail = Some(id);
}
bucket.head = Some(id);
}
fn list_remove(&mut self, freq: u64, id: SlotId) -> Option<()> {
let (prev, next) = {
let entry = self.entries.get(id)?;
(entry.prev, entry.next)
};
let bucket = self.buckets.get_mut(&freq)?;
if let Some(prev) = prev {
if let Some(entry) = self.entries.get_mut(prev) {
entry.next = next;
}
} else {
bucket.head = next;
}
if let Some(next) = next {
if let Some(entry) = self.entries.get_mut(next) {
entry.prev = prev;
}
} else {
bucket.tail = prev;
}
if let Some(entry) = self.entries.get_mut(id) {
entry.prev = None;
entry.next = None;
}
Some(())
}
}
#[cfg(any(test, debug_assertions))]
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct FrequencyBucketsSnapshot<K> {
pub min_freq: Option<u64>,
pub entries_len: usize,
pub index_len: usize,
pub buckets: Vec<(u64, Vec<SlotId>)>,
pub epoch: u64,
pub entry_epochs: Vec<(SlotId, u64)>,
pub bucket_entries: Vec<(u64, Vec<FrequencyBucketEntryDebug<K>>)>,
}
#[cfg(feature = "concurrency")]
#[derive(Debug)]
pub struct ShardedFrequencyBuckets<K> {
shards: Vec<parking_lot::RwLock<FrequencyBuckets<K>>>,
selector: crate::ds::ShardSelector,
}
#[cfg(feature = "concurrency")]
impl<K> ShardedFrequencyBuckets<K>
where
K: Eq + Hash + Clone,
{
pub fn new(shards: usize) -> Self {
let shards = shards.max(1);
let mut vec = Vec::with_capacity(shards);
for _ in 0..shards {
vec.push(parking_lot::RwLock::new(FrequencyBuckets::new()));
}
Self {
shards: vec,
selector: crate::ds::ShardSelector::new(shards, 0),
}
}
pub fn with_shards(shards: usize, capacity_per_shard: usize) -> Self {
let shards = shards.max(1);
let mut vec = Vec::with_capacity(shards);
for _ in 0..shards {
vec.push(parking_lot::RwLock::new(FrequencyBuckets::with_capacity(
capacity_per_shard,
)));
}
Self {
shards: vec,
selector: crate::ds::ShardSelector::new(shards, 0),
}
}
pub fn with_shards_seed(shards: usize, capacity_per_shard: usize, seed: u64) -> Self {
let shards = shards.max(1);
let mut vec = Vec::with_capacity(shards);
for _ in 0..shards {
vec.push(parking_lot::RwLock::new(FrequencyBuckets::with_capacity(
capacity_per_shard,
)));
}
Self {
shards: vec,
selector: crate::ds::ShardSelector::new(shards, seed),
}
}
pub fn shard_count(&self) -> usize {
self.shards.len()
}
fn shard_for<Q: Hash + ?Sized>(&self, key: &Q) -> usize {
self.selector.shard_for_key(key)
}
pub fn shard_for_key<Q>(&self, key: &Q) -> usize
where
K: Borrow<Q>,
Q: Hash + ?Sized,
{
self.selector.shard_for_key(key)
}
pub fn insert(&self, key: K) -> bool {
let shard = self.shard_for(&key);
let mut buckets = self.shards[shard].write();
buckets.insert(key)
}
pub fn touch<Q>(&self, key: &Q) -> Option<u64>
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let shard = self.shard_for(key);
let mut buckets = self.shards[shard].write();
buckets.touch(key)
}
pub fn remove<Q>(&self, key: &Q) -> Option<u64>
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let shard = self.shard_for(key);
let mut buckets = self.shards[shard].write();
buckets.remove(key)
}
pub fn frequency<Q>(&self, key: &Q) -> Option<u64>
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let shard = self.shard_for(key);
let buckets = self.shards[shard].read();
buckets.frequency(key)
}
pub fn contains<Q>(&self, key: &Q) -> bool
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
let shard = self.shard_for(key);
let buckets = self.shards[shard].read();
buckets.contains(key)
}
pub fn len(&self) -> usize {
self.shards.iter().map(|b| b.read().len()).sum()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn clear(&self) {
for shard in &self.shards {
shard.write().clear();
}
}
pub fn clear_shrink(&self) {
for shard in &self.shards {
shard.write().clear_shrink();
}
}
pub fn collect_bucket_entries(
&self,
freq: u64,
) -> Vec<(SlotId, ShardedFrequencyBucketEntryMeta<K>)> {
let mut entries = Vec::new();
for shard in &self.shards {
let buckets = shard.read();
entries.extend(buckets.iter_bucket_entries(freq).map(|(id, meta)| {
(
id,
ShardedFrequencyBucketEntryMeta {
key: meta.key.clone(),
freq: meta.freq,
last_epoch: meta.last_epoch,
},
)
}));
}
entries
}
pub fn collect_entries(&self) -> Vec<(SlotId, ShardedFrequencyBucketEntryMeta<K>)> {
let mut entries = Vec::new();
for shard in &self.shards {
let buckets = shard.read();
entries.extend(buckets.iter().map(|(id, meta)| {
(
id,
ShardedFrequencyBucketEntryMeta {
key: meta.key.clone(),
freq: meta.freq,
last_epoch: meta.last_epoch,
},
)
}));
}
entries
}
pub fn collect_bucket_entries_with_shard(
&self,
freq: u64,
) -> Vec<(usize, SlotId, ShardedFrequencyBucketEntryMeta<K>)> {
let mut entries = Vec::new();
for (idx, shard) in self.shards.iter().enumerate() {
let buckets = shard.read();
entries.extend(buckets.iter_bucket_entries(freq).map(|(id, meta)| {
(
idx,
id,
ShardedFrequencyBucketEntryMeta {
key: meta.key.clone(),
freq: meta.freq,
last_epoch: meta.last_epoch,
},
)
}));
}
entries
}
pub fn collect_entries_with_shard(
&self,
) -> Vec<(usize, SlotId, ShardedFrequencyBucketEntryMeta<K>)> {
let mut entries = Vec::new();
for (idx, shard) in self.shards.iter().enumerate() {
let buckets = shard.read();
entries.extend(buckets.iter().map(|(id, meta)| {
(
idx,
id,
ShardedFrequencyBucketEntryMeta {
key: meta.key.clone(),
freq: meta.freq,
last_epoch: meta.last_epoch,
},
)
}));
}
entries
}
pub fn approx_bytes(&self) -> usize {
self.shards
.iter()
.map(|shard| shard.read().approx_bytes())
.sum()
}
pub fn peek_min(&self) -> Option<(K, u64)> {
let mut best: Option<(usize, u64)> = None;
for (idx, shard) in self.shards.iter().enumerate() {
let buckets = shard.read();
let Some(freq) = buckets.min_freq() else {
continue;
};
let is_better = best.is_none_or(|(_, best_freq)| freq < best_freq);
if is_better {
best = Some((idx, freq));
}
}
let (idx, _) = best?;
let buckets = self.shards[idx].read();
buckets.peek_min().map(|(key, freq)| (key.clone(), freq))
}
pub fn pop_min(&self) -> Option<(K, u64)> {
let mut best: Option<(usize, u64)> = None;
for (idx, shard) in self.shards.iter().enumerate() {
let buckets = shard.read();
let Some(freq) = buckets.min_freq() else {
continue;
};
let is_better = best.is_none_or(|(_, best_freq)| freq < best_freq);
if is_better {
best = Some((idx, freq));
}
}
let (idx, _) = best?;
let mut buckets = self.shards[idx].write();
buckets.pop_min()
}
}
#[derive(Debug, Clone)]
pub struct FrequencyBucketsHandle<H> {
inner: FrequencyBuckets<H>,
}
impl<H> FrequencyBucketsHandle<H>
where
H: Eq + Hash + Copy,
{
pub fn new() -> Self {
Self {
inner: FrequencyBuckets::new(),
}
}
pub fn with_capacity(capacity: usize) -> Self {
Self {
inner: FrequencyBuckets::with_capacity(capacity),
}
}
pub fn with_capacity_and_bucket_hint(capacity: usize, bucket_hint: usize) -> Self {
Self {
inner: FrequencyBuckets::with_capacity_and_bucket_hint(capacity, bucket_hint),
}
}
pub fn len(&self) -> usize {
self.inner.len()
}
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
pub fn contains(&self, handle: &H) -> bool {
self.inner.contains(handle)
}
pub fn frequency(&self, handle: &H) -> Option<u64> {
self.inner.frequency(handle)
}
pub fn current_epoch(&self) -> u64 {
self.inner.current_epoch()
}
pub fn advance_epoch(&mut self) -> u64 {
self.inner.advance_epoch()
}
pub fn set_epoch(&mut self, epoch: u64) {
self.inner.set_epoch(epoch);
}
pub fn entry_epoch(&self, handle: &H) -> Option<u64> {
self.inner.entry_epoch(handle)
}
pub fn set_entry_epoch(&mut self, handle: &H, epoch: u64) -> bool {
self.inner.set_entry_epoch(handle, epoch)
}
pub fn min_freq(&self) -> Option<u64> {
self.inner.min_freq()
}
pub fn peek_min(&self) -> Option<(H, u64)> {
self.inner.peek_min().map(|(handle, freq)| (*handle, freq))
}
pub fn peek_min_ref(&self) -> Option<(&H, u64)> {
self.inner.peek_min()
}
pub fn iter_bucket_ids(&self, freq: u64) -> BucketIds<'_, H> {
self.inner.iter_bucket_ids(freq)
}
pub fn iter_bucket_entries(&self, freq: u64) -> BucketEntries<'_, H> {
self.inner.iter_bucket_entries(freq)
}
pub fn iter(&self) -> Iter<'_, H> {
self.inner.iter()
}
pub fn insert(&mut self, handle: H) -> bool {
self.inner.insert(handle)
}
pub fn touch(&mut self, handle: &H) -> Option<u64> {
self.inner.touch(handle)
}
pub fn touch_capped(&mut self, handle: &H, max_freq: u64) -> Option<u64> {
self.inner.touch_capped(handle, max_freq)
}
pub fn remove(&mut self, handle: &H) -> Option<u64> {
self.inner.remove(handle)
}
pub fn pop_min(&mut self) -> Option<(H, u64)> {
self.inner.pop_min()
}
pub fn decay_halve(&mut self) {
self.inner.decay_halve();
}
pub fn rebase_min_freq(&mut self) {
self.inner.rebase_min_freq();
}
pub fn clear(&mut self) {
self.inner.clear();
}
pub fn clear_shrink(&mut self) {
self.inner.clear_shrink();
}
pub fn into_inner(self) -> FrequencyBuckets<H> {
self.inner
}
pub fn approx_bytes(&self) -> usize {
self.inner.approx_bytes()
}
#[cfg(any(test, debug_assertions))]
pub fn debug_snapshot(&self) -> FrequencyBucketsSnapshot<H> {
self.inner.debug_snapshot()
}
#[cfg(any(test, debug_assertions))]
pub fn debug_validate_invariants(&self) {
self.inner.debug_validate_invariants();
}
}
impl<H> Default for FrequencyBucketsHandle<H>
where
H: Eq + Hash + Copy,
{
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone)]
pub struct BucketIds<'a, K> {
buckets: &'a FrequencyBuckets<K>,
current: Option<SlotId>,
}
impl<'a, K> Iterator for BucketIds<'a, K> {
type Item = SlotId;
fn next(&mut self) -> Option<Self::Item> {
let id = self.current?;
let entry = self.buckets.entries.get(id)?;
self.current = entry.next;
Some(id)
}
}
#[derive(Debug, Clone)]
pub struct BucketEntries<'a, K> {
buckets: &'a FrequencyBuckets<K>,
current: Option<SlotId>,
}
impl<'a, K> Iterator for BucketEntries<'a, K> {
type Item = (SlotId, FrequencyBucketEntryMeta<'a, K>);
fn next(&mut self) -> Option<Self::Item> {
let id = self.current?;
let entry = self.buckets.entries.get(id)?;
self.current = entry.next;
Some((
id,
FrequencyBucketEntryMeta {
key: &entry.key,
freq: entry.freq,
last_epoch: entry.last_epoch,
},
))
}
}
#[derive(Debug, Clone)]
pub struct Iter<'a, K> {
inner: crate::ds::slot_arena::Iter<'a, Entry<K>>,
}
impl<'a, K: 'a> Iterator for Iter<'a, K> {
type Item = (SlotId, FrequencyBucketEntryMeta<'a, K>);
fn next(&mut self) -> Option<Self::Item> {
let (id, entry) = self.inner.next()?;
Some((
id,
FrequencyBucketEntryMeta {
key: &entry.key,
freq: entry.freq,
last_epoch: entry.last_epoch,
},
))
}
}
impl<K> Default for FrequencyBuckets<K>
where
K: Eq + Hash + Clone,
{
fn default() -> Self {
Self::new()
}
}
impl<'a, K> IntoIterator for &'a FrequencyBuckets<K>
where
K: Eq + Hash + Clone,
{
type Item = (SlotId, FrequencyBucketEntryMeta<'a, K>);
type IntoIter = Iter<'a, K>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn frequency_buckets_basic_flow() {
let mut buckets = FrequencyBuckets::new();
assert!(buckets.insert("a"));
assert!(buckets.insert("b"));
assert_eq!(buckets.frequency(&"a"), Some(1));
assert_eq!(buckets.min_freq(), Some(1));
assert_eq!(buckets.touch(&"a"), Some(2));
assert_eq!(buckets.frequency(&"a"), Some(2));
assert_eq!(buckets.min_freq(), Some(1));
let popped = buckets.pop_min();
assert_eq!(popped, Some(("b", 1)));
assert_eq!(buckets.min_freq(), Some(2));
}
#[test]
fn frequency_buckets_duplicate_insert_is_noop() {
let mut buckets = FrequencyBuckets::new();
assert!(buckets.insert("a"));
assert!(!buckets.insert("a"));
assert_eq!(buckets.len(), 1);
assert_eq!(buckets.frequency(&"a"), Some(1));
}
#[test]
fn frequency_buckets_touch_missing_returns_none() {
let mut buckets: FrequencyBuckets<&str> = FrequencyBuckets::new();
assert_eq!(buckets.touch(&"missing"), None);
assert_eq!(buckets.min_freq(), None);
assert!(buckets.is_empty());
}
#[test]
fn frequency_buckets_remove_updates_min_freq() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.touch(&"b");
assert_eq!(buckets.min_freq(), Some(1));
assert_eq!(buckets.remove(&"a"), Some(1));
assert_eq!(buckets.min_freq(), Some(2));
assert!(!buckets.contains(&"a"));
assert!(buckets.contains(&"b"));
}
#[test]
fn frequency_buckets_pop_min_on_empty() {
let mut buckets: FrequencyBuckets<&str> = FrequencyBuckets::new();
assert_eq!(buckets.pop_min(), None);
assert_eq!(buckets.peek_min(), None);
assert_eq!(buckets.min_freq(), None);
}
#[test]
fn frequency_buckets_peek_min_does_not_remove() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
let peeked = buckets.peek_min();
assert!(matches!(peeked, Some((&"a", 1)) | Some((&"b", 1))));
assert_eq!(buckets.len(), 2);
assert!(buckets.contains(&"a"));
assert!(buckets.contains(&"b"));
}
#[test]
fn frequency_buckets_fifo_within_same_frequency() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.insert("c");
let first = buckets.pop_min();
assert_eq!(first, Some(("a", 1)));
let second = buckets.pop_min();
assert_eq!(second, Some(("b", 1)));
let third = buckets.pop_min();
assert_eq!(third, Some(("c", 1)));
assert!(buckets.is_empty());
}
#[test]
fn frequency_buckets_fifo_regression_fuzz_crash() {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
let keys = vec![92, 13, 92, 83];
let mut expected_order = Vec::new();
for &key in &keys {
if buckets.insert(key) {
expected_order.push(key);
}
}
assert_eq!(expected_order, vec![92, 13, 83]);
for expected_key in expected_order {
if let Some((popped_key, freq)) = buckets.pop_min() {
assert_eq!(
popped_key, expected_key,
"Expected {} but got {}",
expected_key, popped_key
);
assert_eq!(freq, 1);
}
}
assert!(buckets.is_empty());
}
#[test]
fn frequency_buckets_min_freq_tracks_next_bucket() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.insert("c");
buckets.touch(&"a");
buckets.touch(&"a");
assert_eq!(buckets.frequency(&"a"), Some(3));
assert_eq!(buckets.min_freq(), Some(1));
buckets.pop_min();
buckets.pop_min();
assert_eq!(buckets.min_freq(), Some(3));
assert_eq!(buckets.peek_min(), Some((&"a", 3)));
}
#[test]
fn frequency_buckets_clear_resets_state() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.touch(&"a");
buckets.clear();
assert!(buckets.is_empty());
assert_eq!(buckets.min_freq(), None);
assert_eq!(buckets.pop_min(), None);
assert_eq!(buckets.peek_min(), None);
}
#[test]
fn frequency_buckets_debug_invariants_hold() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.touch(&"a");
buckets.touch(&"a");
buckets.remove(&"b");
buckets.debug_validate_invariants();
}
#[test]
fn frequency_buckets_peek_min_id_and_key() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
let id = buckets.peek_min_id().unwrap();
let key = buckets.peek_min_key().unwrap();
assert!(matches!(key, &"a" | &"b"));
assert_eq!(buckets.frequency(key), Some(1));
let entry = buckets.entries.get(id).unwrap();
assert_eq!(&entry.key, key);
}
#[test]
fn frequency_buckets_iter_bucket_ids_order() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.insert("c");
let ids: Vec<_> = buckets.iter_bucket_ids(1).collect();
assert_eq!(ids.len(), 3);
let keys: Vec<_> = ids
.iter()
.map(|id| buckets.entries.get(*id).unwrap().key)
.collect();
assert_eq!(keys, vec!["c", "b", "a"]);
}
#[test]
fn frequency_buckets_iter_bucket_entries_meta() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.touch(&"a");
let entries: Vec<_> = buckets.iter_bucket_entries(1).collect();
assert_eq!(entries.len(), 1);
let (id, meta) = entries[0];
assert_eq!(meta.key, &"b");
assert_eq!(meta.freq, 1);
assert_eq!(meta.last_epoch, 0);
assert_eq!(buckets.entries.get(id).unwrap().freq, 1);
}
#[test]
fn frequency_buckets_iter_entries_meta() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.touch(&"a");
buckets.insert("b");
let mut entries: Vec<_> = buckets.iter().collect();
entries.sort_by_key(|(id, _)| id.index());
assert_eq!(entries.len(), 2);
let metas: Vec<_> = entries
.into_iter()
.map(|(_, meta)| (meta.key, meta.freq))
.collect();
assert!(metas.contains(&(&"a", 2)));
assert!(metas.contains(&(&"b", 1)));
}
#[test]
fn frequency_buckets_debug_snapshot() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.touch(&"a");
let snapshot = buckets.debug_snapshot();
assert_eq!(snapshot.min_freq, Some(1));
assert_eq!(snapshot.entries_len, 2);
assert_eq!(snapshot.index_len, 2);
assert_eq!(snapshot.buckets.len(), 2);
assert_eq!(snapshot.epoch, 0);
assert_eq!(snapshot.entry_epochs.len(), 2);
assert!(snapshot.entry_epochs.iter().all(|(_, epoch)| *epoch == 0));
assert_eq!(snapshot.bucket_entries.len(), 2);
let first_bucket = &snapshot.bucket_entries[0].1;
assert_eq!(first_bucket.len(), 1);
assert_eq!(first_bucket[0].freq, 1);
assert!(matches!(first_bucket[0].key, "a" | "b"));
}
#[test]
fn frequency_buckets_borrowed_key_lookups() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("alpha".to_string());
buckets.touch("alpha");
assert!(buckets.contains("alpha"));
assert_eq!(buckets.frequency("alpha"), Some(2));
}
#[test]
fn frequency_buckets_batch_ops() {
let mut buckets = FrequencyBuckets::new();
assert_eq!(buckets.insert_batch(["a", "b", "c"]), 3);
assert_eq!(buckets.touch_batch(["a", "b", "z"]), 2);
assert_eq!(buckets.remove_batch(["b", "c", "z"]), 2);
assert_eq!(buckets.len(), 1);
}
#[test]
fn frequency_buckets_touch_capped() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
assert_eq!(buckets.touch_capped(&"a", 2), Some(2));
assert_eq!(buckets.touch_capped(&"a", 2), Some(2));
assert_eq!(buckets.frequency(&"a"), Some(2));
}
#[test]
fn frequency_buckets_decay_halve() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.touch(&"a");
buckets.touch(&"a");
assert_eq!(buckets.frequency(&"a"), Some(3));
buckets.decay_halve();
assert_eq!(buckets.frequency(&"a"), Some(1));
assert_eq!(buckets.min_freq(), Some(1));
}
#[test]
fn frequency_buckets_rebase_min_freq() {
let mut buckets = FrequencyBuckets::new();
buckets.insert("a");
buckets.insert("b");
buckets.touch(&"a");
buckets.touch(&"a");
assert_eq!(buckets.frequency(&"a"), Some(3));
buckets.remove(&"b");
assert_eq!(buckets.min_freq(), Some(3));
buckets.rebase_min_freq();
assert_eq!(buckets.frequency(&"a"), Some(1));
assert_eq!(buckets.min_freq(), Some(1));
}
#[cfg(feature = "concurrency")]
#[test]
fn sharded_frequency_buckets_basic_ops() {
let buckets = ShardedFrequencyBuckets::new(2);
assert!(buckets.insert("a"));
assert!(buckets.insert("b"));
assert!(buckets.contains(&"a"));
assert_eq!(buckets.touch(&"a"), Some(2));
assert_eq!(buckets.frequency(&"a"), Some(2));
let peeked = buckets.peek_min();
assert!(matches!(peeked, Some(("a", 2)) | Some(("b", 1))));
let popped = buckets.pop_min();
assert!(matches!(popped, Some(("a", 2)) | Some(("b", 1))));
assert_eq!(buckets.len(), 1);
}
#[cfg(feature = "concurrency")]
#[test]
fn sharded_frequency_buckets_shard_for_key() {
let buckets: ShardedFrequencyBuckets<&str> = ShardedFrequencyBuckets::new(4);
let shard = buckets.shard_for_key(&"alpha");
assert!(shard < buckets.shard_count());
}
#[cfg(feature = "concurrency")]
#[test]
fn sharded_frequency_buckets_with_seed() {
let buckets: ShardedFrequencyBuckets<&str> =
ShardedFrequencyBuckets::with_shards_seed(4, 0, 99);
let shard = buckets.shard_for_key(&"alpha");
assert!(shard < buckets.shard_count());
}
#[cfg(feature = "concurrency")]
#[test]
fn sharded_frequency_buckets_collect_entries() {
let buckets = ShardedFrequencyBuckets::new(2);
assert!(buckets.insert("a"));
assert!(buckets.insert("b"));
assert_eq!(buckets.touch(&"a"), Some(2));
let mut entries = buckets.collect_entries();
entries.sort_by_key(|(_, meta)| meta.key);
let metas: Vec<_> = entries
.into_iter()
.map(|(_, meta)| (meta.key, meta.freq, meta.last_epoch))
.collect();
assert!(metas.contains(&("a", 2, 0)));
assert!(metas.contains(&("b", 1, 0)));
let mut bucket_entries = buckets.collect_bucket_entries(1);
bucket_entries.sort_by_key(|(_, meta)| meta.key);
assert_eq!(bucket_entries.len(), 1);
assert_eq!(bucket_entries[0].1.key, "b");
let mut shard_entries = buckets.collect_entries_with_shard();
shard_entries.sort_by_key(|(_, _, meta)| meta.key);
assert_eq!(shard_entries.len(), 2);
assert!(
shard_entries
.iter()
.all(|(idx, _, _)| *idx < buckets.shard_count())
);
let mut shard_bucket_entries = buckets.collect_bucket_entries_with_shard(1);
shard_bucket_entries.sort_by_key(|(_, _, meta)| meta.key);
assert_eq!(shard_bucket_entries.len(), 1);
assert_eq!(shard_bucket_entries[0].2.key, "b");
}
#[test]
fn frequency_buckets_handle_basic_ops() {
let mut buckets = FrequencyBucketsHandle::new();
assert!(buckets.insert(1u64));
assert!(buckets.insert(2u64));
assert!(buckets.contains(&1));
assert_eq!(buckets.touch(&1), Some(2));
assert_eq!(buckets.frequency(&1), Some(2));
let peeked = buckets.peek_min();
assert!(matches!(peeked, Some((1, 2)) | Some((2, 1))));
let popped = buckets.pop_min();
assert!(matches!(popped, Some((1, 2)) | Some((2, 1))));
assert_eq!(buckets.len(), 1);
}
}
#[cfg(test)]
mod property_tests {
use super::*;
use proptest::prelude::*;
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_invariants_always_hold(
ops in prop::collection::vec((0u8..4, any::<u32>()), 0..100)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
for (op, key) in ops {
match op % 4 {
0 => { buckets.insert(key); }
1 => { buckets.touch(&key); }
2 => { buckets.remove(&key); }
3 => { buckets.pop_min(); }
_ => unreachable!(),
}
buckets.debug_validate_invariants();
}
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_len_is_accurate(
ops in prop::collection::vec((0u8..3, 0u32..20), 0..50)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
let mut expected_keys = std::collections::HashSet::new();
for (op, key) in ops {
match op % 3 {
0 => {
buckets.insert(key);
expected_keys.insert(key);
}
1 => {
buckets.remove(&key);
expected_keys.remove(&key);
}
2 => {
if let Some((evicted_key, _)) = buckets.pop_min() {
expected_keys.remove(&evicted_key);
}
}
_ => unreachable!(),
}
prop_assert_eq!(buckets.len(), expected_keys.len());
}
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_empty_state_consistent(
keys in prop::collection::vec(any::<u32>(), 0..20)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
for key in keys {
buckets.insert(key);
}
while !buckets.is_empty() {
buckets.pop_min();
}
prop_assert!(buckets.is_empty());
prop_assert_eq!(buckets.len(), 0);
prop_assert_eq!(buckets.min_freq(), None);
prop_assert_eq!(buckets.peek_min(), None);
prop_assert_eq!(buckets.pop_min(), None);
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_frequency_starts_at_one_and_increments(
key in any::<u32>(),
touch_count in 0usize..20
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
buckets.insert(key);
prop_assert_eq!(buckets.frequency(&key), Some(1));
for i in 0..touch_count {
let new_freq = buckets.touch(&key).unwrap();
prop_assert_eq!(new_freq, 2 + i as u64);
prop_assert_eq!(buckets.frequency(&key), Some(2 + i as u64));
}
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_touch_missing_returns_none(
present_key in any::<u32>(),
missing_key in any::<u32>()
) {
prop_assume!(present_key != missing_key);
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
buckets.insert(present_key);
prop_assert_eq!(buckets.touch(&missing_key), None);
prop_assert!(!buckets.contains(&missing_key));
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_touch_capped_respects_max(
key in any::<u32>(),
max_freq in 2u64..10,
touch_count in 0usize..20
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
buckets.insert(key);
for _ in 0..touch_count {
buckets.touch_capped(&key, max_freq);
}
let final_freq = buckets.frequency(&key).unwrap();
prop_assert!(final_freq <= max_freq);
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_min_freq_is_accurate(
ops in prop::collection::vec((0u8..2, 0u32..10), 1..30)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
for (op, key) in ops {
match op % 2 {
0 => { buckets.insert(key); }
1 => { buckets.touch(&key); }
_ => unreachable!(),
}
let mut actual_min: Option<u64> = None;
for (_, meta) in buckets.iter() {
actual_min = match actual_min {
None => Some(meta.freq),
Some(min) => Some(min.min(meta.freq)),
};
}
prop_assert_eq!(buckets.min_freq(), actual_min);
}
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_min_freq_updates_after_removal(
keys in prop::collection::vec(0u32..10, 2..10)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
let mut unique_keys = Vec::new();
let mut seen = std::collections::HashSet::new();
for key in keys {
if seen.insert(key) {
unique_keys.push(key);
}
}
for &key in &unique_keys {
buckets.insert(key);
}
if let Some(&first_key) = unique_keys.first() {
for _ in 0..5 {
buckets.touch(&first_key);
}
}
let expected_min = if unique_keys.len() > 1 { Some(1) } else { Some(6) };
prop_assert_eq!(buckets.min_freq(), expected_min);
for &key in &unique_keys {
if buckets.frequency(&key) == Some(1) {
buckets.remove(&key);
}
}
if !buckets.is_empty() {
prop_assert!(buckets.min_freq().unwrap() > 1);
}
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_fifo_order_same_frequency(
keys in prop::collection::vec(0u32..50, 2..20)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
let mut unique_keys = Vec::new();
let mut seen = std::collections::HashSet::new();
for key in keys {
if seen.insert(key) {
unique_keys.push(key);
}
}
for &key in &unique_keys {
buckets.insert(key);
}
for expected_key in unique_keys {
let (popped_key, freq) = buckets.pop_min().unwrap();
prop_assert_eq!(popped_key, expected_key);
prop_assert_eq!(freq, 1);
}
prop_assert!(buckets.is_empty());
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_peek_pop_consistency(
keys in prop::collection::vec(any::<u32>(), 1..20),
touch_ops in prop::collection::vec((any::<u32>(), 0usize..5), 0..20)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
for key in keys {
buckets.insert(key);
}
for (key, count) in touch_ops {
for _ in 0..count {
buckets.touch(&key);
}
}
while !buckets.is_empty() {
let peeked = buckets.peek_min().map(|(k, f)| (*k, f));
let popped = buckets.pop_min();
prop_assert!(peeked.is_some());
prop_assert!(popped.is_some());
let (peek_key, peek_freq) = peeked.unwrap();
let (pop_key, pop_freq) = popped.unwrap();
prop_assert_eq!(peek_key, pop_key);
prop_assert_eq!(peek_freq, pop_freq);
}
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_pop_min_returns_lowest_frequency(
keys in prop::collection::vec(0u32..10, 2..10)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
for &key in &keys {
buckets.insert(key);
}
if let Some(&first_key) = keys.first() {
for _ in 0..3 {
buckets.touch(&first_key);
}
}
if let Some((_, popped_freq)) = buckets.pop_min() {
for (_, meta) in buckets.iter() {
prop_assert!(meta.freq >= popped_freq);
}
}
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_remove_returns_freq_and_removes(
key in any::<u32>(),
touch_count in 0usize..10
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
buckets.insert(key);
for _ in 0..touch_count {
buckets.touch(&key);
}
let expected_freq = 1 + touch_count as u64;
let removed_freq = buckets.remove(&key).unwrap();
prop_assert_eq!(removed_freq, expected_freq);
prop_assert!(!buckets.contains(&key));
prop_assert_eq!(buckets.frequency(&key), None);
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_remove_missing_returns_none(
present_key in any::<u32>(),
missing_key in any::<u32>()
) {
prop_assume!(present_key != missing_key);
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
buckets.insert(present_key);
prop_assert_eq!(buckets.remove(&missing_key), None);
prop_assert!(!buckets.contains(&missing_key));
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_clear_resets_state(
keys in prop::collection::vec(any::<u32>(), 1..30)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
for key in keys {
buckets.insert(key);
buckets.touch(&key);
}
buckets.clear();
prop_assert!(buckets.is_empty());
prop_assert_eq!(buckets.len(), 0);
prop_assert_eq!(buckets.min_freq(), None);
prop_assert_eq!(buckets.peek_min(), None);
prop_assert_eq!(buckets.pop_min(), None);
buckets.debug_validate_invariants();
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_clear_shrink_same_as_clear(
keys in prop::collection::vec(any::<u32>(), 1..30)
) {
let mut buckets1: FrequencyBuckets<u32> = FrequencyBuckets::new();
let mut buckets2: FrequencyBuckets<u32> = FrequencyBuckets::new();
for &key in &keys {
buckets1.insert(key);
buckets2.insert(key);
}
buckets1.clear();
buckets2.clear_shrink();
prop_assert_eq!(buckets1.len(), buckets2.len());
prop_assert_eq!(buckets1.is_empty(), buckets2.is_empty());
prop_assert_eq!(buckets1.min_freq(), buckets2.min_freq());
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_usable_after_clear(
keys1 in prop::collection::vec(any::<u32>(), 1..20),
keys2 in prop::collection::vec(any::<u32>(), 1..20)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
for key in keys1 {
buckets.insert(key);
}
buckets.clear();
for key in &keys2 {
buckets.insert(*key);
}
let unique_keys2: std::collections::HashSet<_> = keys2.into_iter().collect();
prop_assert_eq!(buckets.len(), unique_keys2.len());
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_decay_halve_reduces_frequencies(
keys in prop::collection::vec(0u32..10, 1..10)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
for &key in &keys {
buckets.insert(key);
for _ in 0..5 {
buckets.touch(&key);
}
}
let mut before: std::collections::HashMap<u32, u64> = std::collections::HashMap::new();
for (_, meta) in buckets.iter() {
before.insert(*meta.key, meta.freq);
}
buckets.decay_halve();
for (key, old_freq) in before {
let new_freq = buckets.frequency(&key).unwrap();
let expected = (old_freq / 2).max(1);
prop_assert_eq!(new_freq, expected);
}
buckets.debug_validate_invariants();
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_rebase_min_freq_shifts_correctly(
keys in prop::collection::vec(0u32..5, 2..8)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
for (i, &key) in keys.iter().enumerate() {
buckets.insert(key);
for _ in 0..i {
buckets.touch(&key);
}
}
let min_before = buckets.min_freq();
if min_before.unwrap_or(0) <= 1 {
return Ok(());
}
let delta = min_before.unwrap() - 1;
let mut before: std::collections::HashMap<u32, u64> = std::collections::HashMap::new();
for (_, meta) in buckets.iter() {
before.insert(*meta.key, meta.freq);
}
buckets.rebase_min_freq();
for (key, old_freq) in before {
let new_freq = buckets.frequency(&key).unwrap();
let expected = old_freq.saturating_sub(delta).max(1);
prop_assert_eq!(new_freq, expected);
}
prop_assert_eq!(buckets.min_freq(), Some(1));
buckets.debug_validate_invariants();
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_insert_batch_unique_count(
keys in prop::collection::vec(any::<u32>(), 0..30)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
let unique_keys: std::collections::HashSet<_> = keys.iter().copied().collect();
let inserted = buckets.insert_batch(keys);
prop_assert_eq!(inserted, unique_keys.len());
prop_assert_eq!(buckets.len(), unique_keys.len());
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_touch_batch_only_present(
present_keys in prop::collection::vec(0u32..10, 1..10),
touch_keys in prop::collection::vec(0u32..20, 0..10)
) {
let mut buckets: FrequencyBuckets<u32> = FrequencyBuckets::new();
buckets.insert_batch(present_keys.clone());
let touched = buckets.touch_batch(touch_keys.clone());
let present_set: std::collections::HashSet<_> = present_keys.into_iter().collect();
let intersection_count = touch_keys
.iter()
.filter(|key| present_set.contains(key))
.count();
prop_assert_eq!(touched, intersection_count);
}
}
}