breve 0.1.0

In-memory cache implementation with Uno as the admission policy and S3-FIFO as the eviction policy
Documentation
// Copyright 2025 Chojan Shang.
// Copyright 2024 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use ahash::RandomState;
use std::hash::Hash;
use std::sync::atomic::{AtomicU8, Ordering};

pub struct Estimator {
    estimator: Box<[(Box<[AtomicU8]>, RandomState)]>,
}

impl Estimator {
    pub fn optimal_paras(items: usize) -> (usize, usize) {
        use std::cmp::max;
        // derived from https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch
        // width = ceil(e / ε)
        // depth = ceil(ln(1 − δ) / ln(1 / 2))
        let error_range = 1.0 / (items as f64);
        let failure_probability = 1.0 / (items as f64);
        (
            max((std::f64::consts::E / error_range).ceil() as usize, 16),
            max((failure_probability.ln() / 0.5f64.ln()).ceil() as usize, 2),
        )
    }

    pub fn optimal(items: usize) -> Self {
        let (slots, hashes) = Self::optimal_paras(items);
        Self::new(hashes, slots)
    }

    pub fn compact(items: usize) -> Self {
        let (slots, hashes) = Self::optimal_paras(items / 100);
        Self::new(hashes, slots)
    }

    /// Create a new `Estimator` with the given amount of hashes and columns (slots).
    pub fn new(hashes: usize, slots: usize) -> Self {
        let mut estimator = Vec::with_capacity(hashes);
        for _ in 0..hashes {
            let mut slot = Vec::with_capacity(slots);
            for _ in 0..slots {
                slot.push(AtomicU8::new(0));
            }
            estimator.push((slot.into_boxed_slice(), RandomState::new()));
        }

        Estimator {
            estimator: estimator.into_boxed_slice(),
        }
    }

    pub fn incr<T: Hash>(&self, key: T) -> u8 {
        let mut min = u8::MAX;
        for (slot, hasher) in self.estimator.iter() {
            let hash = hasher.hash_one(&key) as usize;
            let counter = &slot[hash % slot.len()];
            let (_current, new) = incr_no_overflow(counter);
            min = std::cmp::min(min, new);
        }
        min
    }

    /// Get the estimated frequency of `key`.
    pub fn get<T: Hash>(&self, key: T) -> u8 {
        let mut min = u8::MAX;
        for (slot, hasher) in self.estimator.iter() {
            let hash = hasher.hash_one(&key) as usize;
            let counter = &slot[hash % slot.len()];
            let current = counter.load(Ordering::Relaxed);
            min = std::cmp::min(min, current);
        }
        min
    }

    pub fn exponential_decay(&self, decay_factor: f32) {
        for (slot, _) in self.estimator.iter() {
            for counter in slot.iter() {
                // we don't CAS because the only update between the load and store
                // is fetch_add(1), which should be fine to miss/ignore
                let c = counter.load(Ordering::Relaxed);
                let new_value = (c as f32 * decay_factor).round() as u8;
                counter.store(new_value, Ordering::Relaxed);
                // counter.store(c >> shift, Ordering::Relaxed);
            }
        }
    }

    /// right shift all values inside this `Estimator`.
    pub fn age(&self, shift: u8) {
        for (slot, _) in self.estimator.iter() {
            for counter in slot.iter() {
                // we don't CAS because the only update between the load and store
                // is fetch_add(1), which should be fine to miss/ignore
                let c = counter.load(Ordering::Relaxed);
                // let new_value = if c >> shift >= 1 { 1 } else { 0 };
                // counter.store(new_value, Ordering::Relaxed);
                counter.store(c >> shift, Ordering::Relaxed);
            }
        }
    }
}

fn incr_no_overflow(var: &AtomicU8) -> (u8, u8) {
    loop {
        let current = var.load(Ordering::Relaxed);
        if current == u8::MAX {
            return (current, current);
        }
        let new = if current == u8::MAX - 1 {
            u8::MAX
        } else {
            current + 1
        };
        if let Err(new) = var.compare_exchange(current, new, Ordering::Acquire, Ordering::Relaxed) {
            // someone else beat us to it
            if new == u8::MAX {
                // already max
                return (current, new);
            } // else, try again
        } else {
            return (current, new);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cmk_paras() {
        let (slots, hashes) = Estimator::optimal_paras(1_000_000);
        // just smoke check some standard input
        assert_eq!(slots, 2718282);
        assert_eq!(hashes, 20);
    }
}