// cache-rs 0.4.0 - Docs.rs

//! Concurrent LFU Cache Implementation
//!
//! A thread-safe LFU cache using lock striping (segmented storage) for high-performance
//! concurrent access. This is the multi-threaded counterpart to [`LfuCache`](crate::LfuCache).
//!
//! # How It Works
//!
//! The cache partitions keys across multiple independent segments, each with its own lock.
//! This allows concurrent operations on different segments without contention.
//!
//! ```text
//! ┌──────────────────────────────────────────────────────────────────────┐
//! │                      ConcurrentLfuCache                              │
//! │                                                                      │
//! │  hash(key) % N  ──▶  Segment Selection                               │
//! │                                                                      │
//! │  ┌──────────────┐ ┌──────────────┐     ┌──────────────┐              │
//! │  │  Segment 0   │ │  Segment 1   │ ... │  Segment N-1 │              │
//! │  │  ┌────────┐  │ │  ┌────────┐  │     │  ┌────────┐  │              │
//! │  │  │ Mutex  │  │ │  │ Mutex  │  │     │  │ Mutex  │  │              │
//! │  │  └────┬───┘  │ │  └────┬───┘  │     │  └────┬───┘  │              │
//! │  │       │      │ │       │      │     │       │      │              │
//! │  │  ┌────▼───┐  │ │  ┌────▼───┐  │     │  ┌────▼───┐  │              │
//! │  │  │LfuCache│  │ │  │LfuCache│  │     │  │LfuCache│  │              │
//! │  │  └────────┘  │ │  └────────┘  │     │  └────────┘  │              │
//! │  └──────────────┘ └──────────────┘     └──────────────┘              │
//! └──────────────────────────────────────────────────────────────────────┘
//! ```
//!
//! ## Segment Count
//!
//! The default segment count is based on available CPU cores (typically 16).
//! More segments = less contention but more memory overhead.
//!
//! ## Trade-offs
//!
//! - **Pros**: Near-linear scaling with thread count, excellent scan resistance
//! - **Cons**: LFU frequency tracking is per-segment, not global. An item accessed
//!   in segment A doesn't affect frequency tracking in segment B.
//!
//! # Performance Characteristics
//!
//! | Metric | Value |
//! |--------|-------|
//! | Get/Put/Remove | O(log F) per segment, effectively O(1) |
//! | Concurrency | Near-linear scaling up to segment count |
//! | Memory overhead | ~150 bytes per entry + one Mutex per segment |
//! | Scan resistance | Excellent (frequency-based eviction) |
//!
//! Where F = distinct frequency values per segment. Since frequencies are small
//! integers, F is bounded and operations are effectively O(1).
//!
//! # When to Use
//!
//! **Use ConcurrentLfuCache when:**
//! - Multiple threads need cache access
//! - Access patterns have stable popularity (some keys consistently more popular)
//! - You need excellent scan resistance
//! - Frequency is more important than recency
//!
//! **Consider alternatives when:**
//! - Single-threaded access only → use `LfuCache`
//! - Need global frequency tracking → use `Mutex<LfuCache>`
//! - Popularity changes over time → use `ConcurrentLfudaCache`
//! - Recency-based access → use `ConcurrentLruCache`
//!
//! # Thread Safety
//!
//! `ConcurrentLfuCache` is `Send + Sync` and can be shared via `Arc`.
//!
//! # Example
//!
//! ```rust,ignore
//! use cache_rs::concurrent::ConcurrentLfuCache;
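//! use cache_rs::config::{ConcurrentCacheConfig, ConcurrentLfuCacheConfig, LfuCacheConfig};
//! use std::num::NonZeroUsize;
//! use std::sync::Arc;
//! use std::thread;
//!
//! let config: ConcurrentLfuCacheConfig = ConcurrentCacheConfig {
//!     base: LfuCacheConfig {
//!         capacity: NonZeroUsize::new(10_000).unwrap(),
//!         max_size: u64::MAX,
//!     },
//!     segments: 16,
//! };
//! let cache = Arc::new(ConcurrentLfuCache::init(config, None));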
//!
//! let handles: Vec<_> = (0..4).map(|i| {
//!     let cache = Arc::clone(&cache);
//!     thread::spawn(move || {
//!         for j in 0..1000 {
//!             let key = format!("key-{}-{}", i, j);
//!             cache.put(key.clone(), j, 1);
//!             // Access popular keys more frequently
//!             if j % 10 == 0 {
//!                 for _ in 0..5 {
//!                     let _ = cache.get(&key);
//!                 }
//!             }
//!         }
//!     })
//! }).collect();
//!
//! for h in handles {
//!     h.join().unwrap();
//! }
//!
//! println!("Total entries: {}", cache.len());
//! ```

extern crate alloc;

use crate::lfu::LfuSegment;
use crate::metrics::CacheMetrics;
use alloc::boxed::Box;
use alloc::collections::BTreeMap;
use alloc::string::String;
use alloc::vec::Vec;
use core::borrow::Borrow;
use core::hash::{BuildHasher, Hash};
use core::num::NonZeroUsize;
use parking_lot::Mutex;

#[cfg(feature = "hashbrown")]
use hashbrown::DefaultHashBuilder;

#[cfg(not(feature = "hashbrown"))]
use std::collections::hash_map::RandomState as DefaultHashBuilder;

/// A thread-safe LFU cache with segmented storage for high concurrency.
///
/// Every key is routed to exactly one segment by hashing it with
/// `hash_builder`; each segment is an `LfuSegment` guarded by its own `Mutex`,
/// so operations on keys that land in different segments do not contend.
pub struct ConcurrentLfuCache<K, V, S = DefaultHashBuilder> {
    /// The independently locked segments. Stored as a boxed slice, so the
    /// number of segments is fixed once the cache has been built.
    segments: Box<[Mutex<LfuSegment<K, V, S>>]>,
    /// Hash builder used to pick the segment a key belongs to.
    hash_builder: S,
}

impl<K, V> ConcurrentLfuCache<K, V, DefaultHashBuilder>
where
    K: Hash + Eq + Clone + Send,
    V: Clone + Send,
{
    /// Creates a new concurrent LFU cache from a configuration.
    ///
    /// This is the **recommended** way to create a concurrent LFU cache.
    ///
    /// # Arguments
    /// * `config` - The cache configuration
    /// * `hasher` - Optional custom hash builder. If `None`, uses the default.
    pub fn init(
        config: crate::config::ConcurrentLfuCacheConfig,
        hasher: Option<DefaultHashBuilder>,
    ) -> Self {
        let segment_count = config.segments;
        let capacity = config.base.capacity;
        let max_size = config.base.max_size;

        let segment_capacity = capacity.get() / segment_count;
        let segment_cap = NonZeroUsize::new(segment_capacity.max(1)).unwrap();
        let segment_max_size = max_size / segment_count as u64;

        let hash_builder = hasher.unwrap_or_default();

        let segments: Vec<_> = (0..segment_count)
            .map(|_| {
                let segment_config = crate::config::LfuCacheConfig {
                    capacity: segment_cap,
                    max_size: segment_max_size,
                };
                Mutex::new(LfuSegment::init(segment_config, hash_builder.clone()))
            })
            .collect();

        Self {
            segments: segments.into_boxed_slice(),
            hash_builder,
        }
    }
}

impl<K, V, S> ConcurrentLfuCache<K, V, S>
where
    K: Hash + Eq + Clone + Send,
    V: Clone + Send,
    S: BuildHasher + Clone + Send,
{
    /// Maps `key` to the index of the segment that owns it.
    ///
    /// The key is hashed with the cache-level hash builder and the hash is
    /// reduced modulo the number of segments.
    #[inline]
    fn segment_index<Q>(&self, key: &Q) -> usize
    where
        K: Borrow<Q>,
        Q: ?Sized + Hash,
    {
        let hash = self.hash_builder.hash_one(key);
        let segment_total = self.segments.len();
        (hash as usize) % segment_total
    }

    /// Returns the combined capacity of all segments.
    ///
    /// Segments are locked one after another while their capacities are added up.
    pub fn capacity(&self) -> usize {
        let mut total = 0;
        for shard in self.segments.iter() {
            total += shard.lock().cap().get();
        }
        total
    }

    /// Returns how many segments the cache is split into.
    pub fn segment_count(&self) -> usize {
        self.segments.len()
    }

    /// Returns the number of entries stored across all segments.
    ///
    /// Each segment is locked in turn, so under concurrent writes the result
    /// is not a snapshot of a single instant.
    pub fn len(&self) -> usize {
        self.segments
            .iter()
            .fold(0, |count, shard| count + shard.lock().len())
    }

    /// Returns `true` when no segment holds any entry.
    ///
    /// Stops at the first non-empty segment.
    pub fn is_empty(&self) -> bool {
        !self.segments.iter().any(|shard| !shard.lock().is_empty())
    }

    /// Looks up `key` and returns a clone of its value.
    ///
    /// The value is cloned while the segment lock is held, and the lock is
    /// released before returning. Use `get_with()` to read the value without
    /// cloning it.
    pub fn get<Q>(&self, key: &Q) -> Option<V>
    where
        K: Borrow<Q>,
        Q: ?Sized + Hash + Eq,
    {
        let mut shard = self.segments[self.segment_index(key)].lock();
        shard.get(key).cloned()
    }

    /// Looks up `key` and, if present, returns the result of calling `f` on
    /// a reference to its value.
    ///
    /// `f` runs while the segment lock is held, which avoids cloning the
    /// value. Keep `f` short, and do not call back into this cache from it:
    /// the segment mutex is not reentrant, so touching a key in the same
    /// segment from inside `f` would deadlock.
    pub fn get_with<Q, F, R>(&self, key: &Q, f: F) -> Option<R>
    where
        K: Borrow<Q>,
        Q: ?Sized + Hash + Eq,
        F: FnOnce(&V) -> R,
    {
        let mut shard = self.segments[self.segment_index(key)].lock();
        let value = shard.get(key)?;
        Some(f(value))
    }

    /// Stores `value` under `key`, recording `size` for size tracking.
    ///
    /// The call is forwarded to the segment that owns `key`, and that
    /// segment's result is returned unchanged. Pass `SIZE_UNIT` (1) as `size`
    /// for count-based caching.
    pub fn put(&self, key: K, value: V, size: u64) -> Option<Vec<(K, V)>> {
        // The index must be computed before `key` is moved into the segment.
        let target = self.segment_index(&key);
        self.segments[target].lock().put(key, value, size)
    }

    /// Removes `key` from the cache and returns its value if it was present.
    pub fn remove<Q>(&self, key: &Q) -> Option<V>
    where
        K: Borrow<Q>,
        Q: ?Sized + Hash + Eq,
    {
        let mut shard = self.segments[self.segment_index(key)].lock();
        shard.remove(key)
    }

    /// Removes every entry from every segment.
    ///
    /// Segments are cleared one at a time, each under its own lock.
    pub fn clear(&self) {
        self.segments.iter().for_each(|shard| {
            shard.lock().clear();
        });
    }

    /// Returns the sum of the current content sizes reported by all segments.
    pub fn current_size(&self) -> u64 {
        let mut total = 0;
        for shard in self.segments.iter() {
            total += shard.lock().current_size();
        }
        total
    }

    /// Returns the sum of the maximum content sizes of all segments.
    pub fn max_size(&self) -> u64 {
        self.segments
            .iter()
            .fold(0, |limit, shard| limit + shard.lock().max_size())
    }

    /// Reports whether `key` is present, without touching its frequency.
    ///
    /// This is a pure existence check that does **not** update the entry's frequency.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// if cache.contains(&"key".to_string()) {
    ///     println!("Key exists!");
    /// }
    /// ```
    pub fn contains<Q>(&self, key: &Q) -> bool
    where
        K: Borrow<Q>,
        Q: ?Sized + Hash + Eq,
    {
        let shard = self.segments[self.segment_index(key)].lock();
        shard.contains(key)
    }

    /// Returns a clone of the value for `key` without recording an access.
    ///
    /// Unlike [`get()`](Self::get), this does NOT increment the entry's frequency
    /// or change its position. The value is cloned because a reference into the
    /// segment cannot outlive the segment lock.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let value = cache.peek(&"key".to_string());
    /// ```
    pub fn peek<Q>(&self, key: &Q) -> Option<V>
    where
        K: Borrow<Q>,
        Q: ?Sized + Hash + Eq,
        V: Clone,
    {
        let shard = self.segments[self.segment_index(key)].lock();
        shard.peek(key).cloned()
    }
}

impl<K, V, S> CacheMetrics for ConcurrentLfuCache<K, V, S>
where
    K: Hash + Eq + Clone + Send,
    V: Clone + Send,
    S: BuildHasher + Clone + Send,
{
    /// Collects the metrics of every segment and adds them up key by key.
    ///
    /// Each segment is locked only while its metrics map is produced; the
    /// merge itself happens after that lock is released.
    ///
    /// NOTE(review): every metric is combined by plain addition, so any
    /// ratio- or average-style metric reported by a segment would come out as
    /// the sum of the per-segment values — confirm which keys the segments emit.
    fn metrics(&self) -> BTreeMap<String, f64> {
        self.segments
            .iter()
            .fold(BTreeMap::new(), |mut totals, shard| {
                let snapshot = shard.lock().metrics().metrics();
                for (name, value) in snapshot {
                    *totals.entry(name).or_insert(0.0) += value;
                }
                totals
            })
    }

    /// Returns the fixed identifier of this cache algorithm.
    fn algorithm_name(&self) -> &'static str {
        "ConcurrentLFU"
    }
}

// SAFETY: the cache owns its segments and its hash builder outright, and the
// bounds require every key, value and hasher to be `Send`.
// NOTE(review): `LfuSegment`'s internals are not visible from this file;
// presumably it is not auto-`Send` (hence this manual impl) — confirm it holds
// no thread-affine state.
unsafe impl<K, V, S> Send for ConcurrentLfuCache<K, V, S>
where
    K: Send,
    V: Send,
    S: Send,
{
}

// SAFETY: through `&self`, segment contents are only reached after locking the
// segment's `Mutex`, so at most one thread touches a given segment at a time.
// The hash builder is read without a lock, which is why `S: Sync` is required.
unsafe impl<K, V, S> Sync for ConcurrentLfuCache<K, V, S>
where
    K: Send,
    V: Send,
    S: Send + Sync,
{
}

impl<K, V, S> core::fmt::Debug for ConcurrentLfuCache<K, V, S>
where
    K: Hash + Eq + Clone + Send,
    V: Clone + Send,
    S: BuildHasher + Clone + Send,
{
    /// Writes a short summary: the number of segments and the total entry count.
    ///
    /// Entries themselves are not printed. Computing the entry count locks
    /// every segment in turn.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let segment_count = self.segments.len();
        let total_len = self.len();

        let mut summary = f.debug_struct("ConcurrentLfuCache");
        summary.field("segment_count", &segment_count);
        summary.field("total_len", &total_len);
        summary.finish()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{ConcurrentCacheConfig, ConcurrentLfuCacheConfig, LfuCacheConfig};

    extern crate std;
    use std::string::ToString;
    use std::sync::Arc;
    use std::thread;
    use std::vec::Vec;

    /// Builds a count-based configuration: `capacity` entries spread over
    /// `segments` segments, with no effective limit on content size.
    ///
    /// Tests that need deterministic eviction use a single segment so that
    /// every key shares one frequency list.
    fn make_config(capacity: usize, segments: usize) -> ConcurrentLfuCacheConfig {
        ConcurrentCacheConfig {
            base: LfuCacheConfig {
                capacity: NonZeroUsize::new(capacity).unwrap(),
                max_size: u64::MAX,
            },
            segments,
        }
    }

    #[test]
    fn test_basic_operations() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        cache.put("a".to_string(), 1, 1);
        cache.put("b".to_string(), 2, 1);

        assert_eq!(cache.get(&"a".to_string()), Some(1));
        assert_eq!(cache.get(&"b".to_string()), Some(2));
    }

    #[test]
    fn test_concurrent_access() {
        let cache: Arc<ConcurrentLfuCache<String, i32>> =
            Arc::new(ConcurrentLfuCache::init(make_config(1000, 16), None));
        let num_threads = 8;
        let ops_per_thread = 500;

        let mut handles: Vec<std::thread::JoinHandle<()>> = Vec::new();

        for t in 0..num_threads {
            let cache = Arc::clone(&cache);
            handles.push(thread::spawn(move || {
                for i in 0..ops_per_thread {
                    let key = std::format!("key_{}_{}", t, i);
                    cache.put(key.clone(), i, 1);
                    // Access multiple times to test frequency tracking
                    if i % 3 == 0 {
                        let _ = cache.get(&key);
                        let _ = cache.get(&key);
                    }
                }
            }));
        }

        for handle in handles {
            handle.join().unwrap();
        }

        assert!(!cache.is_empty());
        // 4000 distinct keys were inserted; the cache must still respect its bound.
        assert!(cache.len() <= cache.capacity());
    }

    #[test]
    fn test_capacity() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        // Capacity is distributed across segments
        let capacity = cache.capacity();
        assert!(capacity >= 16);
        assert!(capacity <= 100);
    }

    #[test]
    fn test_capacity_smaller_than_segment_count() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(4, 16), None);

        // Every segment is given room for at least one entry.
        assert_eq!(cache.segment_count(), 16);
        assert_eq!(cache.capacity(), 16);
    }

    #[test]
    fn test_segment_count() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 8), None);

        assert_eq!(cache.segment_count(), 8);
    }

    #[test]
    fn test_len_and_is_empty() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        assert!(cache.is_empty());
        assert_eq!(cache.len(), 0);

        cache.put("key1".to_string(), 1, 1);
        assert_eq!(cache.len(), 1);
        assert!(!cache.is_empty());

        cache.put("key2".to_string(), 2, 1);
        assert_eq!(cache.len(), 2);
    }

    #[test]
    fn test_remove() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        cache.put("key1".to_string(), 1, 1);
        cache.put("key2".to_string(), 2, 1);

        assert_eq!(cache.remove(&"key1".to_string()), Some(1));
        assert_eq!(cache.len(), 1);
        assert_eq!(cache.get(&"key1".to_string()), None);

        assert_eq!(cache.remove(&"nonexistent".to_string()), None);
    }

    #[test]
    fn test_clear() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        cache.put("key1".to_string(), 1, 1);
        cache.put("key2".to_string(), 2, 1);
        cache.put("key3".to_string(), 3, 1);

        assert_eq!(cache.len(), 3);

        cache.clear();

        assert_eq!(cache.len(), 0);
        assert!(cache.is_empty());
        assert_eq!(cache.get(&"key1".to_string()), None);
    }

    #[test]
    fn test_contains_key() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        cache.put("exists".to_string(), 1, 1);

        assert!(cache.contains(&"exists".to_string()));
        assert!(!cache.contains(&"missing".to_string()));
    }

    #[test]
    fn test_get_with() {
        let cache: ConcurrentLfuCache<String, String> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        cache.put("key".to_string(), "hello world".to_string(), 1);

        let len = cache.get_with(&"key".to_string(), |v: &String| v.len());
        assert_eq!(len, Some(11));

        let missing = cache.get_with(&"missing".to_string(), |v: &String| v.len());
        assert_eq!(missing, None);
    }

    #[test]
    fn test_peek() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        cache.put("key".to_string(), 7, 1);

        assert_eq!(cache.peek(&"key".to_string()), Some(7));
        assert_eq!(cache.peek(&"missing".to_string()), None);
        // Peeking must not remove or alter the entry.
        assert_eq!(cache.len(), 1);
        assert_eq!(cache.get(&"key".to_string()), Some(7));
    }

    #[test]
    fn test_frequency_eviction() {
        // A single segment makes the eviction victim deterministic.
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(3, 1), None);

        cache.put("a".to_string(), 1, 1);
        cache.put("b".to_string(), 2, 1);
        cache.put("c".to_string(), 3, 1);

        // Access "a" and "c" multiple times to increase frequency
        for _ in 0..5 {
            let _ = cache.get(&"a".to_string());
            let _ = cache.get(&"c".to_string());
        }

        // The cache is full, so adding a new item must evict the
        // least frequently used entry, which is "b".
        cache.put("d".to_string(), 4, 1);

        assert_eq!(cache.len(), 3);
        assert!(cache.contains(&"a".to_string()));
        assert!(!cache.contains(&"b".to_string()));
        assert!(cache.contains(&"c".to_string()));
        assert!(cache.contains(&"d".to_string()));
    }

    #[test]
    fn test_eviction_on_capacity() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(80, 16), None);

        // Insert far more keys than fit: with 200 keys over 16 segments at
        // least one segment receives more keys than it can hold, so eviction
        // is guaranteed to happen regardless of how the keys hash.
        for i in 0..200 {
            cache.put(std::format!("key{}", i), i, 1);
        }

        // Cache should not exceed capacity
        assert!(!cache.is_empty());
        assert!(cache.len() <= 80);
        assert!(cache.len() <= cache.capacity());
    }

    #[test]
    fn test_metrics() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        cache.put("a".to_string(), 1, 1);
        cache.put("b".to_string(), 2, 1);

        let metrics = cache.metrics();
        // Metrics aggregation across segments
        assert!(!metrics.is_empty());
    }

    #[test]
    fn test_algorithm_name() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        assert_eq!(cache.algorithm_name(), "ConcurrentLFU");
    }

    #[test]
    fn test_empty_cache_operations() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        assert!(cache.is_empty());
        assert_eq!(cache.len(), 0);
        assert_eq!(cache.get(&"missing".to_string()), None);
        assert_eq!(cache.peek(&"missing".to_string()), None);
        assert_eq!(cache.remove(&"missing".to_string()), None);
        assert!(!cache.contains(&"missing".to_string()));
    }

    #[test]
    fn test_borrowed_key_lookup() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        cache.put("test_key".to_string(), 42, 1);

        // Test with borrowed key
        let key_str = "test_key";
        assert_eq!(cache.get(key_str), Some(42));
        assert_eq!(cache.peek(key_str), Some(42));
        assert!(cache.contains(key_str));
        assert_eq!(cache.remove(key_str), Some(42));
    }

    #[test]
    fn test_frequency_tracking() {
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(100, 16), None);

        cache.put("key".to_string(), 1, 1);

        // Access the key multiple times
        for _ in 0..10 {
            let _ = cache.get(&"key".to_string());
        }

        // Item should still be accessible
        assert_eq!(cache.get(&"key".to_string()), Some(1));
    }

    #[test]
    fn test_contains_non_promoting() {
        // A single segment makes the eviction victim deterministic.
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(2, 1), None);

        cache.put("a".to_string(), 1, 1);
        cache.put("b".to_string(), 2, 1);

        // contains() should check without updating frequency
        for _ in 0..5 {
            assert!(cache.contains(&"a".to_string()));
        }
        assert!(!cache.contains(&"c".to_string()));

        // One real access makes "b" more frequent than "a" — unless the
        // contains() calls above were (wrongly) counted as accesses.
        let _ = cache.get(&"b".to_string());

        cache.put("c".to_string(), 3, 1);

        assert!(!cache.contains(&"a".to_string()));
        assert!(cache.contains(&"b".to_string()));
        assert!(cache.contains(&"c".to_string()));
    }

    #[test]
    fn test_peek_non_promoting() {
        // A single segment makes the eviction victim deterministic.
        let cache: ConcurrentLfuCache<String, i32> =
            ConcurrentLfuCache::init(make_config(2, 1), None);

        cache.put("a".to_string(), 1, 1);
        cache.put("b".to_string(), 2, 1);

        // peek() returns the value but must not count as an access.
        for _ in 0..5 {
            assert_eq!(cache.peek(&"a".to_string()), Some(1));
        }

        let _ = cache.get(&"b".to_string());

        cache.put("c".to_string(), 3, 1);

        assert_eq!(cache.peek(&"a".to_string()), None);
        assert_eq!(cache.peek(&"b".to_string()), Some(2));
        assert_eq!(cache.peek(&"c".to_string()), Some(3));
    }
}