cache_rs/
lfu.rs

1//! Least Frequently Used Cache Implementation.
2//!
3//! The LFU (Least Frequently Used) cache evicts the least frequently accessed items
4//! when the cache reaches capacity. This implementation tracks the frequency of
5//! access for each item and maintains items sorted by frequency.
6//!
7//! This implementation provides better performance for workloads where certain
8//! items are accessed more frequently than others over time, as it protects
9//! frequently accessed items from eviction.
10
11extern crate alloc;
12
13use crate::config::LfuCacheConfig;
14use crate::list::{Entry, List};
15use crate::metrics::{CacheMetrics, LfuCacheMetrics};
16use alloc::boxed::Box;
17use alloc::collections::BTreeMap;
18use alloc::string::String;
19use core::borrow::Borrow;
20use core::hash::{BuildHasher, Hash};
21use core::mem;
22use core::num::NonZeroUsize;
23
24/// Type alias for the frequency metadata stored in the hash map
25type FrequencyMetadata<K, V> = (usize, *mut Entry<(K, V)>);
26
27#[cfg(feature = "hashbrown")]
28use hashbrown::hash_map::DefaultHashBuilder;
29#[cfg(feature = "hashbrown")]
30use hashbrown::HashMap;
31
32#[cfg(not(feature = "hashbrown"))]
33use std::collections::hash_map::RandomState as DefaultHashBuilder;
34#[cfg(not(feature = "hashbrown"))]
35use std::collections::HashMap;
36
37/// An implementation of a Least Frequently Used (LFU) cache.
38///
39/// The cache tracks the frequency of access for each item and evicts the least
40/// frequently used items when the cache reaches capacity. In case of a tie in
41/// frequency, the least recently used item among those with the same frequency
42/// is evicted.
43///
44/// # Examples
45///
46/// ```
47/// use cache_rs::lfu::LfuCache;
48/// use core::num::NonZeroUsize;
49///
50/// // Create an LFU cache with capacity 3
51/// let mut cache = LfuCache::new(NonZeroUsize::new(3).unwrap());
52///
53/// // Add some items
54/// cache.put("a", 1);
55/// cache.put("b", 2);
56/// cache.put("c", 3);
57///
58/// // Access "a" multiple times to increase its frequency
59/// assert_eq!(cache.get(&"a"), Some(&1));
60/// assert_eq!(cache.get(&"a"), Some(&1));
61///
62/// // Add a new item, which will evict the least frequently used item
63/// cache.put("d", 4);
64/// assert_eq!(cache.get(&"b"), None); // "b" was evicted as it had frequency 0
65/// ```
66#[derive(Debug)]
67pub struct LfuCache<K, V, S = DefaultHashBuilder> {
68    /// Configuration for the LFU cache
69    config: LfuCacheConfig,
70
71    /// Current minimum frequency in the cache
72    min_frequency: usize,
73
74    /// Map from keys to their frequency and list node
75    map: HashMap<K, FrequencyMetadata<K, V>, S>,
76
77    /// Map from frequency to list of items with that frequency
78    /// Items within each frequency list are ordered by recency (LRU within frequency)
79    frequency_lists: BTreeMap<usize, List<(K, V)>>,
80
81    /// Metrics for tracking cache performance and frequency distribution
82    metrics: LfuCacheMetrics,
83}
84
85impl<K: Hash + Eq, V: Clone, S: BuildHasher> LfuCache<K, V, S> {
86    /// Creates a new LFU cache with the specified capacity and hash builder.
87    ///
88    /// # Examples
89    ///
90    /// ```
91    /// use cache_rs::lfu::LfuCache;
92    /// use core::num::NonZeroUsize;
93    /// use std::collections::hash_map::RandomState;
94    ///
95    /// let cache: LfuCache<&str, u32, _> = LfuCache::with_hasher(
96    ///     NonZeroUsize::new(10).unwrap(),
97    ///     RandomState::new()
98    /// );
99    /// ```
100    pub fn with_hasher(cap: NonZeroUsize, hash_builder: S) -> Self {
101        let config = LfuCacheConfig::new(cap);
102        let map_capacity = config.capacity().get().next_power_of_two();
103        let max_cache_size_bytes = config.capacity().get() as u64 * 128; // Estimate based on capacity
104        LfuCache {
105            config,
106            min_frequency: 1,
107            map: HashMap::with_capacity_and_hasher(map_capacity, hash_builder),
108            frequency_lists: BTreeMap::new(),
109            metrics: LfuCacheMetrics::new(max_cache_size_bytes),
110        }
111    }
112
113    /// Returns the maximum number of key-value pairs the cache can hold.
114    pub fn cap(&self) -> NonZeroUsize {
115        self.config.capacity()
116    }
117
118    /// Returns the current number of key-value pairs in the cache.
119    pub fn len(&self) -> usize {
120        self.map.len()
121    }
122
123    /// Returns `true` if the cache contains no key-value pairs.
124    pub fn is_empty(&self) -> bool {
125        self.map.is_empty()
126    }
127
128    /// Estimates the size of a key-value pair in bytes for metrics tracking
129    fn estimate_object_size(&self, _key: &K, _value: &V) -> u64 {
130        // Simple estimation: key size + value size + overhead for pointers and metadata
131        mem::size_of::<K>() as u64 + mem::size_of::<V>() as u64 + 64
132    }
133
134    /// Updates the frequency of an item and moves it to the appropriate frequency list.
135    unsafe fn update_frequency(&mut self, key: &K, old_frequency: usize) -> *mut Entry<(K, V)>
136    where
137        K: Clone,
138    {
139        let new_frequency = old_frequency + 1;
140
141        // Record frequency increment
142        self.metrics
143            .record_frequency_increment(old_frequency, new_frequency);
144
145        // Get the current node from the old frequency list
146        let (_, node) = self.map.get(key).unwrap();
147
148        // Remove from old frequency list
149        let boxed_entry = self
150            .frequency_lists
151            .get_mut(&old_frequency)
152            .unwrap()
153            .remove(*node)
154            .unwrap();
155
156        // If the old frequency list is now empty and it was the minimum frequency,
157        // update the minimum frequency
158        if self.frequency_lists.get(&old_frequency).unwrap().is_empty()
159            && old_frequency == self.min_frequency
160        {
161            self.min_frequency = new_frequency;
162        }
163
164        // Add to new frequency list
165        let entry_ptr = Box::into_raw(boxed_entry);
166
167        // Ensure the new frequency list exists
168        let capacity = self.config.capacity();
169        self.frequency_lists
170            .entry(new_frequency)
171            .or_insert_with(|| List::new(capacity));
172
173        // Add to the front of the new frequency list (most recently used within that frequency)
174        self.frequency_lists
175            .get_mut(&new_frequency)
176            .unwrap()
177            .attach_from_other_list(entry_ptr);
178
179        // Update the map
180        self.map.get_mut(key).unwrap().0 = new_frequency;
181        self.map.get_mut(key).unwrap().1 = entry_ptr;
182
183        // Update metrics with new frequency levels
184        self.metrics.update_frequency_levels(&self.frequency_lists);
185
186        entry_ptr
187    }
188
189    /// Returns a reference to the value corresponding to the key.
190    ///
191    /// The key may be any borrowed form of the cache's key type, but
192    /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for
193    /// the key type.
194    ///
195    /// Accessing an item increases its frequency count.
196    pub fn get<Q>(&mut self, key: &Q) -> Option<&V>
197    where
198        K: Borrow<Q> + Clone,
199        Q: ?Sized + Hash + Eq,
200    {
201        if let Some(&(frequency, node)) = self.map.get(key) {
202            // Cache hit
203            unsafe {
204                // SAFETY: node comes from our map, so it's a valid pointer to an entry in our frequency list
205                let (key_ref, value) = (*node).get_value();
206
207                // Record hit with estimated object size
208                let object_size = self.estimate_object_size(key_ref, value);
209                self.metrics.record_frequency_hit(object_size, frequency);
210
211                let new_node = self.update_frequency(key_ref, frequency);
212                let (_, value) = (*new_node).get_value();
213                Some(value)
214            }
215        } else {
216            // Cache miss - we can't estimate size without the actual object
217            // The simulation system will need to call record_miss separately
218            None
219        }
220    }
221
222    /// Returns a mutable reference to the value corresponding to the key.
223    ///
224    /// The key may be any borrowed form of the cache's key type, but
225    /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for
226    /// the key type.
227    ///
228    /// Accessing an item increases its frequency count.
229    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
230    where
231        K: Borrow<Q> + Clone,
232        Q: ?Sized + Hash + Eq,
233    {
234        if let Some(&(frequency, node)) = self.map.get(key) {
235            // Cache hit
236            unsafe {
237                // SAFETY: node comes from our map, so it's a valid pointer to an entry in our frequency list
238                let (key_ref, value) = (*node).get_value();
239
240                // Record hit with estimated object size
241                let object_size = self.estimate_object_size(key_ref, value);
242                self.metrics.record_frequency_hit(object_size, frequency);
243
244                let new_node = self.update_frequency(key_ref, frequency);
245                let (_, value) = (*new_node).get_value_mut();
246                Some(value)
247            }
248        } else {
249            None
250        }
251    }
252
253    /// Inserts a key-value pair into the cache.
254    ///
255    /// If the cache already contained this key, the old value is replaced and returned.
256    /// Otherwise, if the cache is at capacity, the least frequently used item is evicted.
257    /// In case of a tie in frequency, the least recently used item among those with
258    /// the same frequency is evicted.
259    ///
260    /// New items are inserted with a frequency of 1.
261    pub fn put(&mut self, key: K, value: V) -> Option<(K, V)>
262    where
263        K: Clone,
264    {
265        let object_size = self.estimate_object_size(&key, &value);
266
267        // If key already exists, update it
268        if let Some(&(frequency, node)) = self.map.get(&key) {
269            unsafe {
270                // SAFETY: node comes from our map, so it's a valid pointer to an entry in our frequency list
271                let old_entry = self.frequency_lists.get_mut(&frequency).unwrap().update(
272                    node,
273                    (key.clone(), value),
274                    true,
275                );
276
277                // Record the storage of the new value
278                self.metrics.core.record_insertion(object_size);
279
280                return old_entry.0;
281            }
282        }
283
284        let mut evicted = None;
285
286        // If at capacity, evict the least frequently used item
287        if self.len() >= self.config.capacity().get() {
288            // Find the list with minimum frequency and evict from the end (LRU within frequency)
289            if let Some(min_freq_list) = self.frequency_lists.get_mut(&self.min_frequency) {
290                if let Some(old_entry) = min_freq_list.remove_last() {
291                    unsafe {
292                        // SAFETY: old_entry comes from min_freq_list.remove_last(), so it's a valid Box
293                        // that we own. Converting to raw pointer is safe for temporary access.
294                        let entry_ptr = Box::into_raw(old_entry);
295                        let (old_key, old_value) = (*entry_ptr).get_value().clone();
296
297                        // Record eviction
298                        let evicted_size = self.estimate_object_size(&old_key, &old_value);
299                        self.metrics.core.record_eviction(evicted_size);
300
301                        // Remove from map
302                        self.map.remove(&old_key);
303                        evicted = Some((old_key, old_value));
304
305                        // SAFETY: Reconstructing Box from the same raw pointer to properly free memory
306                        let _ = Box::from_raw(entry_ptr);
307                    }
308                }
309            }
310        }
311
312        // Add new item with frequency 1
313        let frequency = 1;
314        self.min_frequency = 1;
315
316        // Ensure frequency list exists
317        let capacity = self.config.capacity();
318        self.frequency_lists
319            .entry(frequency)
320            .or_insert_with(|| List::new(capacity));
321
322        if let Some(node) = self
323            .frequency_lists
324            .get_mut(&frequency)
325            .unwrap()
326            .add((key.clone(), value))
327        {
328            self.map.insert(key, (frequency, node));
329        }
330
331        // Record the insertion
332        self.metrics.core.record_insertion(object_size);
333
334        // Update frequency levels
335        self.metrics.update_frequency_levels(&self.frequency_lists);
336
337        evicted
338    }
339
340    /// Removes a key from the cache, returning the value at the key if the key was previously in the cache.
341    ///
342    /// The key may be any borrowed form of the cache's key type, but
343    /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for
344    /// the key type.
345    pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
346    where
347        K: Borrow<Q>,
348        Q: ?Sized + Hash + Eq,
349        V: Clone,
350    {
351        let (frequency, node) = self.map.remove(key)?;
352
353        unsafe {
354            // SAFETY: node comes from our map and was just removed, so frequency_lists.remove is safe
355            let boxed_entry = self.frequency_lists.get_mut(&frequency)?.remove(node)?;
356            // SAFETY: boxed_entry is a valid Box we own, converting to raw pointer for temporary access
357            let entry_ptr = Box::into_raw(boxed_entry);
358            let value = (*entry_ptr).get_value().1.clone();
359            // SAFETY: Reconstructing Box from the same raw pointer to properly free memory
360            let _ = Box::from_raw(entry_ptr);
361
362            // Update min_frequency if necessary
363            if self.frequency_lists.get(&frequency).unwrap().is_empty()
364                && frequency == self.min_frequency
365            {
366                // Find the next minimum frequency
367                self.min_frequency = self
368                    .frequency_lists
369                    .keys()
370                    .find(|&&f| f > frequency && !self.frequency_lists.get(&f).unwrap().is_empty())
371                    .copied()
372                    .unwrap_or(1);
373            }
374
375            Some(value)
376        }
377    }
378
379    /// Clears the cache, removing all key-value pairs.
380    pub fn clear(&mut self) {
381        self.map.clear();
382        self.frequency_lists.clear();
383        self.min_frequency = 1;
384    }
385
386    /// Records a cache miss for metrics tracking (to be called by simulation system)
387    pub fn record_miss(&mut self, object_size: u64) {
388        self.metrics.record_miss(object_size);
389    }
390}
391
392impl<K: Hash + Eq, V> LfuCache<K, V>
393where
394    V: Clone,
395{
396    /// Creates a new LFU cache with the specified capacity.
397    ///
398    /// # Examples
399    ///
400    /// ```
401    /// use cache_rs::lfu::LfuCache;
402    /// use core::num::NonZeroUsize;
403    ///
404    /// let cache: LfuCache<&str, u32> = LfuCache::new(NonZeroUsize::new(10).unwrap());
405    /// ```
406    pub fn new(cap: NonZeroUsize) -> LfuCache<K, V, DefaultHashBuilder> {
407        let config = LfuCacheConfig::new(cap);
408        LfuCache::with_hasher(config.capacity(), DefaultHashBuilder::default())
409    }
410}
411
412impl<K: Hash + Eq, V, S: BuildHasher> CacheMetrics for LfuCache<K, V, S> {
413    fn metrics(&self) -> BTreeMap<String, f64> {
414        self.metrics.metrics()
415    }
416
417    fn algorithm_name(&self) -> &'static str {
418        self.metrics.algorithm_name()
419    }
420}
421
422#[cfg(test)]
423mod tests {
424    extern crate std;
425    use std::string::ToString;
426
427    use super::*;
428    use alloc::string::String;
429
430    #[test]
431    fn test_lfu_basic() {
432        let mut cache = LfuCache::new(NonZeroUsize::new(3).unwrap());
433
434        // Add items
435        assert_eq!(cache.put("a", 1), None);
436        assert_eq!(cache.put("b", 2), None);
437        assert_eq!(cache.put("c", 3), None);
438
439        // Access "a" multiple times to increase its frequency
440        assert_eq!(cache.get(&"a"), Some(&1));
441        assert_eq!(cache.get(&"a"), Some(&1));
442
443        // Access "b" once
444        assert_eq!(cache.get(&"b"), Some(&2));
445
446        // Add a new item, should evict "c" (frequency 0, least recently used among frequency 0)
447        let evicted = cache.put("d", 4);
448        assert!(evicted.is_some());
449        let (evicted_key, evicted_val) = evicted.unwrap();
450        assert_eq!(evicted_key, "c");
451        assert_eq!(evicted_val, 3);
452
453        // Check remaining items
454        assert_eq!(cache.get(&"a"), Some(&1)); // frequency 3 now
455        assert_eq!(cache.get(&"b"), Some(&2)); // frequency 2 now
456        assert_eq!(cache.get(&"d"), Some(&4)); // frequency 1 now
457        assert_eq!(cache.get(&"c"), None); // evicted
458    }
459
460    #[test]
461    fn test_lfu_frequency_ordering() {
462        let mut cache = LfuCache::new(NonZeroUsize::new(2).unwrap());
463
464        // Add items
465        cache.put("a", 1);
466        cache.put("b", 2);
467
468        // Access "a" multiple times
469        cache.get(&"a");
470        cache.get(&"a");
471        cache.get(&"a");
472
473        // Access "b" once
474        cache.get(&"b");
475
476        // Add new item, should evict "b" (lower frequency)
477        let evicted = cache.put("c", 3);
478        assert_eq!(evicted.unwrap().0, "b");
479
480        assert_eq!(cache.get(&"a"), Some(&1));
481        assert_eq!(cache.get(&"c"), Some(&3));
482        assert_eq!(cache.get(&"b"), None);
483    }
484
485    #[test]
486    fn test_lfu_update_existing() {
487        let mut cache = LfuCache::new(NonZeroUsize::new(2).unwrap());
488
489        cache.put("a", 1);
490        cache.get(&"a"); // frequency becomes 2
491
492        // Update existing key
493        let old_value = cache.put("a", 10);
494        assert_eq!(old_value.unwrap().1, 1);
495
496        // The frequency should be preserved
497        cache.put("b", 2);
498        cache.put("c", 3); // Should evict "b" because "a" has higher frequency
499
500        assert_eq!(cache.get(&"a"), Some(&10));
501        assert_eq!(cache.get(&"c"), Some(&3));
502        assert_eq!(cache.get(&"b"), None);
503    }
504
505    #[test]
506    fn test_lfu_remove() {
507        let mut cache = LfuCache::new(NonZeroUsize::new(3).unwrap());
508
509        cache.put("a", 1);
510        cache.put("b", 2);
511        cache.put("c", 3);
512
513        // Remove item
514        assert_eq!(cache.remove(&"b"), Some(2));
515        assert_eq!(cache.remove(&"b"), None);
516
517        // Check remaining items
518        assert_eq!(cache.get(&"a"), Some(&1));
519        assert_eq!(cache.get(&"c"), Some(&3));
520        assert_eq!(cache.len(), 2);
521    }
522
523    #[test]
524    fn test_lfu_clear() {
525        let mut cache = LfuCache::new(NonZeroUsize::new(3).unwrap());
526
527        cache.put("a", 1);
528        cache.put("b", 2);
529        cache.put("c", 3);
530
531        assert_eq!(cache.len(), 3);
532        cache.clear();
533        assert_eq!(cache.len(), 0);
534        assert!(cache.is_empty());
535
536        // Should be able to add items after clear
537        cache.put("d", 4);
538        assert_eq!(cache.get(&"d"), Some(&4));
539    }
540
541    #[test]
542    fn test_lfu_get_mut() {
543        let mut cache = LfuCache::new(NonZeroUsize::new(2).unwrap());
544
545        cache.put("a", 1);
546
547        // Modify value using get_mut
548        if let Some(value) = cache.get_mut(&"a") {
549            *value = 10;
550        }
551
552        assert_eq!(cache.get(&"a"), Some(&10));
553    }
554
555    #[test]
556    fn test_lfu_complex_values() {
557        let mut cache = LfuCache::new(NonZeroUsize::new(2).unwrap());
558
559        #[derive(Debug, Clone, PartialEq)]
560        struct ComplexValue {
561            id: usize,
562            data: String,
563        }
564
565        // Add complex values
566        cache.put(
567            "a",
568            ComplexValue {
569                id: 1,
570                data: "a-data".to_string(),
571            },
572        );
573
574        cache.put(
575            "b",
576            ComplexValue {
577                id: 2,
578                data: "b-data".to_string(),
579            },
580        );
581
582        // Modify a value using get_mut
583        if let Some(value) = cache.get_mut(&"a") {
584            value.id = 100;
585            value.data = "a-modified".to_string();
586        }
587
588        // Check the modified value
589        let a = cache.get(&"a").unwrap();
590        assert_eq!(a.id, 100);
591        assert_eq!(a.data, "a-modified");
592    }
593}