cache_rs/lfu.rs
1//! Least Frequently Used Cache Implementation.
2//!
3//! The LFU (Least Frequently Used) cache evicts the least frequently accessed items
4//! when the cache reaches capacity. This implementation tracks the frequency of
5//! access for each item and maintains items sorted by frequency.
6//!
7//! This implementation provides better performance for workloads where certain
8//! items are accessed more frequently than others over time, as it protects
9//! frequently accessed items from eviction.
10
11extern crate alloc;
12
13use crate::config::LfuCacheConfig;
14use crate::list::{Entry, List};
15use crate::metrics::{CacheMetrics, LfuCacheMetrics};
16use alloc::boxed::Box;
17use alloc::collections::BTreeMap;
18use alloc::string::String;
19use core::borrow::Borrow;
20use core::hash::{BuildHasher, Hash};
21use core::mem;
22use core::num::NonZeroUsize;
23
24/// Type alias for the frequency metadata stored in the hash map
25type FrequencyMetadata<K, V> = (usize, *mut Entry<(K, V)>);
26
27#[cfg(feature = "hashbrown")]
28use hashbrown::hash_map::DefaultHashBuilder;
29#[cfg(feature = "hashbrown")]
30use hashbrown::HashMap;
31
32#[cfg(not(feature = "hashbrown"))]
33use std::collections::hash_map::RandomState as DefaultHashBuilder;
34#[cfg(not(feature = "hashbrown"))]
35use std::collections::HashMap;
36
37/// An implementation of a Least Frequently Used (LFU) cache.
38///
39/// The cache tracks the frequency of access for each item and evicts the least
40/// frequently used items when the cache reaches capacity. In case of a tie in
41/// frequency, the least recently used item among those with the same frequency
42/// is evicted.
43///
44/// # Examples
45///
46/// ```
47/// use cache_rs::lfu::LfuCache;
48/// use core::num::NonZeroUsize;
49///
50/// // Create an LFU cache with capacity 3
51/// let mut cache = LfuCache::new(NonZeroUsize::new(3).unwrap());
52///
53/// // Add some items
54/// cache.put("a", 1);
55/// cache.put("b", 2);
56/// cache.put("c", 3);
57///
58/// // Access "a" multiple times to increase its frequency
59/// assert_eq!(cache.get(&"a"), Some(&1));
60/// assert_eq!(cache.get(&"a"), Some(&1));
61///
62/// // Add a new item, which will evict the least frequently used item
63/// cache.put("d", 4);
64/// assert_eq!(cache.get(&"b"), None); // "b" was evicted as it had frequency 0
65/// ```
66#[derive(Debug)]
67pub struct LfuCache<K, V, S = DefaultHashBuilder> {
68 /// Configuration for the LFU cache
69 config: LfuCacheConfig,
70
71 /// Current minimum frequency in the cache
72 min_frequency: usize,
73
74 /// Map from keys to their frequency and list node
75 map: HashMap<K, FrequencyMetadata<K, V>, S>,
76
77 /// Map from frequency to list of items with that frequency
78 /// Items within each frequency list are ordered by recency (LRU within frequency)
79 frequency_lists: BTreeMap<usize, List<(K, V)>>,
80
81 /// Metrics for tracking cache performance and frequency distribution
82 metrics: LfuCacheMetrics,
83}
84
85impl<K: Hash + Eq, V: Clone, S: BuildHasher> LfuCache<K, V, S> {
86 /// Creates a new LFU cache with the specified capacity and hash builder.
87 ///
88 /// # Examples
89 ///
90 /// ```
91 /// use cache_rs::lfu::LfuCache;
92 /// use core::num::NonZeroUsize;
93 /// use std::collections::hash_map::RandomState;
94 ///
95 /// let cache: LfuCache<&str, u32, _> = LfuCache::with_hasher(
96 /// NonZeroUsize::new(10).unwrap(),
97 /// RandomState::new()
98 /// );
99 /// ```
100 pub fn with_hasher(cap: NonZeroUsize, hash_builder: S) -> Self {
101 let config = LfuCacheConfig::new(cap);
102 let map_capacity = config.capacity().get().next_power_of_two();
103 let max_cache_size_bytes = config.capacity().get() as u64 * 128; // Estimate based on capacity
104 LfuCache {
105 config,
106 min_frequency: 1,
107 map: HashMap::with_capacity_and_hasher(map_capacity, hash_builder),
108 frequency_lists: BTreeMap::new(),
109 metrics: LfuCacheMetrics::new(max_cache_size_bytes),
110 }
111 }
112
113 /// Returns the maximum number of key-value pairs the cache can hold.
114 pub fn cap(&self) -> NonZeroUsize {
115 self.config.capacity()
116 }
117
118 /// Returns the current number of key-value pairs in the cache.
119 pub fn len(&self) -> usize {
120 self.map.len()
121 }
122
123 /// Returns `true` if the cache contains no key-value pairs.
124 pub fn is_empty(&self) -> bool {
125 self.map.is_empty()
126 }
127
128 /// Estimates the size of a key-value pair in bytes for metrics tracking
129 fn estimate_object_size(&self, _key: &K, _value: &V) -> u64 {
130 // Simple estimation: key size + value size + overhead for pointers and metadata
131 mem::size_of::<K>() as u64 + mem::size_of::<V>() as u64 + 64
132 }
133
134 /// Updates the frequency of an item and moves it to the appropriate frequency list.
135 unsafe fn update_frequency(&mut self, key: &K, old_frequency: usize) -> *mut Entry<(K, V)>
136 where
137 K: Clone,
138 {
139 let new_frequency = old_frequency + 1;
140
141 // Record frequency increment
142 self.metrics
143 .record_frequency_increment(old_frequency, new_frequency);
144
145 // Get the current node from the old frequency list
146 let (_, node) = self.map.get(key).unwrap();
147
148 // Remove from old frequency list
149 let boxed_entry = self
150 .frequency_lists
151 .get_mut(&old_frequency)
152 .unwrap()
153 .remove(*node)
154 .unwrap();
155
156 // If the old frequency list is now empty and it was the minimum frequency,
157 // update the minimum frequency
158 if self.frequency_lists.get(&old_frequency).unwrap().is_empty()
159 && old_frequency == self.min_frequency
160 {
161 self.min_frequency = new_frequency;
162 }
163
164 // Add to new frequency list
165 let entry_ptr = Box::into_raw(boxed_entry);
166
167 // Ensure the new frequency list exists
168 let capacity = self.config.capacity();
169 self.frequency_lists
170 .entry(new_frequency)
171 .or_insert_with(|| List::new(capacity));
172
173 // Add to the front of the new frequency list (most recently used within that frequency)
174 self.frequency_lists
175 .get_mut(&new_frequency)
176 .unwrap()
177 .attach_from_other_list(entry_ptr);
178
179 // Update the map
180 self.map.get_mut(key).unwrap().0 = new_frequency;
181 self.map.get_mut(key).unwrap().1 = entry_ptr;
182
183 // Update metrics with new frequency levels
184 self.metrics.update_frequency_levels(&self.frequency_lists);
185
186 entry_ptr
187 }
188
189 /// Returns a reference to the value corresponding to the key.
190 ///
191 /// The key may be any borrowed form of the cache's key type, but
192 /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for
193 /// the key type.
194 ///
195 /// Accessing an item increases its frequency count.
196 pub fn get<Q>(&mut self, key: &Q) -> Option<&V>
197 where
198 K: Borrow<Q> + Clone,
199 Q: ?Sized + Hash + Eq,
200 {
201 if let Some(&(frequency, node)) = self.map.get(key) {
202 // Cache hit
203 unsafe {
204 // SAFETY: node comes from our map, so it's a valid pointer to an entry in our frequency list
205 let (key_ref, value) = (*node).get_value();
206
207 // Record hit with estimated object size
208 let object_size = self.estimate_object_size(key_ref, value);
209 self.metrics.record_frequency_hit(object_size, frequency);
210
211 let new_node = self.update_frequency(key_ref, frequency);
212 let (_, value) = (*new_node).get_value();
213 Some(value)
214 }
215 } else {
216 // Cache miss - we can't estimate size without the actual object
217 // The simulation system will need to call record_miss separately
218 None
219 }
220 }
221
222 /// Returns a mutable reference to the value corresponding to the key.
223 ///
224 /// The key may be any borrowed form of the cache's key type, but
225 /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for
226 /// the key type.
227 ///
228 /// Accessing an item increases its frequency count.
229 pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
230 where
231 K: Borrow<Q> + Clone,
232 Q: ?Sized + Hash + Eq,
233 {
234 if let Some(&(frequency, node)) = self.map.get(key) {
235 // Cache hit
236 unsafe {
237 // SAFETY: node comes from our map, so it's a valid pointer to an entry in our frequency list
238 let (key_ref, value) = (*node).get_value();
239
240 // Record hit with estimated object size
241 let object_size = self.estimate_object_size(key_ref, value);
242 self.metrics.record_frequency_hit(object_size, frequency);
243
244 let new_node = self.update_frequency(key_ref, frequency);
245 let (_, value) = (*new_node).get_value_mut();
246 Some(value)
247 }
248 } else {
249 None
250 }
251 }
252
253 /// Inserts a key-value pair into the cache.
254 ///
255 /// If the cache already contained this key, the old value is replaced and returned.
256 /// Otherwise, if the cache is at capacity, the least frequently used item is evicted.
257 /// In case of a tie in frequency, the least recently used item among those with
258 /// the same frequency is evicted.
259 ///
260 /// New items are inserted with a frequency of 1.
261 pub fn put(&mut self, key: K, value: V) -> Option<(K, V)>
262 where
263 K: Clone,
264 {
265 let object_size = self.estimate_object_size(&key, &value);
266
267 // If key already exists, update it
268 if let Some(&(frequency, node)) = self.map.get(&key) {
269 unsafe {
270 // SAFETY: node comes from our map, so it's a valid pointer to an entry in our frequency list
271 let old_entry = self.frequency_lists.get_mut(&frequency).unwrap().update(
272 node,
273 (key.clone(), value),
274 true,
275 );
276
277 // Record the storage of the new value
278 self.metrics.core.record_insertion(object_size);
279
280 return old_entry.0;
281 }
282 }
283
284 let mut evicted = None;
285
286 // If at capacity, evict the least frequently used item
287 if self.len() >= self.config.capacity().get() {
288 // Find the list with minimum frequency and evict from the end (LRU within frequency)
289 if let Some(min_freq_list) = self.frequency_lists.get_mut(&self.min_frequency) {
290 if let Some(old_entry) = min_freq_list.remove_last() {
291 unsafe {
292 // SAFETY: old_entry comes from min_freq_list.remove_last(), so it's a valid Box
293 // that we own. Converting to raw pointer is safe for temporary access.
294 let entry_ptr = Box::into_raw(old_entry);
295 let (old_key, old_value) = (*entry_ptr).get_value().clone();
296
297 // Record eviction
298 let evicted_size = self.estimate_object_size(&old_key, &old_value);
299 self.metrics.core.record_eviction(evicted_size);
300
301 // Remove from map
302 self.map.remove(&old_key);
303 evicted = Some((old_key, old_value));
304
305 // SAFETY: Reconstructing Box from the same raw pointer to properly free memory
306 let _ = Box::from_raw(entry_ptr);
307 }
308 }
309 }
310 }
311
312 // Add new item with frequency 1
313 let frequency = 1;
314 self.min_frequency = 1;
315
316 // Ensure frequency list exists
317 let capacity = self.config.capacity();
318 self.frequency_lists
319 .entry(frequency)
320 .or_insert_with(|| List::new(capacity));
321
322 if let Some(node) = self
323 .frequency_lists
324 .get_mut(&frequency)
325 .unwrap()
326 .add((key.clone(), value))
327 {
328 self.map.insert(key, (frequency, node));
329 }
330
331 // Record the insertion
332 self.metrics.core.record_insertion(object_size);
333
334 // Update frequency levels
335 self.metrics.update_frequency_levels(&self.frequency_lists);
336
337 evicted
338 }
339
340 /// Removes a key from the cache, returning the value at the key if the key was previously in the cache.
341 ///
342 /// The key may be any borrowed form of the cache's key type, but
343 /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for
344 /// the key type.
345 pub fn remove<Q>(&mut self, key: &Q) -> Option<V>
346 where
347 K: Borrow<Q>,
348 Q: ?Sized + Hash + Eq,
349 V: Clone,
350 {
351 let (frequency, node) = self.map.remove(key)?;
352
353 unsafe {
354 // SAFETY: node comes from our map and was just removed, so frequency_lists.remove is safe
355 let boxed_entry = self.frequency_lists.get_mut(&frequency)?.remove(node)?;
356 // SAFETY: boxed_entry is a valid Box we own, converting to raw pointer for temporary access
357 let entry_ptr = Box::into_raw(boxed_entry);
358 let value = (*entry_ptr).get_value().1.clone();
359 // SAFETY: Reconstructing Box from the same raw pointer to properly free memory
360 let _ = Box::from_raw(entry_ptr);
361
362 // Update min_frequency if necessary
363 if self.frequency_lists.get(&frequency).unwrap().is_empty()
364 && frequency == self.min_frequency
365 {
366 // Find the next minimum frequency
367 self.min_frequency = self
368 .frequency_lists
369 .keys()
370 .find(|&&f| f > frequency && !self.frequency_lists.get(&f).unwrap().is_empty())
371 .copied()
372 .unwrap_or(1);
373 }
374
375 Some(value)
376 }
377 }
378
379 /// Clears the cache, removing all key-value pairs.
380 pub fn clear(&mut self) {
381 self.map.clear();
382 self.frequency_lists.clear();
383 self.min_frequency = 1;
384 }
385
386 /// Records a cache miss for metrics tracking (to be called by simulation system)
387 pub fn record_miss(&mut self, object_size: u64) {
388 self.metrics.record_miss(object_size);
389 }
390}
391
392impl<K: Hash + Eq, V> LfuCache<K, V>
393where
394 V: Clone,
395{
396 /// Creates a new LFU cache with the specified capacity.
397 ///
398 /// # Examples
399 ///
400 /// ```
401 /// use cache_rs::lfu::LfuCache;
402 /// use core::num::NonZeroUsize;
403 ///
404 /// let cache: LfuCache<&str, u32> = LfuCache::new(NonZeroUsize::new(10).unwrap());
405 /// ```
406 pub fn new(cap: NonZeroUsize) -> LfuCache<K, V, DefaultHashBuilder> {
407 let config = LfuCacheConfig::new(cap);
408 LfuCache::with_hasher(config.capacity(), DefaultHashBuilder::default())
409 }
410}
411
412impl<K: Hash + Eq, V, S: BuildHasher> CacheMetrics for LfuCache<K, V, S> {
413 fn metrics(&self) -> BTreeMap<String, f64> {
414 self.metrics.metrics()
415 }
416
417 fn algorithm_name(&self) -> &'static str {
418 self.metrics.algorithm_name()
419 }
420}
421
422#[cfg(test)]
423mod tests {
424 extern crate std;
425 use std::string::ToString;
426
427 use super::*;
428 use alloc::string::String;
429
430 #[test]
431 fn test_lfu_basic() {
432 let mut cache = LfuCache::new(NonZeroUsize::new(3).unwrap());
433
434 // Add items
435 assert_eq!(cache.put("a", 1), None);
436 assert_eq!(cache.put("b", 2), None);
437 assert_eq!(cache.put("c", 3), None);
438
439 // Access "a" multiple times to increase its frequency
440 assert_eq!(cache.get(&"a"), Some(&1));
441 assert_eq!(cache.get(&"a"), Some(&1));
442
443 // Access "b" once
444 assert_eq!(cache.get(&"b"), Some(&2));
445
446 // Add a new item, should evict "c" (frequency 0, least recently used among frequency 0)
447 let evicted = cache.put("d", 4);
448 assert!(evicted.is_some());
449 let (evicted_key, evicted_val) = evicted.unwrap();
450 assert_eq!(evicted_key, "c");
451 assert_eq!(evicted_val, 3);
452
453 // Check remaining items
454 assert_eq!(cache.get(&"a"), Some(&1)); // frequency 3 now
455 assert_eq!(cache.get(&"b"), Some(&2)); // frequency 2 now
456 assert_eq!(cache.get(&"d"), Some(&4)); // frequency 1 now
457 assert_eq!(cache.get(&"c"), None); // evicted
458 }
459
460 #[test]
461 fn test_lfu_frequency_ordering() {
462 let mut cache = LfuCache::new(NonZeroUsize::new(2).unwrap());
463
464 // Add items
465 cache.put("a", 1);
466 cache.put("b", 2);
467
468 // Access "a" multiple times
469 cache.get(&"a");
470 cache.get(&"a");
471 cache.get(&"a");
472
473 // Access "b" once
474 cache.get(&"b");
475
476 // Add new item, should evict "b" (lower frequency)
477 let evicted = cache.put("c", 3);
478 assert_eq!(evicted.unwrap().0, "b");
479
480 assert_eq!(cache.get(&"a"), Some(&1));
481 assert_eq!(cache.get(&"c"), Some(&3));
482 assert_eq!(cache.get(&"b"), None);
483 }
484
485 #[test]
486 fn test_lfu_update_existing() {
487 let mut cache = LfuCache::new(NonZeroUsize::new(2).unwrap());
488
489 cache.put("a", 1);
490 cache.get(&"a"); // frequency becomes 2
491
492 // Update existing key
493 let old_value = cache.put("a", 10);
494 assert_eq!(old_value.unwrap().1, 1);
495
496 // The frequency should be preserved
497 cache.put("b", 2);
498 cache.put("c", 3); // Should evict "b" because "a" has higher frequency
499
500 assert_eq!(cache.get(&"a"), Some(&10));
501 assert_eq!(cache.get(&"c"), Some(&3));
502 assert_eq!(cache.get(&"b"), None);
503 }
504
505 #[test]
506 fn test_lfu_remove() {
507 let mut cache = LfuCache::new(NonZeroUsize::new(3).unwrap());
508
509 cache.put("a", 1);
510 cache.put("b", 2);
511 cache.put("c", 3);
512
513 // Remove item
514 assert_eq!(cache.remove(&"b"), Some(2));
515 assert_eq!(cache.remove(&"b"), None);
516
517 // Check remaining items
518 assert_eq!(cache.get(&"a"), Some(&1));
519 assert_eq!(cache.get(&"c"), Some(&3));
520 assert_eq!(cache.len(), 2);
521 }
522
523 #[test]
524 fn test_lfu_clear() {
525 let mut cache = LfuCache::new(NonZeroUsize::new(3).unwrap());
526
527 cache.put("a", 1);
528 cache.put("b", 2);
529 cache.put("c", 3);
530
531 assert_eq!(cache.len(), 3);
532 cache.clear();
533 assert_eq!(cache.len(), 0);
534 assert!(cache.is_empty());
535
536 // Should be able to add items after clear
537 cache.put("d", 4);
538 assert_eq!(cache.get(&"d"), Some(&4));
539 }
540
541 #[test]
542 fn test_lfu_get_mut() {
543 let mut cache = LfuCache::new(NonZeroUsize::new(2).unwrap());
544
545 cache.put("a", 1);
546
547 // Modify value using get_mut
548 if let Some(value) = cache.get_mut(&"a") {
549 *value = 10;
550 }
551
552 assert_eq!(cache.get(&"a"), Some(&10));
553 }
554
555 #[test]
556 fn test_lfu_complex_values() {
557 let mut cache = LfuCache::new(NonZeroUsize::new(2).unwrap());
558
559 #[derive(Debug, Clone, PartialEq)]
560 struct ComplexValue {
561 id: usize,
562 data: String,
563 }
564
565 // Add complex values
566 cache.put(
567 "a",
568 ComplexValue {
569 id: 1,
570 data: "a-data".to_string(),
571 },
572 );
573
574 cache.put(
575 "b",
576 ComplexValue {
577 id: 2,
578 data: "b-data".to_string(),
579 },
580 );
581
582 // Modify a value using get_mut
583 if let Some(value) = cache.get_mut(&"a") {
584 value.id = 100;
585 value.data = "a-modified".to_string();
586 }
587
588 // Check the modified value
589 let a = cache.get(&"a").unwrap();
590 assert_eq!(a.id, 100);
591 assert_eq!(a.data, "a-modified");
592 }
593}