cache_rs/
gdsf.rs

1//! Greedy Dual-Size Frequency (GDSF) cache implementation.
2//!
3//! GDSF is a sophisticated cache replacement algorithm that combines frequency, size,
4//! and aging to optimize cache performance for variable-sized objects.
5//!
6//! # Thread Safety
7//!
8//! This implementation is not thread-safe. For concurrent access, wrap the cache
9//! with a synchronization primitive such as `Mutex` or `RwLock`.
10
11extern crate alloc;
12
13use crate::config::GdsfCacheConfig;
14use crate::list::{Entry, List};
15use crate::metrics::{CacheMetrics, GdsfCacheMetrics};
16use alloc::boxed::Box;
17use alloc::collections::BTreeMap;
18use alloc::string::String;
19use core::borrow::Borrow;
20use core::hash::{BuildHasher, Hash};
21use core::mem;
22use core::num::NonZeroUsize;
23
24#[cfg(feature = "hashbrown")]
25use hashbrown::hash_map::DefaultHashBuilder;
26#[cfg(feature = "hashbrown")]
27use hashbrown::HashMap;
28
29#[cfg(not(feature = "hashbrown"))]
30use std::collections::hash_map::RandomState as DefaultHashBuilder;
31#[cfg(not(feature = "hashbrown"))]
32use std::collections::HashMap;
33
34/// Metadata for each cache entry in GDSF
35#[derive(Debug, Clone, Copy)]
36struct EntryMetadata<K, V> {
37    frequency: u64,
38    size: u64,
39    priority: f64,
40    node: *mut Entry<(K, V)>,
41}
42
43/// Internal GDSF segment containing the actual cache algorithm.
44pub(crate) struct GdsfSegment<K, V, S = DefaultHashBuilder> {
45    config: GdsfCacheConfig,
46    global_age: f64,
47    min_priority: f64,
48    map: HashMap<K, EntryMetadata<K, V>, S>,
49    priority_lists: BTreeMap<u64, List<(K, V)>>,
50    metrics: GdsfCacheMetrics,
51}
52
53// SAFETY: GdsfSegment owns all data and raw pointers point only to nodes owned by
54// `priority_lists`. Concurrent access is safe when wrapped in proper synchronization primitives.
55unsafe impl<K: Send, V: Send, S: Send> Send for GdsfSegment<K, V, S> {}
56
57// SAFETY: All mutation requires &mut self; shared references cannot cause data races.
58unsafe impl<K: Send, V: Send, S: Sync> Sync for GdsfSegment<K, V, S> {}
59
60impl<K: Hash + Eq, V: Clone, S: BuildHasher> GdsfSegment<K, V, S> {
61    pub(crate) fn with_hasher(cap: NonZeroUsize, hash_builder: S) -> Self {
62        let config = GdsfCacheConfig::new(cap);
63        let map_capacity = config.capacity().get().next_power_of_two();
64        let max_cache_size_bytes = config.capacity().get() as u64 * 128;
65
66        GdsfSegment {
67            config,
68            global_age: config.initial_age(),
69            min_priority: 0.0,
70            map: HashMap::with_capacity_and_hasher(map_capacity, hash_builder),
71            priority_lists: BTreeMap::new(),
72            metrics: GdsfCacheMetrics::new(max_cache_size_bytes),
73        }
74    }
75
76    #[inline]
77    pub(crate) fn cap(&self) -> NonZeroUsize {
78        self.config.capacity()
79    }
80
81    #[inline]
82    pub(crate) fn len(&self) -> usize {
83        self.map.len()
84    }
85
86    #[inline]
87    pub(crate) fn is_empty(&self) -> bool {
88        self.map.is_empty()
89    }
90
91    #[inline]
92    pub(crate) fn global_age(&self) -> f64 {
93        self.global_age
94    }
95
96    #[inline]
97    pub(crate) fn metrics(&self) -> &GdsfCacheMetrics {
98        &self.metrics
99    }
100
101    fn estimate_object_size(&self, _key: &K, _value: &V) -> u64 {
102        mem::size_of::<K>() as u64 + mem::size_of::<V>() as u64 + 64
103    }
104
105    #[inline]
106    pub(crate) fn record_miss(&mut self, object_size: u64) {
107        self.metrics.core.record_miss(object_size);
108    }
109
110    fn calculate_priority(&self, frequency: u64, size: u64) -> f64 {
111        if size == 0 {
112            return f64::INFINITY;
113        }
114        (frequency as f64 / size as f64) + self.global_age
115    }
116
117    unsafe fn update_priority_by_node(&mut self, node: *mut Entry<(K, V)>) -> *mut Entry<(K, V)>
118    where
119        K: Clone + Hash + Eq,
120    {
121        // SAFETY: node is guaranteed valid by caller's contract
122        let (key_ref, _) = (*node).get_value();
123        let key_cloned = key_ref.clone();
124
125        let metadata = self.map.get_mut(&key_cloned).unwrap();
126        let old_priority = metadata.priority;
127        let size = metadata.size;
128
129        metadata.frequency += 1;
130
131        let global_age = self.global_age;
132        let new_priority = if size == 0 {
133            f64::INFINITY
134        } else {
135            (metadata.frequency as f64 / size as f64) + global_age
136        };
137        metadata.priority = new_priority;
138
139        let old_priority_key = (old_priority * 1000.0) as u64;
140        let new_priority_key = (new_priority * 1000.0) as u64;
141
142        if old_priority_key == new_priority_key {
143            let node = metadata.node;
144            self.priority_lists
145                .get_mut(&new_priority_key)
146                .unwrap()
147                .move_to_front(node);
148            return node;
149        }
150
151        let node = metadata.node;
152        let boxed_entry = self
153            .priority_lists
154            .get_mut(&old_priority_key)
155            .unwrap()
156            .remove(node)
157            .unwrap();
158
159        if self
160            .priority_lists
161            .get(&old_priority_key)
162            .unwrap()
163            .is_empty()
164        {
165            self.priority_lists.remove(&old_priority_key);
166        }
167
168        let entry_ptr = Box::into_raw(boxed_entry);
169
170        let capacity = self.config.capacity();
171        self.priority_lists
172            .entry(new_priority_key)
173            .or_insert_with(|| List::new(capacity));
174
175        self.priority_lists
176            .get_mut(&new_priority_key)
177            .unwrap()
178            .attach_from_other_list(entry_ptr);
179
180        metadata.node = entry_ptr;
181        entry_ptr
182    }
183
184    pub(crate) fn get<Q>(&mut self, key: &Q) -> Option<V>
185    where
186        K: Borrow<Q> + Clone,
187        Q: ?Sized + Hash + Eq,
188    {
189        if let Some(metadata) = self.map.get(key) {
190            let node = metadata.node;
191            unsafe {
192                // SAFETY: node comes from our map
193                let (key_ref, value) = (*node).get_value();
194                let object_size = self.estimate_object_size(key_ref, value);
195                self.metrics.core.record_hit(object_size);
196                self.metrics.record_item_access(
197                    metadata.frequency,
198                    metadata.size,
199                    metadata.priority,
200                );
201
202                let new_node = self.update_priority_by_node(node);
203                let (_, value) = (*new_node).get_value();
204                Some(value.clone())
205            }
206        } else {
207            None
208        }
209    }
210
211    pub(crate) fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
212    where
213        K: Borrow<Q> + Clone,
214        Q: ?Sized + Hash + Eq,
215    {
216        if let Some(metadata) = self.map.get(key) {
217            let node = metadata.node;
218            unsafe {
219                // SAFETY: node comes from our map
220                let (key_ref, value) = (*node).get_value();
221                let object_size = self.estimate_object_size(key_ref, value);
222                self.metrics.core.record_hit(object_size);
223                self.metrics.record_item_access(
224                    metadata.frequency,
225                    metadata.size,
226                    metadata.priority,
227                );
228
229                let new_node = self.update_priority_by_node(node);
230                let (_, value) = (*new_node).get_value_mut();
231                Some(value)
232            }
233        } else {
234            None
235        }
236    }
237
238    pub(crate) fn contains_key<Q>(&self, key: &Q) -> bool
239    where
240        K: Borrow<Q>,
241        Q: ?Sized + Hash + Eq,
242    {
243        self.map.contains_key(key)
244    }
245
246    pub(crate) fn put(&mut self, key: K, val: V, size: u64) -> Option<V>
247    where
248        K: Clone,
249    {
250        if size == 0 {
251            return None;
252        }
253
254        let object_size = self.estimate_object_size(&key, &val);
255
256        if let Some(mut metadata) = self.map.remove(&key) {
257            unsafe {
258                // SAFETY: metadata.node comes from our map
259                let old_priority_key = (metadata.priority * 1000.0) as u64;
260                let list = self.priority_lists.get_mut(&old_priority_key).unwrap();
261                let entry = list.remove(metadata.node).unwrap();
262
263                if list.is_empty() {
264                    self.priority_lists.remove(&old_priority_key);
265                }
266
267                let entry_ptr = Box::into_raw(entry);
268                let (_, old_value) = (*entry_ptr).get_value().clone();
269                let _ = Box::from_raw(entry_ptr);
270
271                metadata.size = size;
272                metadata.priority = self.calculate_priority(metadata.frequency, size);
273
274                let new_priority_key = (metadata.priority * 1000.0) as u64;
275                let capacity = self.cap();
276                let list = self
277                    .priority_lists
278                    .entry(new_priority_key)
279                    .or_insert_with(|| List::new(capacity));
280
281                if let Some(new_node) = list.add((key.clone(), val)) {
282                    metadata.node = new_node;
283                    self.map.insert(key, metadata);
284                    self.metrics.core.record_insertion(object_size);
285                    return Some(old_value);
286                } else {
287                    return None;
288                }
289            }
290        }
291
292        while self.len() >= self.config.capacity().get() {
293            self.evict_one();
294        }
295
296        let priority = self.calculate_priority(1, size);
297        let priority_key = (priority * 1000.0) as u64;
298
299        let capacity = self.config.capacity();
300        let list = self
301            .priority_lists
302            .entry(priority_key)
303            .or_insert_with(|| List::new(capacity));
304
305        if let Some(entry) = list.add((key.clone(), val)) {
306            let metadata = EntryMetadata {
307                frequency: 1,
308                size,
309                priority,
310                node: entry,
311            };
312
313            self.map.insert(key, metadata);
314
315            if self.len() == 1 || priority < self.min_priority {
316                self.min_priority = priority;
317            }
318
319            self.metrics.core.record_insertion(object_size);
320            self.metrics
321                .record_item_cached(size, self.metrics.average_item_size());
322            self.metrics.record_item_access(1, size, priority);
323
324            None
325        } else {
326            None
327        }
328    }
329
330    fn evict_one(&mut self) {
331        if self.is_empty() {
332            return;
333        }
334
335        let min_priority_key = *self.priority_lists.keys().next().unwrap();
336        let list = self.priority_lists.get_mut(&min_priority_key).unwrap();
337
338        if let Some(entry) = list.remove_last() {
339            unsafe {
340                // SAFETY: entry comes from list.remove_last()
341                let entry_ptr = Box::into_raw(entry);
342                let (entry_key, _entry_value) = (*entry_ptr).get_value();
343
344                let priority_to_update = if let Some(metadata) = self.map.get(entry_key) {
345                    metadata.priority
346                } else {
347                    self.global_age
348                };
349
350                let estimated_size = mem::size_of::<K>() as u64 + mem::size_of::<V>() as u64 + 64;
351
352                self.metrics.core.record_eviction(estimated_size);
353                self.metrics.record_size_based_eviction();
354                self.metrics.record_aging_event(priority_to_update);
355
356                self.global_age = priority_to_update;
357                self.map.remove(entry_key);
358
359                let _ = Box::from_raw(entry_ptr);
360            }
361        }
362
363        if list.is_empty() {
364            self.priority_lists.remove(&min_priority_key);
365        }
366    }
367
368    pub(crate) fn pop<Q>(&mut self, key: &Q) -> Option<V>
369    where
370        K: Borrow<Q>,
371        Q: ?Sized + Hash + Eq,
372    {
373        if let Some(metadata) = self.map.remove(key) {
374            unsafe {
375                // SAFETY: metadata.node comes from our map
376                let priority_key = (metadata.priority * 1000.0) as u64;
377                let list = self.priority_lists.get_mut(&priority_key).unwrap();
378                let entry = list.remove(metadata.node).unwrap();
379
380                if list.is_empty() {
381                    self.priority_lists.remove(&priority_key);
382                }
383
384                let entry_ptr = Box::into_raw(entry);
385                let (_, value) = (*entry_ptr).get_value();
386                let result = value.clone();
387                let _ = Box::from_raw(entry_ptr);
388
389                Some(result)
390            }
391        } else {
392            None
393        }
394    }
395
396    pub(crate) fn clear(&mut self) {
397        self.map.clear();
398        self.priority_lists.clear();
399        self.global_age = 0.0;
400        self.min_priority = 0.0;
401    }
402}
403
404impl<K, V, S> core::fmt::Debug for GdsfSegment<K, V, S> {
405    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
406        f.debug_struct("GdsfSegment")
407            .field("capacity", &self.config.capacity())
408            .field("len", &self.map.len())
409            .field("global_age", &self.global_age)
410            .finish()
411    }
412}
413
414/// An implementation of a Greedy Dual-Size Frequency (GDSF) cache.
415#[derive(Debug)]
416pub struct GdsfCache<K, V, S = DefaultHashBuilder> {
417    segment: GdsfSegment<K, V, S>,
418}
419
420impl<K: Hash + Eq, V: Clone, S: BuildHasher> GdsfCache<K, V, S> {
421    pub fn with_hasher(cap: NonZeroUsize, hash_builder: S) -> Self {
422        Self {
423            segment: GdsfSegment::with_hasher(cap, hash_builder),
424        }
425    }
426
427    #[inline]
428    pub fn cap(&self) -> NonZeroUsize {
429        self.segment.cap()
430    }
431
432    #[inline]
433    pub fn len(&self) -> usize {
434        self.segment.len()
435    }
436
437    #[inline]
438    pub fn is_empty(&self) -> bool {
439        self.segment.is_empty()
440    }
441
442    #[inline]
443    pub fn global_age(&self) -> f64 {
444        self.segment.global_age()
445    }
446
447    #[inline]
448    pub fn record_miss(&mut self, object_size: u64) {
449        self.segment.record_miss(object_size);
450    }
451
452    #[inline]
453    pub fn get<Q>(&mut self, key: &Q) -> Option<V>
454    where
455        K: Borrow<Q> + Clone,
456        Q: ?Sized + Hash + Eq,
457    {
458        self.segment.get(key)
459    }
460
461    #[inline]
462    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
463    where
464        K: Borrow<Q> + Clone,
465        Q: ?Sized + Hash + Eq,
466    {
467        self.segment.get_mut(key)
468    }
469
470    #[inline]
471    pub fn contains_key<Q>(&self, key: &Q) -> bool
472    where
473        K: Borrow<Q>,
474        Q: ?Sized + Hash + Eq,
475    {
476        self.segment.contains_key(key)
477    }
478
479    #[inline]
480    pub fn put(&mut self, key: K, val: V, size: u64) -> Option<V>
481    where
482        K: Clone,
483    {
484        self.segment.put(key, val, size)
485    }
486
487    #[inline]
488    pub fn pop<Q>(&mut self, key: &Q) -> Option<V>
489    where
490        K: Borrow<Q>,
491        Q: ?Sized + Hash + Eq,
492    {
493        self.segment.pop(key)
494    }
495
496    #[inline]
497    pub fn clear(&mut self) {
498        self.segment.clear()
499    }
500}
501
502impl<K: Hash + Eq, V: Clone, S: BuildHasher> CacheMetrics for GdsfCache<K, V, S> {
503    fn metrics(&self) -> BTreeMap<String, f64> {
504        self.segment.metrics().metrics()
505    }
506
507    fn algorithm_name(&self) -> &'static str {
508        self.segment.metrics().algorithm_name()
509    }
510}
511
512impl<K: Hash + Eq, V: Clone> GdsfCache<K, V, DefaultHashBuilder> {
513    pub fn new(cap: NonZeroUsize) -> Self {
514        let config = GdsfCacheConfig::new(cap);
515        Self::with_hasher(config.capacity(), DefaultHashBuilder::default())
516    }
517}
518
519#[cfg(test)]
520mod tests {
521    use super::*;
522    use core::num::NonZeroUsize;
523
524    #[test]
525    fn test_gdsf_basic_operations() {
526        let mut cache = GdsfCache::new(NonZeroUsize::new(3).unwrap());
527
528        assert_eq!(cache.put("a", 1, 1), None);
529        assert_eq!(cache.put("b", 2, 2), None);
530        assert_eq!(cache.put("c", 3, 1), None);
531        assert_eq!(cache.len(), 3);
532
533        assert_eq!(cache.get(&"a"), Some(1));
534        assert_eq!(cache.get(&"b"), Some(2));
535        assert_eq!(cache.get(&"c"), Some(3));
536
537        assert!(cache.contains_key(&"a"));
538        assert!(!cache.contains_key(&"d"));
539    }
540
541    #[test]
542    fn test_gdsf_frequency_priority() {
543        let mut cache = GdsfCache::new(NonZeroUsize::new(2).unwrap());
544
545        cache.put("a", 1, 1);
546        cache.put("b", 2, 1);
547
548        cache.get(&"a");
549        cache.get(&"a");
550
551        cache.put("c", 3, 1);
552
553        assert!(cache.contains_key(&"a"));
554        assert!(!cache.contains_key(&"b"));
555        assert!(cache.contains_key(&"c"));
556    }
557
558    #[test]
559    fn test_gdsf_size_consideration() {
560        let mut cache = GdsfCache::new(NonZeroUsize::new(2).unwrap());
561
562        cache.put("small", 1, 1);
563        cache.put("large", 2, 10);
564
565        cache.put("medium", 3, 5);
566
567        assert!(cache.contains_key(&"small"));
568        assert!(!cache.contains_key(&"large"));
569        assert!(cache.contains_key(&"medium"));
570    }
571
572    #[test]
573    fn test_gdsf_update_existing() {
574        let mut cache = GdsfCache::new(NonZeroUsize::new(2).unwrap());
575
576        cache.put("key", 1, 1);
577        assert_eq!(cache.get(&"key"), Some(1));
578
579        assert_eq!(cache.put("key", 2, 2), Some(1));
580        assert_eq!(cache.get(&"key"), Some(2));
581        assert_eq!(cache.len(), 1);
582    }
583
584    #[test]
585    fn test_gdsf_zero_size_rejection() {
586        let mut cache = GdsfCache::new(NonZeroUsize::new(2).unwrap());
587
588        assert_eq!(cache.put("key", 1, 0), None);
589        assert_eq!(cache.len(), 0);
590        assert!(!cache.contains_key(&"key"));
591    }
592
593    #[test]
594    fn test_gdsf_pop() {
595        let mut cache = GdsfCache::new(NonZeroUsize::new(2).unwrap());
596
597        cache.put("a", 1, 1);
598        cache.put("b", 2, 1);
599
600        assert_eq!(cache.pop(&"a"), Some(1));
601        assert_eq!(cache.len(), 1);
602        assert!(!cache.contains_key(&"a"));
603        assert!(cache.contains_key(&"b"));
604
605        assert_eq!(cache.pop(&"nonexistent"), None);
606    }
607
608    #[test]
609    fn test_gdsf_clear() {
610        let mut cache = GdsfCache::new(NonZeroUsize::new(2).unwrap());
611
612        cache.put("a", 1, 1);
613        cache.put("b", 2, 1);
614        assert_eq!(cache.len(), 2);
615
616        cache.clear();
617        assert_eq!(cache.len(), 0);
618        assert!(cache.is_empty());
619        assert!(!cache.contains_key(&"a"));
620        assert!(!cache.contains_key(&"b"));
621    }
622
623    #[test]
624    fn test_gdsf_global_aging() {
625        let mut cache = GdsfCache::new(NonZeroUsize::new(2).unwrap());
626
627        cache.put("a", 1, 1);
628        cache.put("b", 2, 1);
629
630        let initial_age = cache.global_age();
631
632        cache.put("c", 3, 1);
633
634        assert!(cache.global_age() > initial_age);
635    }
636
637    #[test]
638    fn test_miri_stacked_borrows_fix() {
639        let mut cache = GdsfCache::new(NonZeroUsize::new(10).unwrap());
640
641        cache.put("a", 1, 10);
642        cache.put("b", 2, 20);
643        cache.put("c", 3, 15);
644
645        for _ in 0..3 {
646            assert_eq!(cache.get(&"a"), Some(1));
647            assert_eq!(cache.get(&"b"), Some(2));
648            assert_eq!(cache.get(&"c"), Some(3));
649        }
650
651        assert_eq!(cache.len(), 3);
652
653        if let Some(val) = cache.get_mut(&"a") {
654            *val += 10;
655        }
656        assert_eq!(cache.get(&"a"), Some(11));
657    }
658
659    #[test]
660    fn test_gdsf_segment_directly() {
661        let mut segment: GdsfSegment<&str, i32, DefaultHashBuilder> =
662            GdsfSegment::with_hasher(NonZeroUsize::new(2).unwrap(), DefaultHashBuilder::default());
663        assert_eq!(segment.len(), 0);
664        assert!(segment.is_empty());
665        assert_eq!(segment.cap().get(), 2);
666        segment.put("a", 1, 1);
667        segment.put("b", 2, 2);
668        assert_eq!(segment.len(), 2);
669        assert_eq!(segment.get(&"a"), Some(1));
670        assert_eq!(segment.get(&"b"), Some(2));
671    }
672
673    #[test]
674    fn test_gdsf_concurrent_access() {
675        extern crate std;
676        use std::sync::{Arc, Mutex};
677        use std::thread;
678        use std::vec::Vec;
679
680        let cache = Arc::new(Mutex::new(GdsfCache::new(NonZeroUsize::new(100).unwrap())));
681        let num_threads = 4;
682        let ops_per_thread = 100;
683
684        let mut handles: Vec<std::thread::JoinHandle<()>> = Vec::new();
685
686        for t in 0..num_threads {
687            let cache = Arc::clone(&cache);
688            handles.push(thread::spawn(move || {
689                for i in 0..ops_per_thread {
690                    let key = std::format!("key_{}_{}", t, i);
691                    let size = ((i % 10) + 1) as u64; // Varying sizes 1-10
692                    let mut guard = cache.lock().unwrap();
693                    guard.put(key.clone(), i, size);
694                    let _ = guard.get(&key);
695                }
696            }));
697        }
698
699        for handle in handles {
700            handle.join().unwrap();
701        }
702
703        let mut guard = cache.lock().unwrap();
704        assert!(guard.len() <= 100);
705        guard.clear(); // Clean up for MIRI
706    }
707}