Skip to main content

oxistore_cache/
tinylfu.rs

1/// Window TinyLFU (W-TinyLFU) cache implementation.
2///
3/// Implements the algorithm described in:
4///   Einziger, G. & Friedman, R. (2017). TinyLFU: A Highly Efficient Cache
5///   Admission Policy. ACM Trans. Storage 13(4).
6///
7/// ## Architecture
8///
9/// The cache is partitioned into two regions:
10///
11/// - **Window** (~1% of total capacity): a small LRU buffer.  New items enter
12///   here; popular items are promoted to the main space.
13/// - **Main** (~99% of total capacity): split between:
14///   - **Protected** (80% of main): hot items that have been accessed at least
15///     twice and promoted from probation.
16///   - **Probation** (remaining main): items evicted from the window that
17///     survived the admission gate.
18///
19/// ## Frequency Estimation
20///
21/// A [`CountMinSketch`] with a [`Doorkeeper`] bloom filter tracks access
22/// frequency.  The doorkeeper prevents one-hit wonders from inflating counters.
23use std::collections::HashMap;
24use std::hash::{Hash, Hasher};
25
26use hashlink::LinkedHashMap;
27
28use crate::sketch::{CountMinSketch, Doorkeeper};
29use crate::{Cache, CacheEntry};
30
31// ---------------------------------------------------------------------------
32// Key → bytes helper for the sketch
33// ---------------------------------------------------------------------------
34
35/// A `Hasher` that accumulates bytes via FNV-1a for use as sketch input.
36struct KeyHasher {
37    state: u64,
38    seed: u64,
39}
40
41impl KeyHasher {
42    fn new(seed: u64) -> Self {
43        KeyHasher {
44            state: 0xcbf29ce484222325u64 ^ seed,
45            seed,
46        }
47    }
48}
49
50impl Hasher for KeyHasher {
51    fn write(&mut self, bytes: &[u8]) {
52        for &b in bytes {
53            self.state ^= b as u64;
54            self.state = self.state.wrapping_mul(0x100000001b3);
55        }
56    }
57
58    fn finish(&self) -> u64 {
59        self.state ^ self.seed
60    }
61}
62
63/// Hash a key to a 16-byte representation for the sketch.
64fn key_bytes<K: Hash>(key: &K) -> [u8; 16] {
65    let mut h1 = KeyHasher::new(0xcbf29ce484222325);
66    key.hash(&mut h1);
67    let a = h1.finish();
68
69    let mut h2 = KeyHasher::new(0x6c62272e07bb0142);
70    key.hash(&mut h2);
71    let b = h2.finish();
72
73    let mut buf = [0u8; 16];
74    buf[..8].copy_from_slice(&a.to_le_bytes());
75    buf[8..].copy_from_slice(&b.to_le_bytes());
76    buf
77}
78
79// ---------------------------------------------------------------------------
80// XOR-shift RNG for tie-breaking
81// ---------------------------------------------------------------------------
82
83fn xorshift64(x: u64) -> u64 {
84    let x = x ^ (x << 13);
85    let x = x ^ (x >> 7);
86    x ^ (x << 17)
87}
88
89// ---------------------------------------------------------------------------
90// Cache segment tag
91// ---------------------------------------------------------------------------
92
93/// Identifies which segment a key currently resides in.
94#[derive(Debug, Clone, Copy, PartialEq, Eq)]
95enum CacheSegment {
96    Window,
97    Probation,
98    Protected,
99}
100
101// ---------------------------------------------------------------------------
102// WTinyLfuCache
103// ---------------------------------------------------------------------------
104
105/// Window TinyLFU cache with near-optimal hit rates on skewed workloads.
106///
107/// # Capacity split
108///
109/// ```text
110/// window_cap    = max(1, total_cap / 100)
111/// main_cap      = total_cap - window_cap
112/// protected_cap = main_cap * 4 / 5
113/// ```
114///
115/// # Type parameters
116///
117/// - `K`: key type — must be `Eq + Hash + Clone`.
118/// - `V`: value type.
119pub struct WTinyLfuCache<K, V> {
120    total_cap: usize,
121    window_cap: usize,
122    main_cap: usize,
123    protected_cap: usize,
124
125    /// Window LRU segment.  Back = MRU, front = LRU (next candidate for eviction).
126    window: LinkedHashMap<K, CacheEntry<V>, ahash::RandomState>,
127    /// Protected segment (hot items).  Back = MRU, front = LRU.
128    protected: LinkedHashMap<K, CacheEntry<V>, ahash::RandomState>,
129    /// Probation segment.  Back = MRU, front = LRU (eviction candidate).
130    probation: LinkedHashMap<K, CacheEntry<V>, ahash::RandomState>,
131
132    /// Presence/routing index: maps each key to its current segment.
133    segment_index: HashMap<K, CacheSegment, ahash::RandomState>,
134
135    /// Count-Min Sketch for frequency estimation.
136    sketch: CountMinSketch,
137    /// Doorkeeper bloom filter.
138    doorkeeper: Doorkeeper,
139
140    /// XOR-shift RNG state for admission tie-breaking.
141    rng: u64,
142}
143
144impl<K, V> WTinyLfuCache<K, V>
145where
146    K: Eq + Hash + Clone,
147{
148    /// Create a new `WTinyLfuCache` with the given total capacity.
149    ///
150    /// Minimum capacity is 2.
151    #[must_use]
152    pub fn new(total_cap: usize) -> Self {
153        let total_cap = total_cap.max(2);
154        let window_cap = (total_cap / 100).max(1);
155        let main_cap = total_cap - window_cap;
156        let protected_cap = main_cap * 4 / 5;
157        let rng = total_cap as u64 ^ 0xdeadbeef_cafebabe;
158
159        WTinyLfuCache {
160            total_cap,
161            window_cap,
162            main_cap,
163            protected_cap,
164            window: LinkedHashMap::with_hasher(ahash::RandomState::default()),
165            protected: LinkedHashMap::with_hasher(ahash::RandomState::default()),
166            probation: LinkedHashMap::with_hasher(ahash::RandomState::default()),
167            segment_index: HashMap::with_hasher(ahash::RandomState::default()),
168            sketch: CountMinSketch::new(total_cap),
169            doorkeeper: Doorkeeper::new(total_cap),
170            rng,
171        }
172    }
173
174    // -----------------------------------------------------------------------
175    // Sketch helpers
176    // -----------------------------------------------------------------------
177
178    /// Record an access in the doorkeeper + sketch.
179    fn record_access(&mut self, key: &K) {
180        let kb = key_bytes(key);
181        let seen_before = self.doorkeeper.put(&kb);
182        if seen_before {
183            self.sketch.increment(&kb);
184        }
185    }
186
187    /// Estimated access frequency for `key`.
188    fn freq(&self, key: &K) -> u64 {
189        let kb = key_bytes(key);
190        self.sketch.estimate(&kb)
191    }
192
193    // -----------------------------------------------------------------------
194    // TTL helpers
195    // -----------------------------------------------------------------------
196
197    /// Lazily remove `key` if its entry has expired.  Returns `true` if removed.
198    fn remove_if_expired(&mut self, key: &K) -> bool {
199        let seg = match self.segment_index.get(key).copied() {
200            Some(s) => s,
201            None => return false,
202        };
203
204        let expired = match seg {
205            CacheSegment::Window => self
206                .window
207                .get(key)
208                .map(|e| e.is_expired())
209                .unwrap_or(false),
210            CacheSegment::Probation => self
211                .probation
212                .get(key)
213                .map(|e| e.is_expired())
214                .unwrap_or(false),
215            CacheSegment::Protected => self
216                .protected
217                .get(key)
218                .map(|e| e.is_expired())
219                .unwrap_or(false),
220        };
221
222        if expired {
223            self.segment_index.remove(key);
224            match seg {
225                CacheSegment::Window => {
226                    self.window.remove(key);
227                }
228                CacheSegment::Probation => {
229                    self.probation.remove(key);
230                }
231                CacheSegment::Protected => {
232                    self.protected.remove(key);
233                }
234            }
235        }
236        expired
237    }
238
239    // -----------------------------------------------------------------------
240    // Segment promotion helpers
241    // -----------------------------------------------------------------------
242
243    /// Move `key` to the MRU tail of the window segment.
244    fn promote_in_window(&mut self, key: &K) {
245        if let Some(entry) = self.window.remove(key) {
246            self.window.insert(key.clone(), entry);
247        }
248    }
249
250    /// Move `key` to the MRU tail of the protected segment.
251    fn promote_in_protected(&mut self, key: &K) {
252        if let Some(entry) = self.protected.remove(key) {
253            self.protected.insert(key.clone(), entry);
254        }
255    }
256
257    /// Move `key` from probation → protected (MRU tail).
258    /// If protected is now over capacity, demote its LRU front to probation MRU tail.
259    fn promote_probation_to_protected(&mut self, key: &K) {
260        if let Some(entry) = self.probation.remove(key) {
261            self.segment_index
262                .insert(key.clone(), CacheSegment::Protected);
263            self.protected.insert(key.clone(), entry);
264
265            // Demote protected tail to probation if over capacity.
266            if self.protected.len() > self.protected_cap {
267                if let Some((demoted_key, demoted_entry)) = self.protected.pop_front() {
268                    self.segment_index
269                        .insert(demoted_key.clone(), CacheSegment::Probation);
270                    self.probation.insert(demoted_key, demoted_entry);
271                }
272            }
273        }
274    }
275
276    // -----------------------------------------------------------------------
277    // Window eviction + admission gate
278    // -----------------------------------------------------------------------
279
280    /// Evict the LRU item from the window, returning it (key + entry) if over cap.
281    fn evict_from_window_if_over_cap(&mut self) -> Option<(K, CacheEntry<V>)> {
282        if self.window.len() > self.window_cap {
283            if let Some((k, entry)) = self.window.pop_front() {
284                self.segment_index.remove(&k);
285                return Some((k, entry));
286            }
287        }
288        None
289    }
290
291    /// Run the admission gate for `window_candidate` (just evicted from window).
292    ///
293    /// If main space is not full, accept immediately (move to probation).
294    /// Otherwise compare frequencies; higher-frequency item stays; ties use RNG.
295    fn run_admission_gate(&mut self, candidate_key: K, candidate_entry: CacheEntry<V>) {
296        let main_size = self.probation.len() + self.protected.len();
297        if main_size < self.main_cap {
298            // Main not full — accept unconditionally.
299            self.segment_index
300                .insert(candidate_key.clone(), CacheSegment::Probation);
301            self.probation.insert(candidate_key, candidate_entry);
302            return;
303        }
304
305        // Probation tail = main victim.
306        let victim_key = match self.probation.front() {
307            Some((k, _)) => k.clone(),
308            None => {
309                // Probation empty (all in protected) — discard candidate.
310                return;
311            }
312        };
313
314        let candidate_freq = self.freq(&candidate_key);
315        let victim_freq = self.freq(&victim_key);
316
317        let admit = if candidate_freq > victim_freq {
318            true
319        } else if candidate_freq == victim_freq {
320            self.rng = xorshift64(self.rng);
321            (self.rng & 0xFF) < 128
322        } else {
323            false
324        };
325
326        if admit {
327            // Evict victim, admit candidate.
328            self.probation.pop_front();
329            self.segment_index.remove(&victim_key);
330
331            self.segment_index
332                .insert(candidate_key.clone(), CacheSegment::Probation);
333            self.probation.insert(candidate_key, candidate_entry);
334        }
335        // else: candidate is discarded (not inserted anywhere).
336    }
337
338    // -----------------------------------------------------------------------
339    // Core insert
340    // -----------------------------------------------------------------------
341
342    fn insert_entry(&mut self, key: K, entry: CacheEntry<V>) -> Option<V> {
343        // Update existing key in-place.
344        if let Some(seg) = self.segment_index.get(&key).copied() {
345            self.record_access(&key);
346            match seg {
347                CacheSegment::Window => {
348                    if let Some(existing) = self.window.get_mut(&key) {
349                        existing.expires_at = entry.expires_at;
350                        existing.value = entry.value;
351                    }
352                    self.promote_in_window(&key);
353                }
354                CacheSegment::Probation => {
355                    if let Some(existing) = self.probation.get_mut(&key) {
356                        existing.expires_at = entry.expires_at;
357                        existing.value = entry.value;
358                    }
359                    self.promote_probation_to_protected(&key);
360                }
361                CacheSegment::Protected => {
362                    if let Some(existing) = self.protected.get_mut(&key) {
363                        existing.expires_at = entry.expires_at;
364                        existing.value = entry.value;
365                    }
366                    self.promote_in_protected(&key);
367                }
368            }
369            return None;
370        }
371
372        // New key.
373        self.record_access(&key);
374        self.segment_index.insert(key.clone(), CacheSegment::Window);
375        self.window.insert(key.clone(), entry);
376
377        // Drain window if over cap (run admission gate).
378        if self.window.len() > self.window_cap {
379            if let Some((candidate_key, candidate_entry)) = self.evict_from_window_if_over_cap() {
380                self.run_admission_gate(candidate_key, candidate_entry);
381            }
382        }
383
384        None
385    }
386
387    // -----------------------------------------------------------------------
388    // Public API
389    // -----------------------------------------------------------------------
390
391    /// Look up `key`, updating recency and frequency.
392    ///
393    /// Expired entries are lazily removed and treated as misses.
394    pub fn get(&mut self, key: &K) -> Option<&V> {
395        if self.remove_if_expired(key) {
396            return None;
397        }
398
399        let seg = self.segment_index.get(key).copied()?;
400
401        self.record_access(key);
402
403        match seg {
404            CacheSegment::Window => {
405                self.promote_in_window(key);
406                self.window.get(key).map(|e| &e.value)
407            }
408            CacheSegment::Probation => {
409                // Promote from probation to protected.
410                self.promote_probation_to_protected(key);
411                self.protected.get(key).map(|e| &e.value)
412            }
413            CacheSegment::Protected => {
414                self.promote_in_protected(key);
415                self.protected.get(key).map(|e| &e.value)
416            }
417        }
418    }
419
420    /// Insert or update `key` -> `value` without TTL.
421    pub fn put(&mut self, key: K, value: V) -> Option<V> {
422        self.insert_entry(key, CacheEntry::new(value))
423    }
424
425    /// Insert or update `key` -> `value` with a TTL.
426    pub fn put_with_ttl(&mut self, key: K, value: V, ttl: std::time::Duration) -> Option<V> {
427        self.insert_entry(key, CacheEntry::with_ttl(value, ttl))
428    }
429
430    /// Total number of live entries.
431    #[must_use]
432    pub fn len(&self) -> usize {
433        self.window.len() + self.protected.len() + self.probation.len()
434    }
435
436    /// Return `true` if the cache holds no entries.
437    #[must_use]
438    pub fn is_empty(&self) -> bool {
439        self.len() == 0
440    }
441
442    /// Total capacity.
443    #[must_use]
444    pub fn cap(&self) -> usize {
445        self.total_cap
446    }
447
448    /// Read a value without updating recency or frequency.
449    ///
450    /// Returns `None` if absent or expired.
451    #[must_use]
452    pub fn peek(&self, key: &K) -> Option<&V> {
453        let seg = self.segment_index.get(key)?;
454        let entry = match seg {
455            CacheSegment::Window => self.window.get(key)?,
456            CacheSegment::Probation => self.probation.get(key)?,
457            CacheSegment::Protected => self.protected.get(key)?,
458        };
459        if entry.is_expired() {
460            None
461        } else {
462            Some(&entry.value)
463        }
464    }
465
466    /// Return `true` if `key` is present and not expired (no recency update).
467    #[must_use]
468    pub fn contains_key(&self, key: &K) -> bool {
469        let seg = match self.segment_index.get(key) {
470            Some(s) => s,
471            None => return false,
472        };
473        let entry = match seg {
474            CacheSegment::Window => self.window.get(key),
475            CacheSegment::Probation => self.probation.get(key),
476            CacheSegment::Protected => self.protected.get(key),
477        };
478        entry.map(|e| !e.is_expired()).unwrap_or(false)
479    }
480
481    /// Remove the entry for `key`, returning its value if present.
482    pub fn remove(&mut self, key: &K) -> Option<V> {
483        let seg = self.segment_index.remove(key)?;
484        let entry = match seg {
485            CacheSegment::Window => self.window.remove(key)?,
486            CacheSegment::Probation => self.probation.remove(key)?,
487            CacheSegment::Protected => self.protected.remove(key)?,
488        };
489        Some(entry.value)
490    }
491
492    /// Remove all entries from the cache.
493    pub fn clear(&mut self) {
494        self.window.clear();
495        self.protected.clear();
496        self.probation.clear();
497        self.segment_index.clear();
498        self.sketch.clear();
499        self.doorkeeper.clear();
500    }
501
502    /// Dynamically resize the cache capacity.
503    ///
504    /// Recomputes segment sizes and evicts LRU entries as needed.
505    pub fn resize(&mut self, new_cap: usize) {
506        let new_cap = new_cap.max(2);
507        self.total_cap = new_cap;
508        self.window_cap = (new_cap / 100).max(1);
509        self.main_cap = new_cap - self.window_cap;
510        self.protected_cap = self.main_cap * 4 / 5;
511
512        while self.window.len() > self.window_cap {
513            if let Some((k, _)) = self.window.pop_front() {
514                self.segment_index.remove(&k);
515            }
516        }
517        while self.protected.len() > self.protected_cap {
518            if let Some((k, _)) = self.protected.pop_front() {
519                self.segment_index.remove(&k);
520            }
521        }
522        let main_cap = self.main_cap;
523        while self.probation.len() + self.protected.len() > main_cap {
524            if let Some((k, _)) = self.probation.pop_front() {
525                self.segment_index.remove(&k);
526            }
527        }
528    }
529}
530
531impl<K, V> Cache<K, V> for WTinyLfuCache<K, V>
532where
533    K: Eq + Hash + Clone,
534{
535    fn get(&mut self, key: &K) -> Option<&V> {
536        WTinyLfuCache::get(self, key)
537    }
538
539    fn put(&mut self, key: K, value: V) -> Option<V> {
540        WTinyLfuCache::put(self, key, value)
541    }
542
543    fn put_with_ttl(&mut self, key: K, value: V, ttl: std::time::Duration) -> Option<V> {
544        WTinyLfuCache::put_with_ttl(self, key, value, ttl)
545    }
546
547    fn len(&self) -> usize {
548        WTinyLfuCache::len(self)
549    }
550
551    fn cap(&self) -> usize {
552        WTinyLfuCache::cap(self)
553    }
554
555    fn remove(&mut self, key: &K) -> Option<V> {
556        WTinyLfuCache::remove(self, key)
557    }
558
559    fn clear(&mut self) {
560        WTinyLfuCache::clear(self);
561    }
562
563    fn peek(&self, key: &K) -> Option<&V> {
564        WTinyLfuCache::peek(self, key)
565    }
566
567    fn contains_key(&self, key: &K) -> bool {
568        WTinyLfuCache::contains_key(self, key)
569    }
570
571    fn resize(&mut self, new_cap: usize) {
572        WTinyLfuCache::resize(self, new_cap);
573    }
574}