Skip to main content

cache_mod/
tinylfu.rs

1//! TinyLFU cache — Count-Min Sketch frequency estimator + admission filter
2//! on top of an arena-backed LRU main cache.
3
4use core::hash::{Hash, Hasher};
5use std::collections::hash_map::DefaultHasher;
6use std::collections::HashMap;
7use std::num::NonZeroUsize;
8use std::sync::Mutex;
9
10use crate::cache::Cache;
11use crate::error::CacheError;
12use crate::util::MutexExt;
13
14/// Count-Min Sketch depth (number of independent hash rows).
15const SKETCH_DEPTH: usize = 4;
16
17/// Width floor — the sketch is never narrower than this, regardless of
18/// configured capacity. Guards small-capacity caches from degenerate
19/// collision rates.
20const MIN_SKETCH_WIDTH: usize = 64;
21
22/// A bounded, thread-safe cache with **admission control**.
23///
24/// `TinyLfuCache` tracks the access frequency of *every* key it observes —
25/// including keys that aren't (yet) in the cache — using a fixed-size
26/// [Count-Min Sketch][cms]. On capacity overflow, an incoming key is
27/// **admitted only if its estimated frequency exceeds the LRU victim's**.
28/// One-hit-wonders are rejected at the door instead of evicting hot entries.
29///
30/// This is a deliberate semantic deviation from `LruCache` / `LfuCache` /
31/// `TtlCache`: a successful [`insert`](Cache::insert) call **does not
32/// guarantee** that the value is in the cache. The admission filter may
33/// have rejected it. Callers that need strict insertion guarantees should
34/// use `LruCache` or `LfuCache` instead.
35///
36/// 0.6.0 implementation:
37///
38/// - main cache is arena-backed (O(1) promote, O(1) evict — same shape as `LruCache`)
39/// - depth-4 Count-Min Sketch with `u8` saturating counters
40/// - width = `max(MIN_SKETCH_WIDTH, 2 × capacity)`, rounded to the next power of two
41/// - periodic frequency decay: every `10 × capacity` increments, every counter
42///   is right-shifted by 1 (W-TinyLFU "aging" step, keeps the sketch responsive
43///   to shifting workloads)
44/// - main cache uses LRU ordering; eviction victim = least-recently-accessed
45/// - lock-minimized via `&self` + `Mutex<Inner>`; a sharded / lock-free variant
46///   lands in 0.7.0 without changing this public surface
47///
48/// [cms]: https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch
49///
50/// # Example
51///
52/// ```
53/// use cache_mod::{Cache, TinyLfuCache};
54///
55/// let cache: TinyLfuCache<&'static str, u32> =
56///     TinyLfuCache::new(4).expect("capacity > 0");
57///
58/// // Build up the frequency signal for "hot".
59/// for _ in 0..16 {
60///     let _ = cache.get(&"hot");
61///     let _ = cache.insert("hot", 1);
62/// }
63///
64/// // A subsequent insert will see "hot" as warm in the sketch.
65/// assert_eq!(cache.get(&"hot"), Some(1));
66/// ```
67pub struct TinyLfuCache<K, V> {
68    capacity: NonZeroUsize,
69    inner: Mutex<Inner<K, V>>,
70}
71
72struct Node<K, V> {
73    key: K,
74    value: V,
75    prev: Option<usize>,
76    next: Option<usize>,
77}
78
79struct Inner<K, V> {
80    nodes: Vec<Option<Node<K, V>>>,
81    free: Vec<usize>,
82    head: Option<usize>,
83    tail: Option<usize>,
84    map: HashMap<K, usize>,
85    sketch: CountMinSketch,
86}
87
88impl<K, V> Inner<K, V>
89where
90    K: Eq + Hash + Clone,
91{
92    fn with_capacity(cap: usize) -> Self {
93        Self {
94            nodes: Vec::with_capacity(cap),
95            free: Vec::new(),
96            head: None,
97            tail: None,
98            map: HashMap::with_capacity(cap),
99            sketch: CountMinSketch::new(cap),
100        }
101    }
102
103    fn alloc(&mut self, node: Node<K, V>) -> usize {
104        if let Some(idx) = self.free.pop() {
105            self.nodes[idx] = Some(node);
106            idx
107        } else {
108            self.nodes.push(Some(node));
109            self.nodes.len() - 1
110        }
111    }
112
113    fn dealloc(&mut self, idx: usize) -> Node<K, V> {
114        let node = self.nodes[idx]
115            .take()
116            .unwrap_or_else(|| unreachable!("arena slot must be occupied"));
117        self.free.push(idx);
118        node
119    }
120
121    fn unlink(&mut self, idx: usize) {
122        let (prev, next) = {
123            let n = self.nodes[idx]
124                .as_ref()
125                .unwrap_or_else(|| unreachable!("unlink target must be occupied"));
126            (n.prev, n.next)
127        };
128        match prev {
129            Some(p) => {
130                self.nodes[p]
131                    .as_mut()
132                    .unwrap_or_else(|| unreachable!())
133                    .next = next
134            }
135            None => self.head = next,
136        }
137        match next {
138            Some(n) => {
139                self.nodes[n]
140                    .as_mut()
141                    .unwrap_or_else(|| unreachable!())
142                    .prev = prev
143            }
144            None => self.tail = prev,
145        }
146        if let Some(n) = self.nodes[idx].as_mut() {
147            n.prev = None;
148            n.next = None;
149        }
150    }
151
152    fn push_front(&mut self, idx: usize) {
153        let old_head = self.head;
154        if let Some(n) = self.nodes[idx].as_mut() {
155            n.prev = None;
156            n.next = old_head;
157        }
158        if let Some(h) = old_head {
159            if let Some(n) = self.nodes[h].as_mut() {
160                n.prev = Some(idx);
161            }
162        } else {
163            self.tail = Some(idx);
164        }
165        self.head = Some(idx);
166    }
167
168    fn promote(&mut self, idx: usize) {
169        if self.head == Some(idx) {
170            return;
171        }
172        self.unlink(idx);
173        self.push_front(idx);
174    }
175}
176
177impl<K, V> TinyLfuCache<K, V>
178where
179    K: Eq + Hash + Clone,
180    V: Clone,
181{
182    /// Creates a cache with the given entry-count capacity.
183    ///
184    /// Returns [`CacheError::InvalidCapacity`] if `capacity == 0`.
185    ///
186    /// # Example
187    ///
188    /// ```
189    /// use cache_mod::TinyLfuCache;
190    ///
191    /// let cache: TinyLfuCache<String, u32> =
192    ///     TinyLfuCache::new(256).expect("capacity > 0");
193    /// ```
194    pub fn new(capacity: usize) -> Result<Self, CacheError> {
195        let cap = NonZeroUsize::new(capacity).ok_or(CacheError::InvalidCapacity)?;
196        Ok(Self::with_capacity(cap))
197    }
198
199    /// Creates a cache with the given non-zero capacity. Infallible.
200    ///
201    /// # Example
202    ///
203    /// ```
204    /// use std::num::NonZeroUsize;
205    /// use cache_mod::TinyLfuCache;
206    ///
207    /// let cap = NonZeroUsize::new(256).expect("256 != 0");
208    /// let cache: TinyLfuCache<String, u32> = TinyLfuCache::with_capacity(cap);
209    /// ```
210    pub fn with_capacity(capacity: NonZeroUsize) -> Self {
211        let cap = capacity.get();
212        Self {
213            capacity,
214            inner: Mutex::new(Inner::with_capacity(cap)),
215        }
216    }
217}
218
219impl<K, V> Cache<K, V> for TinyLfuCache<K, V>
220where
221    K: Eq + Hash + Clone,
222    V: Clone,
223{
224    fn get(&self, key: &K) -> Option<V> {
225        let mut inner = self.inner.lock_recover();
226        // Every observation feeds the sketch, even on cache miss.
227        inner.sketch.increment(key);
228        let idx = *inner.map.get(key)?;
229        inner.promote(idx);
230        inner.nodes[idx].as_ref().map(|n| n.value.clone())
231    }
232
233    fn insert(&self, key: K, value: V) -> Option<V> {
234        let mut inner = self.inner.lock_recover();
235        inner.sketch.increment(&key);
236
237        // Live update: existing key always succeeds (no admission check).
238        if let Some(&idx) = inner.map.get(&key) {
239            let old = inner.nodes[idx]
240                .as_mut()
241                .map(|n| core::mem::replace(&mut n.value, value))
242                .unwrap_or_else(|| unreachable!("mapped index must be occupied"));
243            inner.promote(idx);
244            return Some(old);
245        }
246
247        // New key. Admission filter kicks in at capacity.
248        if inner.map.len() >= self.capacity.get() {
249            let candidate_freq = inner.sketch.estimate(&key);
250            let tail_idx = inner.tail?;
251            let victim_key = inner.nodes[tail_idx]
252                .as_ref()
253                .map(|n| n.key.clone())
254                .unwrap_or_else(|| unreachable!("tail must be occupied"));
255            let victim_freq = inner.sketch.estimate(&victim_key);
256            if candidate_freq <= victim_freq {
257                // Reject — value dropped at function exit.
258                return None;
259            }
260            inner.unlink(tail_idx);
261            let _ = inner.dealloc(tail_idx);
262            let _ = inner.map.remove(&victim_key);
263        }
264
265        let idx = inner.alloc(Node {
266            key: key.clone(),
267            value,
268            prev: None,
269            next: None,
270        });
271        inner.push_front(idx);
272        let _ = inner.map.insert(key, idx);
273        None
274    }
275
276    fn remove(&self, key: &K) -> Option<V> {
277        let mut inner = self.inner.lock_recover();
278        let idx = inner.map.remove(key)?;
279        inner.unlink(idx);
280        let node = inner.dealloc(idx);
281        Some(node.value)
282    }
283
284    fn contains_key(&self, key: &K) -> bool {
285        self.inner.lock_recover().map.contains_key(key)
286    }
287
288    fn len(&self) -> usize {
289        self.inner.lock_recover().map.len()
290    }
291
292    fn clear(&self) {
293        let mut inner = self.inner.lock_recover();
294        inner.nodes.clear();
295        inner.free.clear();
296        inner.head = None;
297        inner.tail = None;
298        inner.map.clear();
299        inner.sketch.reset();
300    }
301
302    fn capacity(&self) -> usize {
303        self.capacity.get()
304    }
305}
306
307// -----------------------------------------------------------------------------
308// Count-Min Sketch
309// -----------------------------------------------------------------------------
310
311struct CountMinSketch {
312    counters: Vec<u8>,
313    width: usize,
314    width_u64: u64,
315    samples: u64,
316    sample_window: u64,
317}
318
319impl CountMinSketch {
320    fn new(capacity: usize) -> Self {
321        let mut width = capacity.saturating_mul(2).max(MIN_SKETCH_WIDTH);
322        width = width.next_power_of_two();
323        let sample_window = (capacity as u64).saturating_mul(10).max(64);
324        Self {
325            counters: vec![0; width.saturating_mul(SKETCH_DEPTH)],
326            width,
327            width_u64: width as u64,
328            samples: 0,
329            sample_window,
330        }
331    }
332
333    fn estimate<K: Hash>(&self, key: &K) -> u8 {
334        let mut min = u8::MAX;
335        for d in 0..SKETCH_DEPTH {
336            let idx = self.cell(d, key);
337            let observed = *self.counters.get(idx).unwrap_or(&0);
338            if observed < min {
339                min = observed;
340            }
341        }
342        min
343    }
344
345    fn increment<K: Hash>(&mut self, key: &K) {
346        for d in 0..SKETCH_DEPTH {
347            let idx = self.cell(d, key);
348            if let Some(slot) = self.counters.get_mut(idx) {
349                *slot = slot.saturating_add(1);
350            }
351        }
352        self.samples = self.samples.saturating_add(1);
353        if self.samples >= self.sample_window {
354            self.age();
355            self.samples = 0;
356        }
357    }
358
359    fn reset(&mut self) {
360        for c in self.counters.iter_mut() {
361            *c = 0;
362        }
363        self.samples = 0;
364    }
365
366    fn age(&mut self) {
367        for c in self.counters.iter_mut() {
368            *c >>= 1;
369        }
370    }
371
372    fn cell<K: Hash>(&self, d: usize, key: &K) -> usize {
373        let h = hash_with_seed(key, d as u64);
374        let col = (h % self.width_u64) as usize;
375        d.saturating_mul(self.width).saturating_add(col)
376    }
377}
378
379fn hash_with_seed<K: Hash>(key: &K, seed: u64) -> u64 {
380    let mut hasher = DefaultHasher::new();
381    seed.hash(&mut hasher);
382    key.hash(&mut hasher);
383    hasher.finish()
384}