Skip to main content

oxistore_cache/
arc.rs

1/// Full Adaptive Replacement Cache (ARC) implementation with TTL support.
2///
3/// Implements the algorithm from:
4///   Megiddo, N. & Modha, D.S. (2003). ARC: A Self-Tuning, Low Overhead
5///   Replacement Cache. Proceedings of FAST'03.
6///
7/// ## Data structures
8///
9/// Four `LinkedHashMap`s (all with back = MRU, front = LRU):
10/// - `t1`: recently seen exactly once (recency).
11/// - `t2`: seen at least twice (frequency).
12/// - `b1`: ghost entries for t1 (keys only, `()` values).
13/// - `b2`: ghost entries for t2 (keys only, `()` values).
14///
15/// `p` is the adaptive target size for `|t1|` (0..=cap).
16///
17/// TTL support is provided via lazy expiry: any access that encounters an
18/// expired entry removes it and treats it as a miss.
19use std::hash::Hash;
20
21use hashlink::LinkedHashMap;
22
23use crate::{Cache, CacheEntry};
24
25/// Adaptive Replacement Cache with configurable capacity and optional per-entry TTL.
26///
27/// # Type parameters
28///
29/// - `K`: key type -- must be `Eq + Hash + Clone`.
30/// - `V`: value type.
31pub struct ArcCache<K, V> {
32    /// Recently seen once (recency list).
33    t1: LinkedHashMap<K, CacheEntry<V>, ahash::RandomState>,
34    /// Seen at least twice (frequency list).
35    t2: LinkedHashMap<K, CacheEntry<V>, ahash::RandomState>,
36    /// Ghost keys evicted from t1.
37    b1: LinkedHashMap<K, (), ahash::RandomState>,
38    /// Ghost keys evicted from t2.
39    b2: LinkedHashMap<K, (), ahash::RandomState>,
40    /// Adaptive target for |t1| (0..=cap).
41    p: usize,
42    /// Total cache capacity (|t1| + |t2| <= cap).
43    cap: usize,
44}
45
46impl<K, V> ArcCache<K, V>
47where
48    K: Eq + Hash + Clone,
49{
50    /// Create a new `ArcCache` with the given capacity.
51    ///
52    /// Capacity must be at least 1; a capacity of 0 is treated as 1 internally.
53    #[must_use]
54    pub fn new(cap: usize) -> Self {
55        let cap = cap.max(1);
56        ArcCache {
57            t1: LinkedHashMap::with_hasher(ahash::RandomState::default()),
58            t2: LinkedHashMap::with_hasher(ahash::RandomState::default()),
59            b1: LinkedHashMap::with_hasher(ahash::RandomState::default()),
60            b2: LinkedHashMap::with_hasher(ahash::RandomState::default()),
61            p: 0,
62            cap,
63        }
64    }
65
66    /// Return the current adaptive target `p`.
67    #[must_use]
68    pub fn p(&self) -> usize {
69        self.p
70    }
71
72    /// Total live-data entries (|t1| + |t2|).
73    #[must_use]
74    pub fn len(&self) -> usize {
75        self.t1.len() + self.t2.len()
76    }
77
78    /// Return `true` if no live entries are cached.
79    #[must_use]
80    pub fn is_empty(&self) -> bool {
81        self.len() == 0
82    }
83
84    /// Cache capacity.
85    #[must_use]
86    pub fn cap(&self) -> usize {
87        self.cap
88    }
89
90    /// Total directory size: |t1| + |t2| + |b1| + |b2|.
91    fn directory_len(&self) -> usize {
92        self.t1.len() + self.t2.len() + self.b1.len() + self.b2.len()
93    }
94
95    /// Check if an entry is expired and remove it from the given list.
96    /// Returns `true` if the entry was expired and removed.
97    fn remove_if_expired_t1(&mut self, key: &K) -> bool {
98        let expired = self.t1.get(key).map(|e| e.is_expired()).unwrap_or(false);
99        if expired {
100            self.t1.remove(key);
101        }
102        expired
103    }
104
105    /// Check if an entry is expired and remove it from t2.
106    /// Returns `true` if the entry was expired and removed.
107    fn remove_if_expired_t2(&mut self, key: &K) -> bool {
108        let expired = self.t2.get(key).map(|e| e.is_expired()).unwrap_or(false);
109        if expired {
110            self.t2.remove(key);
111        }
112        expired
113    }
114
115    // -------------------------------------------------------------------------
116    // REPLACE sub-routine (Megiddo & Modha, FAST'03, Figure 4)
117    // -------------------------------------------------------------------------
118    fn replace(&mut self) {
119        let t1_len = self.t1.len();
120        let evict_from_t1 = t1_len >= self.p.max(1) && t1_len >= 1;
121
122        if evict_from_t1 {
123            if let Some((k, _v)) = self.t1.pop_front() {
124                if self.directory_len() >= 2 * self.cap {
125                    self.b1.pop_front();
126                }
127                self.b1.insert(k, ());
128            }
129        } else if !self.t2.is_empty() {
130            if let Some((k, _v)) = self.t2.pop_front() {
131                if self.directory_len() >= 2 * self.cap {
132                    self.b2.pop_front();
133                }
134                self.b2.insert(k, ());
135            }
136        } else if !self.t1.is_empty() {
137            if let Some((k, _v)) = self.t1.pop_front() {
138                if self.directory_len() >= 2 * self.cap {
139                    self.b1.pop_front();
140                }
141                self.b1.insert(k, ());
142            }
143        }
144    }
145
146    /// Insert an entry (with expiry) into the cache.
147    fn insert_entry(&mut self, key: K, entry: CacheEntry<V>) -> Option<V> {
148        // If key is already in t1 or t2, update value and promote.
149        if self.t1.contains_key(&key) {
150            let old = self.t1.remove(&key).expect("key confirmed in t1");
151            self.t2.insert(key, entry);
152            return Some(old.value);
153        }
154        if self.t2.contains_key(&key) {
155            return self.t2.insert(key, entry).map(|e| e.value);
156        }
157
158        // Ghost hit in b1: increase p (bias toward recency), evict if needed,
159        // then insert into MRU(t2).
160        if self.b1.remove(&key).is_some() {
161            let b1_len = self.b1.len().max(1);
162            let b2_len = self.b2.len();
163            let delta = (b2_len / b1_len).max(1);
164            self.p = (self.p + delta).min(self.cap);
165            // Ensure live count stays within cap before inserting.
166            if self.t1.len() + self.t2.len() >= self.cap {
167                self.replace();
168            }
169            self.t2.insert(key, entry);
170            return None;
171        }
172
173        // Ghost hit in b2: decrease p (bias toward frequency), evict if needed,
174        // then insert into MRU(t2).
175        if self.b2.remove(&key).is_some() {
176            let b1_len = self.b1.len();
177            let b2_len = self.b2.len().max(1);
178            let delta = (b1_len / b2_len).max(1);
179            self.p = self.p.saturating_sub(delta);
180            // Ensure live count stays within cap before inserting.
181            if self.t1.len() + self.t2.len() >= self.cap {
182                self.replace();
183            }
184            self.t2.insert(key, entry);
185            return None;
186        }
187
188        // Full miss -- enforce directory / cache limits before inserting to t1.
189        let live = self.t1.len() + self.t2.len();
190        if live < self.cap {
191            if self.directory_len() >= self.cap && self.directory_len() >= 2 * self.cap {
192                if !self.b2.is_empty() {
193                    self.b2.pop_front();
194                } else {
195                    self.b1.pop_front();
196                }
197            }
198        } else {
199            // Cache full -- need to evict.
200            if self.t1.len() + self.b1.len() == self.cap {
201                if !self.t1.is_empty() {
202                    self.replace();
203                } else {
204                    self.b1.pop_front();
205                }
206            } else {
207                if self.directory_len() >= 2 * self.cap {
208                    if !self.b2.is_empty() {
209                        self.b2.pop_front();
210                    } else if !self.b1.is_empty() {
211                        self.b1.pop_front();
212                    }
213                }
214                self.replace();
215            }
216        }
217
218        self.t1.insert(key, entry);
219        None
220    }
221
222    /// Look up `key` and return a reference to its value, promoting as needed.
223    ///
224    /// Expired entries are lazily removed and treated as misses.
225    pub fn get(&mut self, key: &K) -> Option<&V> {
226        // Case 1a: hit in t1 -- check expiry, then promote to MRU(t2).
227        if self.t1.contains_key(key) {
228            if self.remove_if_expired_t1(key) {
229                return None;
230            }
231            let (k, v) = self.t1.remove_entry(key).expect("key in t1");
232            self.t2.insert(k, v);
233            return self.t2.get(key).map(|e| &e.value);
234        }
235
236        // Case 1b: hit in t2 -- check expiry, then move to MRU position.
237        if self.t2.contains_key(key) {
238            if self.remove_if_expired_t2(key) {
239                return None;
240            }
241            if let Some(v) = self.t2.remove(key) {
242                self.t2.insert(key.clone(), v);
243            }
244            return self.t2.get(key).map(|e| &e.value);
245        }
246
247        // Case 2: ghost hit in b1 -- adapt p upward.
248        if self.b1.contains_key(key) {
249            let b1_len = self.b1.len().max(1);
250            let b2_len = self.b2.len();
251            let delta = (b2_len / b1_len).max(1);
252            self.p = (self.p + delta).min(self.cap);
253            self.replace();
254            return None;
255        }
256
257        // Case 3: ghost hit in b2 -- adapt p downward.
258        if self.b2.contains_key(key) {
259            let b1_len = self.b1.len();
260            let b2_len = self.b2.len().max(1);
261            let delta = (b1_len / b2_len).max(1);
262            self.p = self.p.saturating_sub(delta);
263            self.replace();
264            return None;
265        }
266
267        // Case 4: complete miss.
268        None
269    }
270
271    /// Insert `key` -> `value` into the cache (no TTL).
272    pub fn put(&mut self, key: K, value: V) -> Option<V> {
273        self.insert_entry(key, CacheEntry::new(value))
274    }
275
276    /// Insert `key` -> `value` into the cache with a TTL.
277    pub fn put_with_ttl(&mut self, key: K, value: V, ttl: std::time::Duration) -> Option<V> {
278        self.insert_entry(key, CacheEntry::with_ttl(value, ttl))
279    }
280
281    /// Read a value without promoting it.
282    ///
283    /// Checks t1 and t2 but does NOT move entries or trigger ghost adaptation.
284    /// Expired entries return `None` (removal happens on next mutable access).
285    #[must_use]
286    pub fn peek(&self, key: &K) -> Option<&V> {
287        let from_t1 = self
288            .t1
289            .get(key)
290            .and_then(|e| if e.is_expired() { None } else { Some(&e.value) });
291        if from_t1.is_some() {
292            return from_t1;
293        }
294        self.t2
295            .get(key)
296            .and_then(|e| if e.is_expired() { None } else { Some(&e.value) })
297    }
298
299    /// Return `true` if `key` is present in t1 or t2 and not expired (without promotion).
300    #[must_use]
301    pub fn contains_key(&self, key: &K) -> bool {
302        let in_t1 = self.t1.get(key).map(|e| !e.is_expired()).unwrap_or(false);
303        let in_t2 = self.t2.get(key).map(|e| !e.is_expired()).unwrap_or(false);
304        in_t1 || in_t2
305    }
306
307    /// Remove the entry for `key` from the cache.
308    ///
309    /// Returns the value if it was present in t1 or t2.  Also removes any
310    /// ghost entry from b1/b2.
311    pub fn remove(&mut self, key: &K) -> Option<V> {
312        if let Some(e) = self.t1.remove(key) {
313            return Some(e.value);
314        }
315        if let Some(e) = self.t2.remove(key) {
316            return Some(e.value);
317        }
318        // Also clear ghost entries to prevent stale ghost hits.
319        self.b1.remove(key);
320        self.b2.remove(key);
321        None
322    }
323
324    /// Remove all entries from the cache (live + ghost).
325    pub fn clear(&mut self) {
326        self.t1.clear();
327        self.t2.clear();
328        self.b1.clear();
329        self.b2.clear();
330        self.p = 0;
331    }
332
333    /// Dynamically resize the cache capacity.
334    ///
335    /// If `new_cap` is smaller than the current live count, entries are
336    /// evicted via the REPLACE routine until the count fits.
337    pub fn resize(&mut self, new_cap: usize) {
338        let new_cap = new_cap.max(1);
339        self.cap = new_cap;
340        // If p exceeds the new cap, clamp it.
341        if self.p > self.cap {
342            self.p = self.cap;
343        }
344        // Evict until live count fits.
345        while self.len() > self.cap {
346            self.replace();
347        }
348    }
349}
350
351impl<K: std::fmt::Debug, V> std::fmt::Debug for ArcCache<K, V> {
352    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
353        f.debug_struct("ArcCache")
354            .field("cap", &self.cap)
355            .field("p", &self.p)
356            .field("t1_len", &self.t1.len())
357            .field("t2_len", &self.t2.len())
358            .field("b1_len", &self.b1.len())
359            .field("b2_len", &self.b2.len())
360            .finish_non_exhaustive()
361    }
362}
363
364impl<K, V> Cache<K, V> for ArcCache<K, V>
365where
366    K: Eq + Hash + Clone,
367{
368    fn get(&mut self, key: &K) -> Option<&V> {
369        ArcCache::get(self, key)
370    }
371
372    fn put(&mut self, key: K, value: V) -> Option<V> {
373        ArcCache::put(self, key, value)
374    }
375
376    fn put_with_ttl(&mut self, key: K, value: V, ttl: std::time::Duration) -> Option<V> {
377        ArcCache::put_with_ttl(self, key, value, ttl)
378    }
379
380    fn len(&self) -> usize {
381        self.len()
382    }
383
384    fn cap(&self) -> usize {
385        self.cap
386    }
387
388    fn remove(&mut self, key: &K) -> Option<V> {
389        ArcCache::remove(self, key)
390    }
391
392    fn clear(&mut self) {
393        ArcCache::clear(self);
394    }
395
396    fn peek(&self, key: &K) -> Option<&V> {
397        ArcCache::peek(self, key)
398    }
399
400    fn contains_key(&self, key: &K) -> bool {
401        ArcCache::contains_key(self, key)
402    }
403
404    fn resize(&mut self, new_cap: usize) {
405        ArcCache::resize(self, new_cap);
406    }
407
408    fn values(&self) -> Vec<&V> {
409        let t1_vals = self
410            .t1
411            .values()
412            .filter(|e| !e.is_expired())
413            .map(|e| &e.value);
414        let t2_vals = self
415            .t2
416            .values()
417            .filter(|e| !e.is_expired())
418            .map(|e| &e.value);
419        t1_vals.chain(t2_vals).collect()
420    }
421}