lfu_cache 1.3.0

A simple constant time LFU cache implementation
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
use std::collections::BTreeSet;
use std::hash::Hash;
use std::iter::FusedIterator;
use std::num::NonZeroUsize;
use std::rc::Rc;
use std::time::{Duration, Instant};

use crate::lfu::LfuCache;
use crate::{iter::LfuCacheIter, Entry};

/// A LFU cache with additional eviction conditions based on the time an entry
/// has been in cache.
///
/// Note that the performance of this cache for all operations that evict
/// expired entries is O(log n).
// This is re-exported at the crate root, so this lint can be safely ignored.
#[allow(clippy::module_name_repetitions)]
#[derive(Debug, Eq)]
pub struct TimedLfuCache<Key: Hash + Eq, Value> {
    cache: LfuCache<Key, Value>,
    expiration: Option<Duration>,
    expiry_set: BTreeSet<ExpirationSetEntry<Key>>,
}

impl<Key: Hash + Eq, Value: PartialEq> PartialEq for TimedLfuCache<Key, Value> {
    fn eq(&self, other: &Self) -> bool {
        // We don't need to compare the expiry_heap here since it's meaningless
        // to. Because we insert instants it's almost guaranteed that the
        // expiry_heap is never equal. As a result, it's easier (and more
        // more reasonable) to compare just the cache and expirations.
        self.cache.eq(&other.cache) && self.expiration == other.expiration
    }
}

unsafe impl<Key: Hash + Eq, Value> Send for TimedLfuCache<Key, Value> {}
unsafe impl<Key: Hash + Eq, Value> Sync for TimedLfuCache<Key, Value> {}

impl<Key: Hash + Eq, Value> TimedLfuCache<Key, Value> {
    /// Constructs an unbounded LFU cache with an time-to-live for its elements.
    /// Cache elements that have been in the cache longer than the provided
    /// duration are automatically evicted.
    #[inline]
    #[must_use]
    pub fn with_expiration(expiration: Duration) -> Self {
        Self::with_capacity_and_expiration(0, expiration)
    }

    /// Constructs a bounded LFU cache with an time-to-live for its elements.
    /// Cache elements that have been in the cache longer than the provided
    /// duration are automatically evicted.
    #[inline]
    #[must_use]
    pub fn with_capacity_and_expiration(capacity: usize, expiration: Duration) -> Self {
        Self {
            cache: LfuCache::with_capacity(capacity),
            expiration: Some(expiration),
            expiry_set: BTreeSet::new(),
        }
    }

    /// Sets the new capacity. If the provided capacity is zero, then this
    /// will turn the cache into an unbounded cache. If the new capacity is less
    /// than the current capacity, the least frequently used items are evicted
    /// until the number of items is equal to the capacity.
    ///
    /// Calling this method will automatically evict expired entries before
    /// inserting the value.
    ///
    /// If the cache becomes unbounded, then users must manually call
    /// [`Self::pop_lfu`] to remove the least frequently used item.
    #[inline]
    pub fn set_capacity(&mut self, new_capacity: usize) {
        self.evict_expired();
        self.cache.set_capacity(new_capacity);
    }

    /// Inserts a value into the cache using the provided key. If the value
    /// already exists, then the value is replaced with the provided value and
    /// the frequency is reset.
    ///
    /// Calling this method will automatically evict expired entries before
    /// inserting the value.
    ///
    /// The returned Option will return an evicted value, if a value needed to
    /// be evicted. This includes the old value, if the previously existing
    /// value was present under the same key.
    pub fn insert(&mut self, key: Key, value: Value) -> Option<Value> {
        self.evict_expired();
        let key_rc = Rc::new(key);
        self.expiry_set
            .insert(ExpirationSetEntry(Rc::clone(&key_rc), Instant::now()));
        self.cache.insert_rc(key_rc, value)
    }

    /// Gets a value and incrementing the internal frequency counter of that
    /// value, if it exists.
    ///
    /// Calling this method will automatically evict expired entries before
    /// looking up the value.
    pub fn get(&mut self, key: &Key) -> Option<&Value> {
        self.evict_expired();
        let (key, value) = self.cache.get_rc_key_value(key)?;
        let entry = ExpirationSetEntry(key, Instant::now());
        // Since ExpirationSetEntry's equality is based on the key itself,
        // replace will remove the old entry (and thus old time).
        self.expiry_set.replace(entry);
        Some(value)
    }

    /// Gets a mutable value and incrementing the internal frequency counter of
    /// that value, if it exists.
    ///
    /// Calling this method will automatically evict expired entries before
    /// looking up the value.
    pub fn get_mut(&mut self, key: &Key) -> Option<&mut Value> {
        self.evict_expired();
        let (key, value) = self.cache.get_rc_key_value_mut(key)?;
        let entry = ExpirationSetEntry(key, Instant::now());
        // Since ExpirationSetEntry's equality is based on the key itself,
        // replace will remove the old entry (and thus old time).
        self.expiry_set.replace(entry);
        Some(value)
    }

    /// Removes a value from the cache by key, if it exists.
    ///
    /// Calling this method will automatically evict expired entries before
    /// removing the value.
    #[inline]
    pub fn remove(&mut self, key: &Key) -> Option<Value> {
        self.evict_expired();
        self.cache.remove(key)
    }

    /// Gets the given key's corresponding entry in the LFU cache for in-place
    /// manipulation. If the key refers to an occupied entry, that entry then is
    /// immediately considered accessed, even if no reading or writing is
    /// performed. This behavior is a limitation of the Entry API.
    ///
    /// Calling this method method will automatically evict expired entries
    /// before returning the entry struct.
    #[inline]
    pub fn entry(&mut self, key: Key) -> Entry<'_, Key, Value> {
        self.evict_expired();
        self.cache.entry(key)
    }

    /// Evicts the least frequently used value and returns it. If the cache is
    /// empty, then this returns None. If there are multiple items that have an
    /// equal access count, then the most recently added value is evicted.
    ///
    /// Calling this method will automatically evict expired entries before
    /// returning the LFU item.
    #[inline]
    pub fn pop_lfu(&mut self) -> Option<Value> {
        self.evict_expired();
        self.cache.pop_lfu()
    }

    /// Evicts the least frequently used key-value pair and returns it. If the
    /// cache is empty, then this returns None. If there are multiple items that
    /// have an equal access count, then the most recently added key-value pair
    /// is evicted.
    ///
    /// Calling this method will automatically evict expired entries before
    /// returning the LFU item.
    #[inline]
    pub fn pop_lfu_key_value(&mut self) -> Option<(Key, Value)> {
        self.evict_expired();
        self.cache.pop_lfu_key_value()
    }

    /// Evicts the least frequently used value and returns it, the key it was
    /// inserted under, and the frequency it had. If the cache is empty, then
    /// this returns None. If there are multiple items that have an equal access
    /// count, then the most recently added key-value pair is evicted.
    ///
    /// Calling this method will automatically evict expired entries before
    /// returning the LFU item.
    #[inline]
    pub fn pop_lfu_key_value_frequency(&mut self) -> Option<(Key, Value, usize)> {
        self.evict_expired();
        self.cache.pop_lfu_key_value_frequency()
    }

    /// Clears the cache, returning the iterator of the previous cached values.
    #[inline]
    pub fn clear(&mut self) -> LfuCacheIter<Key, Value> {
        self.expiry_set.clear();
        self.cache.clear()
    }

    /// Peeks at the next value to be evicted, if there is one. This will not
    /// increment the access counter for that value, or evict expired entries.
    #[inline]
    #[must_use]
    pub fn peek_lfu(&self) -> Option<&Value> {
        self.cache.peek_lfu()
    }

    /// Peeks at the key for the next value to be evicted, if there is one. This
    /// will not increment the access counter for that value.
    #[inline]
    #[must_use]
    pub fn peek_lfu_key(&self) -> Option<&Key> {
        self.cache.peek_lfu_key()
    }

    /// Returns the current capacity of the cache.
    #[inline]
    #[must_use]
    pub const fn capacity(&self) -> Option<NonZeroUsize> {
        self.cache.capacity()
    }

    /// Returns the current number of items in the cache. This is a constant
    /// time operation. Calling this method does not evict expired items, so it
    /// is possible that this will include stale items. Consider calling
    /// [`Self::evict_expired`] if this is a concern.
    #[inline]
    #[must_use]
    pub const fn len(&self) -> usize {
        self.cache.len()
    }

    /// Returns if the cache contains no elements. Calling this method does not
    /// evict expired items, so it is possible that this will include stale
    /// items. Consider calling [`Self::evict_expired`] if this is a concern.
    #[inline]
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.cache.is_empty()
    }

    /// Returns if the cache is unbounded.
    #[inline]
    #[must_use]
    pub const fn is_unbounded(&self) -> bool {
        self.cache.is_unbounded()
    }

    /// Returns the frequencies that this cache has. This is a linear time
    /// operation. Calling this method does not evict expired items, so it is
    /// possible that this will include stale items. Consider calling
    /// [`Self::evict_expired`] if this is a concern.
    #[inline]
    #[must_use]
    pub fn frequencies(&self) -> Vec<usize> {
        self.cache.frequencies()
    }

    /// Sets the capacity to the amount of objects currently in the cache. If
    /// no items were in the cache, the cache becomes unbounded. Note that this
    /// _will_ evict expired items before shrinking, so it is recommended to
    /// directly call [`Self::set_capacity`] for a more consistent size.
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        self.evict_expired();
        self.set_capacity(self.len());
    }

    /// Returns an iterator over the keys of the LFU cache in any order. Note
    /// that this will not automatically evict expired items, so stale items
    /// may appear in the iterator. Consider calling [`Self::evict_expired`] if
    /// this is a concern.
    #[inline]
    pub fn keys(&self) -> impl Iterator<Item = &Key> + FusedIterator + '_ {
        self.cache.keys()
    }

    /// Returns an iterator over the values of the LFU cache in any order. Note
    /// that this does **not** increment the count for any of the values and
    /// will not automatically evict expired items, so stale items may appear
    /// in the iterator. Consider calling [`Self::evict_expired`] if this is a
    /// concern.
    #[inline]
    pub fn peek_values(&self) -> impl Iterator<Item = &Value> + FusedIterator + '_ {
        self.cache.peek_values()
    }

    /// Returns an iterator over the keys and values of the LFU cache in any
    /// order. Note that this does **not** increment the count for any of the
    /// values and will not automatically evict expired items, so stale items
    /// may appear in the iterator. Consider calling [`Self::evict_expired`]
    /// if this is a concern.
    #[inline]
    pub fn peek_iter(&self) -> impl Iterator<Item = (&Key, &Value)> + FusedIterator + '_ {
        self.cache.peek_iter()
    }

    /// Manually evict expired entires. Note that it is possible to have stale
    /// entries _after_ this method was called as it is possible for entries to
    /// have expired right after calling this function.
    pub fn evict_expired(&mut self) {
        if let Some(duration) = self.expiration {
            let cut_off = Instant::now() - duration;
            let mut break_next = false;
            let mut drain_key = None;
            for key in &self.expiry_set {
                if break_next {
                    // Necessarily needs to clone here, as otherwise the borrow
                    // on self.expiry_set lasts for drain_key's lifetime and
                    // we need to mutably borrow later
                    drain_key = Some(key.clone());
                    break;
                }

                break_next = key.1 <= cut_off;
            }

            if let Some(key) = drain_key {
                let mut to_evict = self.expiry_set.split_off(&key);
                // to_evict contains the values we want to retain, so do a quick
                // swap here
                std::mem::swap(&mut to_evict, &mut self.expiry_set);
                for ExpirationSetEntry(key, _) in to_evict {
                    self.cache.remove(&key);
                }
            } else if break_next {
                // all entries are stale
                self.clear();
            }
        }
    }
}

/// A struct whose order is dependent on instant, but whose equality is based
/// on the provided key. This allows us to fetch key values by the key only,
/// but allows us to have an ordered set of elements based on when it was added
/// for quick eviction lookup.
#[derive(Debug, Eq)]
struct ExpirationSetEntry<Key: Hash>(Rc<Key>, Instant);

impl<Key: PartialEq + Hash> PartialEq for ExpirationSetEntry<Key> {
    fn eq(&self, other: &Self) -> bool {
        self.0.eq(&other.0)
    }
}

impl<Key: Hash> Clone for ExpirationSetEntry<Key> {
    fn clone(&self) -> Self {
        Self(Rc::clone(&self.0), self.1)
    }
}

impl<Key: Hash + Eq> Ord for ExpirationSetEntry<Key> {
    #[inline]
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.1.cmp(&other.1)
    }
}

impl<Key: Hash + Eq> PartialOrd for ExpirationSetEntry<Key> {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<Key: Hash + Eq, Value> Extend<(Key, Value)> for TimedLfuCache<Key, Value> {
    /// Inserts the items from the iterator into the cache. Note that this may
    /// evict items if the number of elements in the iterator plus the number of
    /// current items in the cache exceeds the capacity of the cache.
    fn extend<T: IntoIterator<Item = (Key, Value)>>(&mut self, iter: T) {
        for (k, v) in iter {
            self.insert(k, v);
        }
    }
}

impl<Key: Hash + Eq, Value> IntoIterator for TimedLfuCache<Key, Value> {
    type Item = (Key, Value);

    type IntoIter = LfuCacheIter<Key, Value>;

    fn into_iter(self) -> Self::IntoIter {
        LfuCacheIter(self.cache)
    }
}

#[cfg(test)]
mod timed {
    use crate::TimedLfuCache;
    use std::time::Duration;

    #[test]
    fn old_items_are_evicted() {
        let duration = Duration::from_millis(250);
        let mut cache = TimedLfuCache::with_expiration(duration);
        cache.insert(1, 2);
        assert_eq!(cache.get(&1), Some(&2));
        std::thread::sleep(duration * 2);
        assert!(cache.get(&1).is_none());
    }

    #[test]
    fn non_expired_remains() {
        let duration = Duration::from_millis(250);
        let mut cache = TimedLfuCache::with_expiration(duration);
        cache.insert(1, 2);
        assert_eq!(cache.get(&1), Some(&2));
        std::thread::sleep(duration / 2);
        cache.insert(3, 4);
        assert_eq!(cache.get(&1), Some(&2));
        assert_eq!(cache.get(&3), Some(&4));
        std::thread::sleep(duration + duration / 4);
        assert!(cache.get(&1).is_none());
        assert_eq!(cache.get(&3), Some(&4));
    }
}