common_cache/
lib.rs

#![warn(missing_docs, rustdoc::missing_crate_level_docs)]
#![doc = include_str!("../README.md")]
use core::borrow::Borrow;
use core::hash::Hash;
use core::marker::PhantomData;

use indexmap::IndexMap;
use rand::prelude::*;
use replace_with::replace_with_or_abort;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// A collection which keeps and prioritizes the most recently and commonly used items.
///
/// See the module-level documentation for details.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct CommonCache<K, V, R: Rng = StdRng> {
    /// The base for the exponentially growing size of levels.
    base: usize,
    /// All active levels in the cache.
    ///
    /// These will have at most [1, base, base^2, base^3, ...] items respectively, and the
    /// last level will not be empty.
    #[cfg_attr(
        feature = "serde",
        serde(bound(
            deserialize = "K: Deserialize<'de> + Eq + Hash, V: Deserialize<'de>",
            serialize = "K: Serialize + Eq + Hash, V: Serialize",
        ))
    )]
    levels: Vec<Level<K, V>>,

    /// A random number generator.
    #[cfg_attr(
        feature = "serde",
        serde(skip, default = "SeedableRng::from_entropy", bound = "R: SeedableRng")
    )]
    rng: R,

    /// An upper bound on the number of elements in the cache. May be set to
    /// `usize::MAX`.
    max_size: usize,

    /// A counter that increments every time elements are moved between levels in the cache.
    ///
    /// Used to prevent an `Index` from being silently invalidated just because some random
    /// element has been moved to another level. Instead, an `Index` is considered invalid
    /// whenever its generation differs from the cache's.
    generation: u64,
}

/// A level in the cache.
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(bound(
        deserialize = "K: Deserialize<'de> + Eq + Hash, V: Deserialize<'de>",
        serialize = "K: Serialize + Eq + Hash, V: Serialize",
    ))
)]
struct Level<K, V> {
    /// The items in the level. Will at most contain `base^n` items where `n` is the index of this
    /// level.
    items: IndexMap<K, V>,
    /// An instance of a uniform distribution to generate random numbers in the range
    /// `0..base^n`, where `n` is the index of this level.
    rand_range: rand::distributions::Uniform<usize>,
}

impl<K, V> CommonCache<K, V> {
    /// Create a new `CommonCache` with a specific base and an `Rng` generated from entropy.
    ///
    /// Takes a base which must be `>1` and optionally a max_size which must be `>= 2`.
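    /// # Examples
    ///
    /// A minimal sketch:
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// // Base 2 and no upper bound on the number of elements.
    /// let cache: CommonCache<u64, String> = CommonCache::new(2, None);
    /// assert_eq!(cache.size(), 0);
    /// ```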
    pub fn new(base: usize, max_size: Option<usize>) -> Self {
        Self::new_with_rng(base, max_size, StdRng::from_entropy())
    }

    /// Set the max size. Note that this might cause many elements to be
    /// removed.
    ///
    /// PRE: max_size >= 2
    ///
    /// Runs in linear time if max_size < self.size(), constant time otherwise.
    ///
    /// If the new max_size is less than the previous one, all indexes to this cache
    /// will be invalidated. This is because some elements might be removed
    /// randomly from the cache and we don't want an index lookup to
    /// randomly work/fail.
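    /// # Examples
    ///
    /// A small sketch; which elements survive the shrink is random:
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// for i in 0..10 {
    ///     cache.insert(i, i * 10);
    /// }
    /// cache.set_max_size(4);
    /// assert!(cache.size() <= 4);
    /// ```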
    pub fn set_max_size(&mut self, max_size: usize) {
        assert!(
            max_size >= 2,
            "max_size must be >=2 in CommonCache::set_max_size()"
        );
        if max_size >= self.max_size {
            self.max_size = max_size;
            return;
        }
        // Remember the new, smaller max size, then trim the cache down to it.
        self.max_size = max_size;
        let mut sum = 0;
        for (i, level) in self.levels.iter_mut().enumerate() {
            if sum == max_size {
                self.levels.truncate(i);
                break;
            }
            sum += level.items.len();
            if sum > max_size {
                for _ in max_size..sum {
                    let to_remove = self.rng.gen_range(0..level.items.len());
                    level.items.swap_remove_index(to_remove);
                }
                self.levels.truncate(i + 1);
                break;
            }
        }

        // Some random elements might have been removed, so let's increase the generation
        // to invalidate any indexes to the cache.
        self.generation += 1;
    }

    /// Clear the cache.
    pub fn clear(&mut self) {
        self.levels.clear();
        self.generation += 1;
    }
}

impl<K, V, R: Rng> CommonCache<K, V, R> {
    /// Create a new `CommonCache` with a given random generator. This can be
    /// useful if you have a pseudorandom generator and want deterministic
    /// and reproducible behaviour.
    ///
    /// Also takes a base which must be >1 and optionally a max_size which
    /// must be >=2.
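    /// # Examples
    ///
    /// A sketch using a seeded `StdRng`; any other `Rng` works the same way:
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    /// use rand::{rngs::StdRng, SeedableRng};
    ///
    /// let rng = StdRng::seed_from_u64(42);
    /// let mut cache = CommonCache::new_with_rng(2, Some(8), rng);
    /// cache.insert(1, "one");
    /// assert_eq!(cache.size(), 1);
    /// ```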
    pub fn new_with_rng(base: usize, max_size: Option<usize>, rng: R) -> Self {
        let max_size = max_size.unwrap_or(usize::MAX);
        assert!(max_size >= 2, "max_size in CommonCache must be >= 2");
        assert!(base >= 2, "base in CommonCache must be >=2.");
        Self {
            base,
            rng,
            levels: Vec::new(),
            max_size,
            generation: 0,
        }
    }

    /// Get the number of elements in the cache.
    ///
    /// Runs in `O(log[base](n))` time, since the len of all levels must be summed up.
    pub fn size(&self) -> usize {
        self.levels.iter().map(|x| x.items.len()).sum()
    }

    /// Get the currently configured max size for the cache.
    pub fn max_size(&self) -> usize {
        self.max_size
    }
}

impl<K, V, R> CommonCache<K, V, R>
where
    K: Eq + Hash,
    R: Rng,
{
    /// Insert a value into the cache.
    ///
    /// If the value is new, it will be inserted at the second lowest level. So if `self.base = 2`,
    /// then it will be inserted in the second quarter of the cache.
    ///
    /// If the value already exists in the cache, it will be updated with the new key and
    /// value and moved to one level above its previous position.
    ///
    /// **IMPORTANT: All `Index`es to elements in this cache will be invalidated.**
    /// This is because some random elements will be moved between levels in
    /// the cache, and we don't want indexes to be invalidated randomly. It
    /// might cause some erroneous tests to pass nondeterministically.
    ///
    /// # Returns
    ///
    /// Returns the entry for the newly inserted item.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// let mut entry = cache.insert(4, "Hello");
    /// assert!(matches!(*entry.get_value(), "Hello"));
    /// ```
    pub fn insert(&mut self, key: K, value: V) -> Entry<'_, K, V, R> {
        // Check if the item is already in the cache.
        let insert_level = if let Some(entry) = self.entry(&key) {
            let level = entry.level;
            let _old_item = entry.remove();
            // Insert the item at the level above.
            level.saturating_sub(1)
        } else {
            // If the item is new, insert it in the second lowest level.
            self.levels.len().saturating_sub(2)
        };
        self.insert_at_level::<true>(key, value, insert_level)
    }

    /// Insert an item at a specific level in the cache and possibly push an
    /// item to lower levels.
    ///
    /// This is the core function of the algorithm. It will, with probability
    /// k/n (where n is the maximum number of items at the level and k is
    /// the actual number of items), remove an item from the level and
    /// insert it on the level below. This is repeated for all lower
    /// levels. If an item is selected at the lowest level, a new lowest level
    /// will be created.
    ///
    /// The function will of course also insert the given item at the given
    /// level.
    ///
    /// `self.generation` is increased, so all `Index`es to this cache are
    /// invalidated.
    fn insert_at_level<const CREATE_NEW_LEVEL_IF_NEEDED: bool>(
        &mut self,
        key: K,
        value: V,
        level: usize,
    ) -> Entry<'_, K, V, R> {
        // Let's increment the generation immediately so we don't forget it.
        self.generation += 1;

        if self.size() == self.max_size {
            // The max size has been reached, so remove a random item from the last level.
            let last_level_items = &mut self.levels.last_mut().unwrap().items;
            let to_remove = self.rng.gen_range(0..last_level_items.len());
            last_level_items.swap_remove_index(to_remove);
            if last_level_items.is_empty() {
                self.levels.pop();
            }
        }

        if self.levels.is_empty() {
            // If there are no levels, add one.
            self.levels.push(Level {
                items: IndexMap::with_capacity(1),
                rand_range: (0..1).into(),
            });
        }

        // Loop through all levels from the lowest to the current (`level`). For each
        // level, randomly decide whether to move one item down to the level
        // below. The fuller a level is, the higher the probability that an
        // item will be moved down from that level.
        for level in (level..self.levels.len()).rev() {
            let current_level = &mut self.levels[level];
            // Generate an integer in the range of the total capacity of the level.
            let i = current_level.rand_range.sample(&mut self.rng);
            if let Some(move_down_item) = current_level.items.swap_remove_index(i) {
                if level != self.levels.len() - 1 {
                    // Insert the item on the level below.
                    self.levels[level + 1]
                        .items
                        .insert(move_down_item.0, move_down_item.1);
                } else if CREATE_NEW_LEVEL_IF_NEEDED {
                    // This was the lowest level. So let's create a new one.
                    let new_level_size = self
                        .base
                        .checked_pow((level + 1).try_into().unwrap_or(u32::MAX))
                        .unwrap_or(usize::MAX);
                    self.levels.push(Level {
                        items: IndexMap::from([move_down_item]),
                        rand_range: (0..new_level_size).into(),
                    });
                }
            }
        }
        // Finally, add the item to the desired level.
        let (idx, None) = self.levels[level].items.insert_full(key, value) else {
            unreachable!()
        };
        Entry {
            cache: self,
            level,
            idx,
        }
    }

    /// Get a handle to an entry in the cache.
    ///
    /// Runs in `O(log[base](n))` time.
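    /// # Examples
    ///
    /// A sketch; `entry` returns `None` for keys that are not in the cache:
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert("a", 1);
    /// assert_eq!(cache.entry("a").map(|e| *e.peek_value()), Some(1));
    /// assert!(cache.entry("b").is_none());
    /// ```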
    pub fn entry<Q>(&mut self, key: &Q) -> Option<Entry<'_, K, V, R>>
    where
        K: Borrow<Q>,
        Q: Eq + Hash + ?Sized,
    {
        if let Some((level, idx)) = self
            .levels
            .iter_mut()
            .enumerate()
            .filter_map(|(i, x)| x.items.get_index_of(key).map(|x| (i, x)))
            .next()
        {
            Some(Entry {
                cache: self,
                level,
                idx,
            })
        } else {
            None
        }
    }

    /// Iterate over the elements in the cache so that all items on any level
    /// will come before any item on any lower level.
    ///
    /// This does not alter the cache in any way. So no items are promoted to
    /// higher levels in the cache when iterated over.
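    /// # Examples
    ///
    /// A sketch; the relative order of items within a level is unspecified:
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert('a', 1);
    /// cache.insert('b', 2);
    /// let mut values: Vec<i32> = cache.iter().map(|(_k, v)| *v).collect();
    /// values.sort();
    /// assert_eq!(values, vec![1, 2]);
    /// ```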
    pub fn iter(&self) -> impl DoubleEndedIterator<Item = (&'_ K, &'_ V)> + '_ {
        self.levels.iter().flat_map(|x| x.items.iter())
    }

    /// Iterate over mutable references to the elements in the cache. All items
    /// on any level will come before any item on any lower level.
    ///
    /// This does not alter the structure of the cache. So no items are promoted
    /// to higher levels in the cache when iterated over.
    pub fn iter_mut(&mut self) -> impl Iterator<Item = (&'_ K, &'_ mut V)> {
        self.levels.iter_mut().flat_map(|x| x.items.iter_mut())
    }

    /// Iterate over indices to the elements in the cache so that all items on
    /// any level will come before any item on any lower level.
    ///
    /// This does not alter the cache in any way. So no items are promoted to
    /// higher levels in the cache when iterated over.
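    /// # Examples
    ///
    /// A sketch; the indices remain valid because iterating does not alter the cache:
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert("a", 1);
    /// let indices: Vec<_> = cache.iter_indices().collect();
    /// assert_eq!(indices.len(), 1);
    /// assert_eq!(*indices[0].peek_value(&cache), 1);
    /// ```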
    pub fn iter_indices(&self) -> impl DoubleEndedIterator<Item = Index<K, V, R>> + '_ {
        self.levels
            .iter()
            .enumerate()
            .flat_map(move |(i, x)| (0..x.items.len()).map(move |j| Index::new(i, j, self)))
    }

    /// Find the first item in the cache matching a predicate.
    ///
    /// The advantage of using this method over `self.iter().find()` is that it
    /// gives you an `Entry`, which can be used to promote or remove the
    /// item.
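    /// # Examples
    ///
    /// A sketch; the found entry can then be promoted or removed:
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert(1, "one");
    /// cache.insert(2, "two");
    /// let entry = cache.find_first(|_key, val| val.starts_with("tw"));
    /// assert_eq!(entry.map(|e| *e.peek_key()), Some(2));
    /// ```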
    pub fn find_first(
        &mut self,
        mut pred: impl FnMut(&K, &V) -> bool,
    ) -> Option<Entry<'_, K, V, R>> {
        if let Some((level, (idx, _))) = self
            .levels
            .iter()
            .enumerate()
            .flat_map(|(i, level)| level.items.iter().enumerate().map(move |x| (i, x)))
            .find(|(_, (_, (key, val)))| pred(key, val))
        {
            Some(Entry {
                cache: self,
                level,
                idx,
            })
        } else {
            None
        }
    }
}

/// A reference to an occupied entry in the cache.
#[derive(Debug)]
pub struct Entry<'a, K, V, R: Rng = StdRng> {
    /// A reference to the entire cache.
    cache: &'a mut CommonCache<K, V, R>,
    /// The index of the level for the entry.
    level: usize,
    /// The index for the entry in the level.
    idx: usize,
}

impl<'a, K: Eq + Hash, V, R: Rng> Entry<'a, K, V, R> {
    /// Read the key and value at the entry without touching the rest of the
    /// cache. This operation will hence not be taken into account when
    /// considering which elements are most commonly used.
    pub fn peek_key_value(&self) -> (&K, &V) {
        self.cache.levels[self.level]
            .items
            .get_index(self.idx)
            .unwrap()
    }

    /// Silently read the key at this entry.
    pub fn peek_key(&self) -> &K {
        self.peek_key_value().0
    }

    /// Read the value at the entry without touching the rest of the cache. This
    /// operation will hence not be taken into account when considering
    /// which elements are most commonly used.
    pub fn peek_value(&self) -> &V {
        self.peek_key_value().1
    }

    /// Read the entry mutably without touching the rest of the cache. This
    /// operation will not be taken into account when considering which
    /// elements are most commonly used.
    pub fn peek_key_value_mut(&mut self) -> (&K, &mut V) {
        let (key, value) = self.cache.levels[self.level]
            .items
            .get_index_mut(self.idx)
            .unwrap();
        (key, value)
    }

    /// Read the value mutably without touching the rest of the cache. This
    /// operation will not be taken into account when considering which
    /// elements are most commonly used.
    pub fn peek_value_mut(&mut self) -> &mut V {
        self.peek_key_value_mut().1
    }

    /// Read the item at this entry and destroy the `Entry` struct. The item
    /// will still be in the cache, but this allows us to get a reference
    /// with the full lifetime of this entry.
    pub fn peek_long(self) -> (&'a K, &'a mut V) {
        let (key, value) = self.cache.levels[self.level]
            .items
            .get_index_mut(self.idx)
            .unwrap();
        (key, value)
    }

    /// Get the key and value at this entry and promote this entry to a higher
    /// level in the cache.
    ///
    /// This function will promote this entry to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
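    /// # Examples
    ///
    /// A sketch; reading through `get_*` counts as a use and promotes the item:
    ///
    /// ```rust
    /// use common_cache::CommonCache;
    ///
    /// let mut cache = CommonCache::new(2, None);
    /// cache.insert("key", 1);
    /// let mut entry = cache.entry("key").unwrap();
    /// let (k, v) = entry.get_key_value();
    /// assert_eq!(*k, "key");
    /// *v += 1;
    /// assert_eq!(*entry.peek_value(), 2);
    /// ```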
    pub fn get_key_value(&mut self) -> (&K, &mut V) {
        replace_with_or_abort(self, |self_| {
            let curr_level = self_.level;
            let (index, cache) = self_.index_and_cache();
            let (key, value) = index.remove_from(cache);
            cache.insert_at_level::<false>(key, value, curr_level.saturating_sub(1))
        });
        self.peek_key_value_mut()
    }

    /// Get the value at this entry and promote this entry to a higher level in
    /// the cache.
    ///
    /// This function will promote this entry to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
    pub fn get_value(&mut self) -> &mut V {
        self.get_key_value().1
    }

    /// Get the key and value at this entry and promote this entry to a higher
    /// level in the cache.
    ///
    /// Unlike `Self::get_key_value`, this method consumes the `Entry`, allowing
    /// for a longer lifetime of the returned reference. Note that the item
    /// will still remain in the cache though.
    ///
    /// This function will promote this entry to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
    pub fn get_long(mut self) -> (&'a K, &'a mut V) {
        self.get_key_value();
        self.peek_long()
    }

    /// Remove this entry from the cache, leaving the rest of the cache intact.
    ///
    /// Runs in O(1) time.
    pub fn remove(self) -> (K, V) {
        let (index, cache) = self.index_and_cache();
        index.remove_from(cache)
    }

    /// Get an index for this entry.
    ///
    /// This is like the `Entry` without the reference to the cache. The `Index`
    /// will be invalidated though if the cache is altered in any way,
    /// including insertion of new elements or promotion of existing
    /// elements.
    pub fn index(self) -> Index<K, V, R> {
        self.index_and_cache().0
    }

    /// Split this entry into an index and the cache.
    ///
    /// The `Index` is like the `Entry` without the reference to the cache. The
    /// `Index` will be invalidated though if the cache is altered in any
    /// way, including insertion of new elements or promotion of existing
    /// elements.
    pub fn index_and_cache(self) -> (Index<K, V, R>, &'a mut CommonCache<K, V, R>) {
        (Index::new(self.level, self.idx, self.cache), self.cache)
    }
}

/// An index into a `CommonCache`.
///
/// This should be used when an `Entry` is not sufficient due to lifetime
/// problems. Note however that all indexes to a cache will be invalidated
/// whenever the cache is altered in any way, including insertion of new
/// elements and promotion of existing elements. This is because the
/// index is just a pointer to a specific level and index in that level of the
/// cache. If some element is inserted or promoted, a few other elements will
/// **randomly** be moved down some levels in the cache, causing their indexes
/// to be invalid and potentially point to other items. However, this happens
/// randomly, so tests might not recognize such a bug, and it will not be
/// reproducible.
///
/// The solution is to use an internal counter (the generation), which increments
/// each time the cache is altered. Each index stores the generation the cache had
/// when the index was created, and if the index is used with a newer version of
/// the cache it will be invalid.
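/// # Examples
///
/// A sketch of the intended flow; the index stays valid only until the cache is
/// altered again:
///
/// ```rust
/// use common_cache::CommonCache;
///
/// let mut cache = CommonCache::new(2, None);
/// let index = cache.insert("a", 1).index();
/// // Peeking does not alter the cache, so the index is still valid.
/// assert_eq!(*index.peek_value(&cache), 1);
/// ```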
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct Index<K, V, R: Rng = StdRng> {
    /// The index of the level for the item.
    level: usize,
    /// The index for the item within the level.
    idx: usize,
    /// The generation when this index was created.
    generation: u64,
    _key_ty: PhantomData<K>,
    _val_ty: PhantomData<V>,
    _rng_ty: PhantomData<R>,
}

impl<K: Eq + Hash, V, R: Rng> Index<K, V, R> {
    /// Create a new index from a level and an index on that level.
    fn new(level: usize, idx: usize, in_cache: &CommonCache<K, V, R>) -> Self {
        Self {
            level,
            idx,
            generation: in_cache.generation,
            _key_ty: PhantomData,
            _val_ty: PhantomData,
            _rng_ty: PhantomData,
        }
    }

    /// Assert that this index has the same generation as that of a cache.
    /// Panics otherwise.
    fn assert_generation(&self, cache: &CommonCache<K, V, R>) {
        assert_eq!(
            self.generation, cache.generation,
            "The generations of an `Index` and a `CommonCache` differ"
        );
    }

    /// Get an entry corresponding to this index.
    ///
    /// # Panics
    ///
    /// Panics if the generations of the cache and this index differ, i.e. if
    /// something has been inserted or promoted in the cache since this
    /// index was created.
    ///
    /// Might also panic when trying to read the entry if the item corresponding
    /// to this index has been removed.
    pub fn entry(self, cache: &mut CommonCache<K, V, R>) -> Entry<'_, K, V, R> {
        self.assert_generation(cache);
        Entry {
            cache,
            level: self.level,
            idx: self.idx,
        }
    }

    /// Read the key and value at the index without touching the rest of the
    /// cache. This operation will hence not be taken into account when
    /// considering which elements are most commonly used.
    pub fn peek_key_value<'a>(&'a self, cache: &'a CommonCache<K, V, R>) -> (&'a K, &'a V) {
        self.assert_generation(cache);
        cache.levels[self.level].items.get_index(self.idx).unwrap()
    }

    /// Silently read the key at this index.
    pub fn peek_key<'a>(&'a self, cache: &'a CommonCache<K, V, R>) -> &'a K {
        self.peek_key_value(cache).0
    }

    /// Read the value at the index without touching the rest of the cache. This
    /// operation will hence not be taken into account when considering
    /// which elements are most commonly used.
    pub fn peek_value<'a>(&'a self, cache: &'a CommonCache<K, V, R>) -> &'a V {
        self.peek_key_value(cache).1
    }

    /// Read the item at this index mutably without touching the rest of the
    /// cache. This operation will not be taken into account when
    /// considering which elements are most commonly used.
    ///
    /// Note that this does not count as altering the cache, so the index is
    /// still valid after this.
    pub fn peek_key_value_mut<'a>(
        &'a self,
        cache: &'a mut CommonCache<K, V, R>,
    ) -> (&'a K, &'a mut V) {
        self.assert_generation(cache);
        let (key, value) = cache.levels[self.level]
            .items
            .get_index_mut(self.idx)
            .unwrap();
        (key, value)
    }

    /// Read the value at this index mutably without touching the rest of the
    /// cache. This operation will not be taken into account when
    /// considering which elements are most commonly used.
    ///
    /// Note that this does not count as altering the cache, so the index is
    /// still valid after this.
    pub fn peek_value_mut<'a>(&'a self, cache: &'a mut CommonCache<K, V, R>) -> &'a mut V {
        self.peek_key_value_mut(cache).1
    }

    /// Get the key and value at this index and promote the item to a higher
    /// level in the cache.
    ///
    /// This function will promote the item to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
    ///
    /// **The index will be invalidated after this operation.**
    pub fn get_key_value(self, cache: &mut CommonCache<K, V, R>) -> (&K, &mut V) {
        let curr_level = self.level;
        let (key, value) = self.remove_from(cache);
        cache
            .insert_at_level::<false>(key, value, curr_level.saturating_sub(1))
            .peek_long()
    }

    /// Get the value at this index and promote the item to a higher level in
    /// the cache.
    ///
    /// This function will promote the item to a higher level in the cache and,
    /// based on some probability, move other items down in the cache.
    pub fn get_value(self, cache: &mut CommonCache<K, V, R>) -> &mut V {
        self.get_key_value(cache).1
    }

    /// Remove the item at this index from the cache.
    fn remove_from(self, cache: &mut CommonCache<K, V, R>) -> (K, V) {
        self.assert_generation(cache);
        let level_items = &mut cache.levels[self.level].items;
        let (key, value) = level_items.swap_remove_index(self.idx).unwrap();
        if level_items.is_empty() && self.level == cache.levels.len() - 1 {
            // If the last level became empty, we shall remove it.
            cache.levels.pop();
        }
        (key, value)
    }
}